提交 8a83fdcb authored 作者: Dustin Webb's avatar Dustin Webb

Incorporated reviewer comments.

上级 b9609e87
...@@ -411,9 +411,6 @@ class GpuDnnSoftmax(GpuOp): ...@@ -411,9 +411,6 @@ class GpuDnnSoftmax(GpuOp):
assert(mode in ('instance', 'channel')) assert(mode in ('instance', 'channel'))
self.mode = mode self.mode = mode
def __str__(self):
return self.__class__.__name__
def make_node(self, x): def make_node(self, x):
x = as_cuda_ndarray_variable(x) x = as_cuda_ndarray_variable(x)
assert x.ndim == 4 assert x.ndim == 4
...@@ -504,6 +501,11 @@ cudnnSoftmaxMode_t mode%(id)d = CUDNN_SOFTMAX_MODE_CHANNEL; ...@@ -504,6 +501,11 @@ cudnnSoftmaxMode_t mode%(id)d = CUDNN_SOFTMAX_MODE_CHANNEL;
if (%(mode)d == 1) if (%(mode)d == 1)
mode%(id)d = CUDNN_SOFTMAX_MODE_INSTANCE; mode%(id)d = CUDNN_SOFTMAX_MODE_INSTANCE;
if (!CudaNdarray_is_c_contiguous(%(ins)s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
%(fail)s
}
err%(name)s = cudnnSetTensor4dDescriptor( err%(name)s = cudnnSetTensor4dDescriptor(
softmax_input_%(id)d, softmax_input_%(id)d,
format%(id)d, format%(id)d,
...@@ -519,7 +521,10 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -519,7 +521,10 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
%(fail)s %(fail)s
} }
CudaNdarray_prep_output(&(%(outs)s), 4, CudaNdarray_HOST_DIMS(%(ins)s)); if (CudaNdarray_prep_output(&%(outs)s, 4, CudaNdarray_HOST_DIMS(%(ins)s)) != 0)
{
%(fail)s
}
err%(name)s = cudnnSetTensor4dDescriptor( err%(name)s = cudnnSetTensor4dDescriptor(
softmax_output_%(id)d, softmax_output_%(id)d,
...@@ -556,7 +561,7 @@ err%(name)s = cudnnSoftmaxForward( ...@@ -556,7 +561,7 @@ err%(name)s = cudnnSoftmaxForward(
def local_softmax_dnn(node): def local_softmax_dnn(node):
if isinstance(node.op, GpuSoftmax): if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle('x', 'x', 0, 1) ins = node.inputs[0].dimshuffle('x', 'x', 0, 1)
out = GpuDnnSoftmax('bc01', 'accurate', 'instance')(ins) out = GpuDnnSoftmax('bc01', 'accurate', 'instance')(gpu_contiguous(ins))
out = as_cuda_ndarray_variable(out.dimshuffle(2, 3)) out = as_cuda_ndarray_variable(out.dimshuffle(2, 3))
return [out] return [out]
......
...@@ -242,9 +242,9 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx): ...@@ -242,9 +242,9 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx):
cmp(128, 16 * 1024, f, f_gpu) cmp(128, 16 * 1024, f, f_gpu)
cmp(128, 64 * 1024, f, f_gpu) cmp(128, 64 * 1024, f, f_gpu)
cmp((2 << 15) - 1, 5, f, f_gpu) # cudnn permits no more than 2^15 - 1 rows cmp((2 << 15) - 1, 5, f, f_gpu) # cudnn permits no more than 2^15 - 1 rows
cmp(2 << 15, 5, f, f_gpu) cmp(5, 2 << 15, f, f_gpu)
cmp(0, 10, f, f_gpu)
return f, f_gpu
def test_softmax(): def test_softmax():
def cmp(n, m, f, f_gpu): def cmp(n, m, f, f_gpu):
...@@ -256,8 +256,10 @@ def test_softmax(): ...@@ -256,8 +256,10 @@ def test_softmax():
x = T.fmatrix('x') x = T.fmatrix('x')
z = T.nnet.softmax z = T.nnet.softmax
_test_softmax(x, x, z, z, type(z), cuda.nnet.GpuSoftmax, cmp, -2) f, f_gpu = _test_softmax(x, x, z, z, type(z), cuda.nnet.GpuSoftmax, cmp, -2)
cmp(2 << 15, 5, f, f_gpu)
cmp(0, 10, f, f_gpu)
def test_cudnn_softmax(): def test_cudnn_softmax():
def cmp(n, m, f, f_gpu): def cmp(n, m, f, f_gpu):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论