提交 113afd29 作者: Dustin Webb

Refactored GpuDnnSoftmax to use new DnnBase.

上级 aef17262
...@@ -386,7 +386,7 @@ def local_conv_dnn(node): ...@@ -386,7 +386,7 @@ def local_conv_dnn(node):
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn') gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
class GpuDnnSoftmax(GpuOp): class GpuDnnSoftmax(DnnBase):
""" """
Op for the cuDNN Softmax. Op for the cuDNN Softmax.
...@@ -416,30 +416,19 @@ class GpuDnnSoftmax(GpuOp): ...@@ -416,30 +416,19 @@ class GpuDnnSoftmax(GpuOp):
assert x.ndim == 4 assert x.ndim == 4
return Apply(self, [x], [x.type()]) return Apply(self, [x], [x.type()])
def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h']
def c_header_dirs(self):
return [os.path.dirname(__file__)]
def c_libraries(self):
return ['cudnn']
def c_support_code_struct(self, node, struct_id): def c_support_code_struct(self, node, struct_id):
return """ return """
cudnnHandle_t softmax_handle_%(id)d;
cudnnTensor4dDescriptor_t softmax_input_%(id)d; cudnnTensor4dDescriptor_t softmax_input_%(id)d;
cudnnTensor4dDescriptor_t softmax_output_%(id)d; cudnnTensor4dDescriptor_t softmax_output_%(id)d;
""" % dict(id=struct_id) """ % dict(id=struct_id)
def c_init_code_struct(self, node, struct_id, sub): def c_init_code_struct(self, node, struct_id, sub):
return """ return """
softmax_handle_%(id)d = NULL;
softmax_input_%(id)d = NULL; softmax_input_%(id)d = NULL;
softmax_output_%(id)d = NULL; softmax_output_%(id)d = NULL;
cudnnStatus_t err%(id)d; cudnnStatus_t err%(id)d;
if ((err%(id)d = cudnnCreate(&softmax_handle_%(id)d)) != CUDNN_STATUS_SUCCESS) { if ((err%(id)d = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not create cudnn handle: %%s", PyErr_Format(PyExc_RuntimeError, "could not create cudnn handle: %%s",
cudnnGetErrorString(err%(id)d)); cudnnGetErrorString(err%(id)d));
%(fail)s %(fail)s
...@@ -463,9 +452,6 @@ if(softmax_input_%(id)d != NULL) ...@@ -463,9 +452,6 @@ if(softmax_input_%(id)d != NULL)
if(softmax_output_%(id)d != NULL) if(softmax_output_%(id)d != NULL)
cudnnDestroyTensor4dDescriptor(softmax_output_%(id)d); cudnnDestroyTensor4dDescriptor(softmax_output_%(id)d);
if(softmax_handle_%(id)d != NULL)
cudnnDestroy(softmax_handle_%(id)d);
""" % dict(id=struct_id) """ % dict(id=struct_id)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
...@@ -542,7 +528,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -542,7 +528,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
} }
err%(name)s = cudnnSoftmaxForward( err%(name)s = cudnnSoftmaxForward(
softmax_handle_%(id)d, _handle,
algo%(id)d, algo%(id)d,
mode%(id)d, mode%(id)d,
softmax_input_%(id)d, softmax_input_%(id)d,
...@@ -561,7 +547,7 @@ err%(name)s = cudnnSoftmaxForward( ...@@ -561,7 +547,7 @@ err%(name)s = cudnnSoftmaxForward(
def local_softmax_dnn(node): def local_softmax_dnn(node):
if isinstance(node.op, GpuSoftmax): if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle('x', 'x', 0, 1) ins = node.inputs[0].dimshuffle('x', 'x', 0, 1)
out = GpuDnnSoftmax('bc01', 'accurate', 'instance')(gpu_contiguous(ins)) out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
out = as_cuda_ndarray_variable(out.dimshuffle(2, 3)) out = as_cuda_ndarray_variable(out.dimshuffle(2, 3))
return [out] return [out]
......
...@@ -241,11 +241,12 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx): ...@@ -241,11 +241,12 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx):
cmp(2, 10000, f, f_gpu) cmp(2, 10000, f, f_gpu)
cmp(128, 16 * 1024, f, f_gpu) cmp(128, 16 * 1024, f, f_gpu)
cmp(128, 64 * 1024, f, f_gpu) cmp(128, 64 * 1024, f, f_gpu)
cmp((2 << 15) - 1, 5, f, f_gpu) # cudnn permits no more than 2^16 - 1 rows cmp((2 << 15) - 1, 5, f, f_gpu) # cudnn permits no more than 2^16 - 1 rows
cmp(5, 2 << 15, f, f_gpu) cmp(5, 2 << 15, f, f_gpu)
return f, f_gpu return f, f_gpu
def test_softmax(): def test_softmax():
def cmp(n, m, f, f_gpu): def cmp(n, m, f, f_gpu):
#print "test_softmax",n,m #print "test_softmax",n,m
...@@ -256,13 +257,23 @@ def test_softmax(): ...@@ -256,13 +257,23 @@ def test_softmax():
x = T.fmatrix('x') x = T.fmatrix('x')
z = T.nnet.softmax z = T.nnet.softmax
f, f_gpu = _test_softmax(x, x, z, z, type(z), cuda.nnet.GpuSoftmax, cmp, -2) f, f_gpu = _test_softmax(
x,
x,
z,
z,
type(z),
cuda.nnet.GpuSoftmax,
cmp,
-2
)
# cuDNN cannot handle these test cases but the Theano softmax can so we # cuDNN cannot handle these test cases but the Theano softmax can so we
# test them only for the Theano softmax. # test them only for the Theano softmax.
cmp(2 << 15, 5, f, f_gpu) cmp(2 << 15, 5, f, f_gpu)
cmp(0, 10, f, f_gpu) cmp(0, 10, f, f_gpu)
def test_cudnn_softmax(): def test_cudnn_softmax():
def cmp(n, m, f, f_gpu): def cmp(n, m, f, f_gpu):
#print "test_softmax",n,m #print "test_softmax",n,m
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论