提交 b5933e75 authored 作者: Yann N. Dauphin's avatar Yann N. Dauphin

Merge pull request #3 from nouiz/ynd-dnn_pooling

Ynd dnn pooling
...@@ -34,6 +34,27 @@ dnn_available.avail = None ...@@ -34,6 +34,27 @@ dnn_available.avail = None
dnn_available.msg = None dnn_available.msg = None
def c_set_tensor4d(var, desc, err, fail):
    """Generate C source that fills a cuDNN tensor4d descriptor from a CudaNdarray.

    Emits a ``cudnnSetTensor4dDescriptorEx()`` call for the CudaNdarray C
    variable named ``var``, writing the cuDNN status into ``err`` and running
    the ``fail`` snippet when the call does not succeed.

    A host stride of 0 (a broadcastable dimension) is replaced by the
    corresponding C-contiguous stride, since cuDNN does not accept 0 strides.
    """
    # Names substituted into the C template below via %-formatting.
    substitutions = {'var': var, 'err': err, 'desc': desc, 'fail': fail}
    template = """
%(err)s = cudnnSetTensor4dDescriptorEx(
%(desc)s, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(%(var)s)[0],
CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_DIMS(%(var)s)[2],
CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[0]?CudaNdarray_HOST_STRIDES(%(var)s)[0]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3]*CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_STRIDES(%(var)s)[1]?CudaNdarray_HOST_STRIDES(%(var)s)[1]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[2]?CudaNdarray_HOST_STRIDES(%(var)s)[2]:CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[3]?CudaNdarray_HOST_STRIDES(%(var)s)[3]:1
);
if (%(err)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set tensor4d descriptor: %%s",
cudnnGetErrorString(%(err)s));
%(fail)s
}
"""
    return template % substitutions
class DnnBase(GpuOp): class DnnBase(GpuOp):
""" """
Creates a handle for cudnn and pulls in the cudnn libraries and headers. Creates a handle for cudnn and pulls in the cudnn libraries and headers.
...@@ -99,26 +120,6 @@ class GpuDnnConvDesc(GpuOp): ...@@ -99,26 +120,6 @@ class GpuDnnConvDesc(GpuOp):
return Apply(self, [img_shape, kern_shape], return Apply(self, [img_shape, kern_shape],
[CDataType("cudnnConvolutionDescriptor_t")()]) [CDataType("cudnnConvolutionDescriptor_t")()])
def c_set_tensor4d(self, var, desc, err, fail):
    """Return C code that sets up a cuDNN tensor4d descriptor for ``var``.

    The generated snippet calls ``cudnnSetTensor4dDescriptorEx()`` on the
    descriptor variable ``desc``, stores the returned status in ``err`` and
    executes ``fail`` if the status is not ``CUDNN_STATUS_SUCCESS``.

    Broadcastable dimensions have a host stride of 0, which cuDNN rejects, so
    each zero stride is swapped for the C-contiguous stride of that axis.
    """
    # Mapping consumed by the %-style placeholders in the C template.
    fmt = dict(var=var, err=err, desc=desc, fail=fail)
    c_src = """
%(err)s = cudnnSetTensor4dDescriptorEx(
%(desc)s, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(%(var)s)[0],
CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_DIMS(%(var)s)[2],
CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[0]?CudaNdarray_HOST_STRIDES(%(var)s)[0]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3]*CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_STRIDES(%(var)s)[1]?CudaNdarray_HOST_STRIDES(%(var)s)[1]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[2]?CudaNdarray_HOST_STRIDES(%(var)s)[2]:CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[3]?CudaNdarray_HOST_STRIDES(%(var)s)[3]:1
);
if (%(err)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set tensor4d descriptor: %%s",
cudnnGetErrorString(%(err)s));
%(fail)s
}
"""
    return c_src % fmt
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
img_shape, kern_shape = inputs img_shape, kern_shape = inputs
desc, = outputs desc, = outputs
...@@ -517,18 +518,18 @@ if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output%(id)d)) != CUDNN_STATUS_S ...@@ -517,18 +518,18 @@ if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output%(id)d)) != CUDNN_STATUS_S
def c_cleanup_code_struct(self, node, struct_id): def c_cleanup_code_struct(self, node, struct_id):
return """ return """
if (input%(id)d) != NULL) { cudnnDestroyTensor4dDescriptor(input%(id)d); } if (input%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(input%(id)d); }
if (output%(id)d) != NULL) { cudnnDestroyTensor4dDescriptor(output%(id)d); } if (output%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output%(id)d); }
""" % dict(id=struct_id) """ % dict(id=struct_id)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
desc = inputs[1] desc = inputs[1]
out, = outputs out, = outputs
set_in = self.c_set_tensor4d(inputs[0], "input" + str(sub['struct_id']), set_in = c_set_tensor4d(inputs[0], "input" + str(sub['struct_id']),
'err' + name, sub['fail']) 'err' + name, sub['fail'])
set_out = self.c_set_tensor4d(out, "output" + str(sub['struct_id']), set_out = c_set_tensor4d(out, "output" + str(sub['struct_id']),
'err' + name, sub['fail']) 'err' + name, sub['fail'])
return """ return """
...@@ -600,7 +601,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -600,7 +601,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return [[1], [0]] return [[1], [0]]
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (2,)
class GpuDnnPoolGrad(DnnBase): class GpuDnnPoolGrad(DnnBase):
...@@ -665,10 +666,10 @@ if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output_grad%(id)d)) != CUDNN_STA ...@@ -665,10 +666,10 @@ if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output_grad%(id)d)) != CUDNN_STA
def c_cleanup_code_struct(self, node, struct_id): def c_cleanup_code_struct(self, node, struct_id):
return """ return """
if (input%(id)d) != NULL) { cudnnDestroyTensor4dDescriptor(input%(id)d); } if (input%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(input%(id)d); }
if (input_grad%(id)d) != NULL) { cudnnDestroyTensor4dDescriptor(input_grad%(id)d); } if (input_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(input_grad%(id)d); }
if (output%(id)d) != NULL) { cudnnDestroyTensor4dDescriptor(output%(id)d); } if (output%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output%(id)d); }
if (output_grad%(id)d) != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id)d); } if (output_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id)d); }
""" % dict(id=struct_id) """ % dict(id=struct_id)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
...@@ -676,15 +677,15 @@ if (output_grad%(id)d) != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id ...@@ -676,15 +677,15 @@ if (output_grad%(id)d) != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id
out_grad, = outputs out_grad, = outputs
set_in = "\n".join([ set_in = "\n".join([
self.c_set_tensor4d(inp, "input" + str(sub['struct_id']), c_set_tensor4d(inp, "input" + str(sub['struct_id']),
'err' + name, sub['fail']), 'err' + name, sub['fail']),
self.c_set_tensor4d(inp_grad, "input_grad" + str(sub['struct_id']), c_set_tensor4d(inp_grad, "input_grad" + str(sub['struct_id']),
'err' + name, sub['fail']), 'err' + name, sub['fail']),
self.c_set_tensor4d(out, "output" + str(sub['struct_id']), c_set_tensor4d(out, "output" + str(sub['struct_id']),
'err' + name, sub['fail']) 'err' + name, sub['fail'])
]) ])
set_out = self.c_set_tensor4d(out, "output_grad" + str(sub['struct_id']), set_out = c_set_tensor4d(out, "output_grad" + str(sub['struct_id']),
'err' + name, sub['fail']) 'err' + name, sub['fail'])
return """ return """
...@@ -736,7 +737,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -736,7 +737,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
output_grad_desc="output_grad"+str(sub['struct_id'])) output_grad_desc="output_grad"+str(sub['struct_id']))
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (2,)
def dnn_pool(img, ws, stride=(1, 1), mode='max'): def dnn_pool(img, ws, stride=(1, 1), mode='max'):
......
...@@ -46,8 +46,8 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None, pool_function=T.max, mode='ignor ...@@ -46,8 +46,8 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None, pool_function=T.max, mode='ignor
def test_pooling(): def test_pooling():
if not cuda.dnn.dnn_available(): if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
x = T.tensor4() x = T.ftensor4()
for func in (T.max, T.mean): for func in (T.max, T.mean):
for ws in (4, 5): for ws in (4, 5):
...@@ -57,8 +57,8 @@ def test_pooling(): ...@@ -57,8 +57,8 @@ def test_pooling():
out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride), out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride),
pool_function=func) pool_function=func)
f1 = theano.function([x], out1) f1 = theano.function([x], out1, mode=mode_with_gpu)
f2 = theano.function([x], out2) f2 = theano.function([x], out2, mode=mode_with_gpu)
data = numpy.random.normal(0, 1, (1, 10, 100, 100)).astype("float32") data = numpy.random.normal(0, 1, (1, 10, 100, 100)).astype("float32")
a = f1(data).__array__() a = f1(data).__array__()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论