提交 b5933e75 authored 作者: Yann N. Dauphin's avatar Yann N. Dauphin

Merge pull request #3 from nouiz/ynd-dnn_pooling

Ynd dnn pooling
......@@ -34,6 +34,27 @@ dnn_available.avail = None
dnn_available.msg = None
def c_set_tensor4d(var, desc, err, fail):
    """Generate C code that fills the cudnn tensor4d descriptor `desc`
    from the CudaNdarray C variable `var`.

    All four parameters are strings substituted into the returned C
    template: `var` names the CudaNdarray, `desc` the descriptor handle,
    `err` a cudnnStatus_t lvalue receiving the call's status, and `fail`
    the failure code to execute when the call does not succeed.
    """
    # A HOST_STRIDES entry of 0 is replaced by the dense C-contiguous
    # stride computed from the dims — presumably because the descriptor
    # needs real element strides for broadcastable/contiguous dims;
    # TODO(review): confirm against the cudnn docs.
    substitutions = {"var": var, "desc": desc, "err": err, "fail": fail}
    template = """
%(err)s = cudnnSetTensor4dDescriptorEx(
%(desc)s, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(%(var)s)[0],
CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_DIMS(%(var)s)[2],
CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[0]?CudaNdarray_HOST_STRIDES(%(var)s)[0]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3]*CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_STRIDES(%(var)s)[1]?CudaNdarray_HOST_STRIDES(%(var)s)[1]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[2]?CudaNdarray_HOST_STRIDES(%(var)s)[2]:CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[3]?CudaNdarray_HOST_STRIDES(%(var)s)[3]:1
);
if (%(err)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set tensor4d descriptor: %%s",
cudnnGetErrorString(%(err)s));
%(fail)s
}
"""
    return template % substitutions
class DnnBase(GpuOp):
"""
Creates a handle for cudnn and pulls in the cudnn libraries and headers.
......@@ -99,26 +120,6 @@ class GpuDnnConvDesc(GpuOp):
return Apply(self, [img_shape, kern_shape],
[CDataType("cudnnConvolutionDescriptor_t")()])
# NOTE(review): this is the pre-refactor method duplicate of the module-level
# c_set_tensor4d helper defined earlier in this diff; it emits identical
# cudnnSetTensor4dDescriptorEx setup C code. The diff removes it in favor of
# the shared module-level function.
def c_set_tensor4d(self, var, desc, err, fail):
return """
%(err)s = cudnnSetTensor4dDescriptorEx(
%(desc)s, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(%(var)s)[0],
CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_DIMS(%(var)s)[2],
CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[0]?CudaNdarray_HOST_STRIDES(%(var)s)[0]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3]*CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_STRIDES(%(var)s)[1]?CudaNdarray_HOST_STRIDES(%(var)s)[1]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[2]?CudaNdarray_HOST_STRIDES(%(var)s)[2]:CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[3]?CudaNdarray_HOST_STRIDES(%(var)s)[3]:1
);
if (%(err)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set tensor4d descriptor: %%s",
cudnnGetErrorString(%(err)s));
%(fail)s
}
""" % dict(var=var, err=err, desc=desc, fail=fail)
def c_code(self, node, name, inputs, outputs, sub):
img_shape, kern_shape = inputs
desc, = outputs
......@@ -517,18 +518,18 @@ if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output%(id)d)) != CUDNN_STATUS_S
def c_cleanup_code_struct(self, node, struct_id):
    """Return C cleanup code that destroys this apply's cudnn tensor4d
    descriptors (input and output).

    The diff span contained both the pre-fix lines — which had a stray
    closing parenthesis (`if (input%(id)d) != NULL)`) that made the
    generated C fail to compile — and the corrected lines; only the
    corrected pair is kept here.
    """
    # Guard each destroy with a NULL check: descriptor creation may have
    # failed part-way through struct init, leaving some handles unset.
    return """
if (input%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(input%(id)d); }
if (output%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output%(id)d); }
""" % dict(id=struct_id)
def c_code(self, node, name, inputs, outputs, sub):
desc = inputs[1]
out, = outputs
set_in = self.c_set_tensor4d(inputs[0], "input" + str(sub['struct_id']),
set_in = c_set_tensor4d(inputs[0], "input" + str(sub['struct_id']),
'err' + name, sub['fail'])
set_out = self.c_set_tensor4d(out, "output" + str(sub['struct_id']),
set_out = c_set_tensor4d(out, "output" + str(sub['struct_id']),
'err' + name, sub['fail'])
return """
......@@ -600,7 +601,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return [[1], [0]]
def c_code_cache_version(self):
    """Version tag for Theano's C-code cache.

    The diff span held both the old `return (1,)` and the new
    `return (2,)` (the old one unreachable-shadowing the new); keep the
    bumped version so stale cached C code is recompiled after the
    descriptor-cleanup fix.
    """
    return (2,)
class GpuDnnPoolGrad(DnnBase):
......@@ -665,10 +666,10 @@ if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output_grad%(id)d)) != CUDNN_STA
def c_cleanup_code_struct(self, node, struct_id):
    """Return C cleanup code destroying the four cudnn tensor4d
    descriptors of the pooling-gradient apply (input, input_grad,
    output, output_grad).

    The diff span contained both the pre-fix lines — which had a stray
    closing parenthesis (`if (input%(id)d) != NULL)`) breaking the
    generated C — and the corrected lines; only the corrected set is
    kept here.
    """
    # Guard each destroy with a NULL check: descriptor creation may have
    # failed part-way through struct init, leaving some handles unset.
    return """
if (input%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(input%(id)d); }
if (input_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(input_grad%(id)d); }
if (output%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output%(id)d); }
if (output_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id)d); }
""" % dict(id=struct_id)
def c_code(self, node, name, inputs, outputs, sub):
......@@ -676,15 +677,15 @@ if (output_grad%(id)d) != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id
out_grad, = outputs
set_in = "\n".join([
self.c_set_tensor4d(inp, "input" + str(sub['struct_id']),
c_set_tensor4d(inp, "input" + str(sub['struct_id']),
'err' + name, sub['fail']),
self.c_set_tensor4d(inp_grad, "input_grad" + str(sub['struct_id']),
c_set_tensor4d(inp_grad, "input_grad" + str(sub['struct_id']),
'err' + name, sub['fail']),
self.c_set_tensor4d(out, "output" + str(sub['struct_id']),
c_set_tensor4d(out, "output" + str(sub['struct_id']),
'err' + name, sub['fail'])
])
set_out = self.c_set_tensor4d(out, "output_grad" + str(sub['struct_id']),
set_out = c_set_tensor4d(out, "output_grad" + str(sub['struct_id']),
'err' + name, sub['fail'])
return """
......@@ -736,7 +737,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
output_grad_desc="output_grad"+str(sub['struct_id']))
def c_code_cache_version(self):
    """Version tag for Theano's C-code cache.

    The diff span held both the old `return (1,)` and the new
    `return (2,)`; keep the bumped version so cached C code generated
    before the descriptor-cleanup fix is invalidated.
    """
    return (2,)
def dnn_pool(img, ws, stride=(1, 1), mode='max'):
......
......@@ -46,8 +46,8 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None, pool_function=T.max, mode='ignor
def test_pooling():
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
x = T.tensor4()
x = T.ftensor4()
for func in (T.max, T.mean):
for ws in (4, 5):
......@@ -57,8 +57,8 @@ def test_pooling():
out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride),
pool_function=func)
f1 = theano.function([x], out1)
f2 = theano.function([x], out2)
f1 = theano.function([x], out1, mode=mode_with_gpu)
f2 = theano.function([x], out2, mode=mode_with_gpu)
data = numpy.random.normal(0, 1, (1, 10, 100, 100)).astype("float32")
a = f1(data).__array__()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论