Commit f8bd5b80 authored by abergeron

Merge pull request #2376 from daemonmaker/cudnn2

Cudnn2
...@@ -368,6 +368,36 @@ class GpuDnnConv(DnnBase, COp): ...@@ -368,6 +368,36 @@ class GpuDnnConv(DnnBase, COp):
# not connected to desc # not connected to desc
return [[1], [1], [0]] return [[1], [1], [0]]
def infer_shape(self, node, shape):
    """Infer the output shape of the cuDNN forward convolution.

    shape[0] is the image shape (batch, channels, height, width) and
    shape[1] is the kernel shape (nb filters, channels, kh, kw).
    node.inputs[2] must be the output of a GpuDnnConvDesc, which
    carries the border mode and subsampling needed for the formula.
    """
    b = shape[0][0]   # number of inputs (batch size)
    h = shape[0][2]   # height of input feature maps
    w = shape[0][3]   # width of input feature maps
    nb = shape[1][0]  # number of output feature maps
    kh = shape[1][2]  # height of each filter
    kw = shape[1][3]  # width of each filter
    padh = 0
    padw = 0
    if (
        not node.inputs[2].owner
        or not isinstance(node.inputs[2].owner.op, GpuDnnConvDesc)
    ):
        # Fixed: was "ShareError", which does not exist in
        # theano.tensor.basic and would itself raise AttributeError.
        raise theano.tensor.basic.ShapeError(
            "case not implemented and probably not needed")
    desc = node.inputs[2].owner.op
    sh, sw = desc.subsample
    if desc.border_mode == 'full':
        padh = kh - 1
        padw = kw - 1
    elif isinstance(desc.border_mode, tuple):
        padh, padw = desc.border_mode
    else:
        assert desc.border_mode == 'valid'
    # Standard convolution output size: (in + 2*pad - kernel)/stride + 1
    return [(
        b, nb,
        (h + 2*padh - kh)/sh + 1,
        (w + 2*padw - kw)/sw + 1
    )]
class GpuDnnConvGradW(DnnBase, COp): class GpuDnnConvGradW(DnnBase, COp):
""" """
...@@ -423,6 +453,40 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -423,6 +453,40 @@ class GpuDnnConvGradW(DnnBase, COp):
return Apply(self, [img, topgrad, desc, h, w], return Apply(self, [img, topgrad, desc, h, w],
[CudaNdarrayType(broadcastable)()]) [CudaNdarrayType(broadcastable)()])
def infer_shape(self, node, shape):
    """Infer the shape of the gradient with respect to the filters.

    shape[0] is the image shape and shape[1] the topgrad shape, both
    (batch, channels, height, width).  Only unit strides are supported:
    with subsampling, the filter size cannot be recovered from the
    image and topgrad sizes alone.
    """
    h = shape[0][2]   # height of input feature maps
    w = shape[0][3]   # width of input feature maps
    kh = shape[1][2]  # height of the gradient wrt the output
    kw = shape[1][3]  # width of the gradient wrt the output
    desc = node.inputs[2].owner.op
    sh, sw = desc.subsample
    # We don't have the information necessary, namely the weight size,
    # so we cannot infer the shape.
    if sh != 1 or sw != 1:
        # Qualified for consistency with GpuDnnConv.infer_shape; a
        # bare ShapeError would be a NameError unless imported.
        raise theano.tensor.basic.ShapeError(
            'Unable to infer shape for stride (%d, %d)' % (sh, sw)
        )
    if desc.border_mode == 'full':
        out3 = 2 - h + (kh - 1) * sh
        out4 = 2 - w + (kw - 1) * sw
    else:
        assert desc.border_mode == 'valid'
        out3 = h - (kh - 1) * sh
        out4 = w - (kw - 1) * sw
    return [(
        shape[1][1],
        shape[0][1],
        out3,
        out4
    )]
class GpuDnnConvGradI(DnnBase, COp): class GpuDnnConvGradI(DnnBase, COp):
""" """
...@@ -477,6 +541,38 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -477,6 +541,38 @@ class GpuDnnConvGradI(DnnBase, COp):
return Apply(self, [kern, topgrad, desc, h, w], return Apply(self, [kern, topgrad, desc, h, w],
[CudaNdarrayType(broadcastable)()]) [CudaNdarrayType(broadcastable)()])
def infer_shape(self, node, shape):
    """Infer the shape of the gradient with respect to the image.

    shape[0] is the kernel shape and shape[1] the topgrad shape.
    Only unit strides are supported: with subsampling, the image size
    cannot be recovered from the kernel and topgrad sizes alone.
    """
    padh = 0
    padw = 0
    desc = node.inputs[2].owner.op
    sh, sw = desc.subsample
    # We don't have the information necessary, namely the image size,
    # so we cannot infer the shape.
    if sh != 1 or sw != 1:
        # Qualified for consistency with GpuDnnConv.infer_shape; a
        # bare ShapeError would be a NameError unless imported.
        raise theano.tensor.basic.ShapeError(
            'Unable to infer shape for stride (%d, %d)' % (sh, sw)
        )
    if desc.border_mode == 'full':
        padh = shape[0][2] - 1
        padw = shape[0][3] - 1
    elif isinstance(desc.border_mode, tuple):
        padh, padw = desc.border_mode
    else:
        assert desc.border_mode == 'valid'
    # Inverse of the forward output-size formula:
    # in = (out - 1)*stride + kernel - 2*pad
    out2 = (shape[1][2] - 1) * sh + shape[0][2] - 2*padh
    out3 = (shape[1][3] - 1) * sw + shape[0][3] - 2*padw
    return [(
        shape[1][0],
        shape[0][1],
        out2,
        out3
    )]
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode='conv', direction_hint=None): conv_mode='conv', direction_hint=None):
...@@ -655,6 +751,17 @@ class GpuDnnPool(DnnBase): ...@@ -655,6 +751,17 @@ class GpuDnnPool(DnnBase):
return Apply(self, [img, desc], return Apply(self, [img, desc],
[img.type()]) [img.type()])
def infer_shape(self, node, shape):
    """Infer the pooling output shape.

    Batch and channel dimensions pass through; the spatial dimensions
    shrink according to the descriptor's window size and stride.
    """
    pool_desc = node.inputs[1].owner.op
    win_h, win_w = pool_desc.ws
    stride_h, stride_w = pool_desc.stride
    nb, ch, in_h, in_w = shape[0]
    out_h = (in_h - win_h)/stride_h + 1
    out_w = (in_w - win_w)/stride_w + 1
    return [(nb, ch, out_h, out_w)]
def c_support_code_struct(self, node, name): def c_support_code_struct(self, node, name):
return """ return """
cudnnTensorDescriptor_t input%(name)s; cudnnTensorDescriptor_t input%(name)s;
...@@ -964,6 +1071,9 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -964,6 +1071,9 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
def c_code_cache_version(self): def c_code_cache_version(self):
return (4, version()) return (4, version())
def infer_shape(self, node, shape):
    # The single output has exactly the shape of the first input.
    out_shape = shape[0]
    return [out_shape]
def dnn_pool(img, ws, stride=(1, 1), mode='max'): def dnn_pool(img, ws, stride=(1, 1), mode='max'):
""" """
...@@ -1016,6 +1126,12 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -1016,6 +1126,12 @@ class GpuDnnSoftmaxBase(DnnBase):
for softmax_input in self.softmax_inputs] for softmax_input in self.softmax_inputs]
self.tensor_4d_descs.append('softmax_output') self.tensor_4d_descs.append('softmax_output')
def infer_shape(self, node, shape):
    # The forward softmax preserves its input shape; the gradient op's
    # output matches its second input instead.
    idx = 0 if self.direction == 'forward' else 1
    return [shape[idx]]
def _define_tensor4d_desc(self, name, id): def _define_tensor4d_desc(self, name, id):
return """ return """
cudnnTensorDescriptor_t %(id)s_%(name)s; cudnnTensorDescriptor_t %(id)s_%(name)s;
...@@ -1129,6 +1245,7 @@ if (CudaNdarray_prep_output(&%(outs)s, 4, CudaNdarray_HOST_DIMS(%(ins)s)) != 0) ...@@ -1129,6 +1245,7 @@ if (CudaNdarray_prep_output(&%(outs)s, 4, CudaNdarray_HOST_DIMS(%(ins)s)) != 0)
class GpuDnnSoftmax(GpuDnnSoftmaxBase): class GpuDnnSoftmax(GpuDnnSoftmaxBase):
direction = 'forward'
softmax_inputs = ['softmax_input'] softmax_inputs = ['softmax_input']
def make_node(self, x): def make_node(self, x):
...@@ -1179,6 +1296,7 @@ err%(name)s = cudnnSoftmaxForward( ...@@ -1179,6 +1296,7 @@ err%(name)s = cudnnSoftmaxForward(
class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
direction = 'backward'
softmax_inputs = ['softmax_gout', 'softmax_input'] softmax_inputs = ['softmax_gout', 'softmax_input']
def make_node(self, dy, sm): def make_node(self, dy, sm):
......
...@@ -3,6 +3,7 @@ import unittest ...@@ -3,6 +3,7 @@ import unittest
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import numpy import numpy
from itertools import product
import theano import theano
from theano.compat.six import StringIO from theano.compat.six import StringIO
...@@ -12,7 +13,8 @@ import theano.tests.unittest_tools as utt ...@@ -12,7 +13,8 @@ import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs, neibs2images from theano.sandbox.neighbours import images2neibs, neibs2images
from theano.tensor.signal.downsample import max_pool_2d from theano.tensor.signal.downsample import max_pool_2d
from theano.tensor.signal.downsample import DownsampleFactorMaxGrad from theano.tensor.signal.downsample import DownsampleFactorMaxGrad
import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import gpu_contiguous
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
...@@ -194,6 +196,222 @@ def test_dnn_tag(): ...@@ -194,6 +196,222 @@ def test_dnn_tag():
for n in f.maker.fgraph.toposort()]) for n in f.maker.fgraph.toposort()])
class TestDnnInferShapes(utt.InferShapeTester):
    """Shape-inference tests for the cuDNN ops: softmax, convolution
    (and its gradients) and pooling (and its gradient)."""

    def setUp(self):
        super(TestDnnInferShapes, self).setUp()
def test_softmax(self):
    """Check infer_shape for GpuDnnSoftmax and GpuDnnSoftmaxGrad."""
    t = T.tensor4('t')
    t_val = numpy.asarray(
        numpy.random.rand(5, 4, 3, 2),
        dtype=theano.config.floatX
    )
    # Forward softmax.
    softmax_out = dnn.GpuDnnSoftmax('bc01', 'accurate', 'channel')(t)
    self._compile_and_check(
        [t], [softmax_out], [t_val], dnn.GpuDnnSoftmax
    )
    # Gradient of the softmax wrt its input.
    grad_out = T.grad(
        dnn.GpuDnnSoftmax('bc01', 'accurate', 'channel')(t).mean(),
        t
    )
    self._compile_and_check(
        [t], [grad_out], [t_val], dnn.GpuDnnSoftmaxGrad
    )
def test_conv(self):
    """Check infer_shape for GpuDnnConv over border modes, strides and
    conv modes."""
    img = T.tensor4('img')
    kerns = T.tensor4('kerns')
    img_val = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    kern_vals = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    for params in product(
        ['valid', 'full'],
        [(1, 1), (2, 2)],
        ['conv', 'cross']
    ):
        desc = dnn.GpuDnnConvDesc(
            border_mode=params[0],
            subsample=params[1],
            conv_mode=params[2]
        )(img.shape, kerns.shape)
        # Fixed: build the op on the *symbolic* inputs.  Passing the
        # numpy values (img_val, kern_vals) here made the graph
        # constant and disconnected it from the inputs being checked.
        conv = dnn.GpuDnnConv()(img, kerns, desc)
        self._compile_and_check(
            [img, kerns],
            [conv],
            [img_val, kern_vals],
            dnn.GpuDnnConv
        )
def test_conv_gradw(self):
    """Check infer_shape for GpuDnnConvGradW (gradient wrt filters).

    Only unit strides are exercised: GpuDnnConvGradW.infer_shape
    cannot infer shapes for other strides.
    """
    img = T.tensor4('img')
    kerns = T.tensor4('kerns')
    img_val = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    kern_vals = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    for params in product(
        ['valid', 'full'],
        [(1, 1)],  # strides besides (1, 1)
        ['conv', 'cross']
    ):
        # Swap batch and channel axes of the image for the gradW op.
        temp_img = img.dimshuffle(1, 0, 2, 3)
        temp_kerns = kerns
        if params[2] == 'conv':
            # Presumably compensates for the kernel flip performed in
            # 'conv' mode — TODO confirm against GpuDnnConvGradW.
            temp_kerns = temp_kerns[:, :, ::-1, ::-1]
        temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
        # Expected filter-gradient shape for a 'valid' convolution:
        # spatial dims are (img - kern + 1).
        shape = theano.tensor.stack(
            temp_kerns.shape[1], temp_img.shape[1],
            temp_img.shape[2] - temp_kerns.shape[2] + 1,
            temp_img.shape[3] - temp_kerns.shape[3] + 1
        )
        desc = dnn.GpuDnnConvDesc(
            border_mode=params[0],
            subsample=params[1],
            conv_mode=params[2]
        )(temp_img.shape, shape)
        conv_grad_w = dnn.GpuDnnConvGradW()(
            temp_img,
            temp_kerns,
            desc,
            shape[2],
            shape[3]
        )
        self._compile_and_check(
            [temp_img, temp_kerns],
            [conv_grad_w],
            [img_val, kern_vals],
            dnn.GpuDnnConvGradW
        )
def test_conv_gradi(self):
    """Check infer_shape for GpuDnnConvGradI (gradient wrt the image).

    Only 'valid' border mode and unit strides are exercised.
    """
    img = T.tensor4('img')
    kerns = T.tensor4('kerns')
    img_val = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    kern_vals = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    for params in product(
        ['valid'],  # Should this work for 'full'?
        [(1, 1)],
        ['conv', 'cross']
    ):
        # (A leftover debug "print params" statement was removed here.)
        # Swap batch and channel axes of the kernels for the gradI op.
        temp_kerns = kerns.dimshuffle(1, 0, 2, 3)
        # Expected image-gradient shape for a 'valid' convolution:
        # spatial dims are (topgrad + kern - 1).
        shape = theano.tensor.stack(
            img.shape[0], temp_kerns.shape[1],
            img.shape[2] + temp_kerns.shape[2] - 1,
            img.shape[3] + temp_kerns.shape[3] - 1
        )
        desc = dnn.GpuDnnConvDesc(
            border_mode=params[0],
            subsample=params[1],
            conv_mode=params[2]
        )(shape, temp_kerns.shape)
        conv_grad_i = dnn.GpuDnnConvGradI()(
            temp_kerns,
            img,
            desc,
            shape[2],
            shape[3]
        )
        self._compile_and_check(
            [temp_kerns, img],
            [conv_grad_i],
            [kern_vals, img_val],
            dnn.GpuDnnConvGradI
        )
def test_pool(self):
    """Check infer_shape for GpuDnnPool over window sizes, strides and
    pooling modes."""
    img = T.tensor4('img')
    img_val = numpy.asarray(
        numpy.random.rand(2, 3, 4, 5),
        dtype=theano.config.floatX
    )
    for ws, stride, mode in product(
        [(1, 1), (2, 2), (3, 3)],
        [(1, 1), (2, 2), (3, 3)],
        ['max', 'average']
    ):
        desc = dnn.GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode)()
        pool_out = dnn.GpuDnnPool()(img, desc)
        self._compile_and_check(
            [img],
            [pool_out],
            [img_val],
            dnn.GpuDnnPool
        )
def test_pool_grad(self):
    """Check infer_shape for GpuDnnPoolGrad over window sizes, strides
    and pooling modes."""
    img = T.tensor4('img')
    img_grad = T.tensor4('img_grad')
    out = T.tensor4('out')
    img_val = numpy.asarray(
        numpy.random.rand(2, 3, 4, 5),
        dtype=theano.config.floatX
    )
    img_grad_val = numpy.asarray(
        numpy.random.rand(2, 3, 4, 5),
        dtype=theano.config.floatX
    )
    out_val = numpy.asarray(
        numpy.random.rand(2, 3, 4, 5),
        dtype=theano.config.floatX
    )
    for ws, stride, mode in product(
        [(1, 1), (2, 2), (3, 3)],
        [(1, 1), (2, 2), (3, 3)],
        ['max', 'average']
    ):
        desc = dnn.GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode)()
        # Note the op's argument order: (img, out, img_grad, desc).
        pool_grad = dnn.GpuDnnPoolGrad()(img, out, img_grad, desc)
        self._compile_and_check(
            [img, img_grad, out],
            [pool_grad],
            [img_val, img_grad_val, out_val],
            dnn.GpuDnnPoolGrad
        )
def test_version(): def test_version():
if not cuda.dnn.dnn_available(): if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
......
...@@ -248,7 +248,7 @@ class InferShapeTester(unittest.TestCase): ...@@ -248,7 +248,7 @@ class InferShapeTester(unittest.TestCase):
numeric_outputs = outputs_function(*numeric_inputs) numeric_outputs = outputs_function(*numeric_inputs)
numeric_shapes = shapes_function(*numeric_inputs) numeric_shapes = shapes_function(*numeric_inputs)
for out, shape in zip(numeric_outputs, numeric_shapes): for out, shape in zip(numeric_outputs, numeric_shapes):
assert numpy.all(out.shape == shape) assert numpy.all(out.shape == shape), (out.shape, shape)
def str_diagnostic(expected, value, rtol, atol): def str_diagnostic(expected, value, rtol, atol):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论