Commit 7801d91e authored by Dustin Webb

Completed implementation of infer_shape on cudnn operators.

Parent commit: 6aacf213
......@@ -377,12 +377,20 @@ class GpuDnnConv(DnnBase, COp):
kw = shape[1][3] # Width of each filter
padh = 0
padw = 0
if (
not node.inputs[2].owner
or not isinstance(node.inputs[2].owner.op, GpuDnnConvDesc)
):
raise theano.tensor.basic.ShareError("case not implemented and probably not needed")
desc = node.inputs[2].owner.op
sh, sw = desc.subsample
if desc.border_mode == 'full':
padh = kh - 1
padw = kw - 1
elif isinstance(desc.border_mode, tuple):
padh, padw = self.border_mode
else:
assert desc.border_mode == 'valid'
return [(
b, nb,
......@@ -450,23 +458,33 @@ class GpuDnnConvGradW(DnnBase, COp):
w = shape[0][3] # Width of input feature maps
kh = shape[1][2] # Height of each filter
kw = shape[1][3] # Width of each filter
out3 = kh
out4 = kw
desc = node.inputs[2].owner.op
sh, sw = desc.subsample
# We don't have the information necessary, namely the weight size so
# we cannot infer the shape
if sh != 1 or sw != 1:
raise ShapeError(
'Unable to infer shape for stride (%d, %d)' % (sh, sw)
)
if desc.border_mode == 'full':
kh = 2 - h + (kh - 1) * sh
kw = 2 - w + (kw - 1) * sw
out3 = 2 - h + (kh - 1) * sh
out4 = 2 - w + (kw - 1) * sw
else:
# border_mode is 'valid'
assert(desc.border_mode == 'valid')
kh = h - (kh - 1) * sh
kw = w - (kw - 1) * sw
out3 = h - (kh - 1) * sh
out4 = w - (kw - 1) * sw
return [(
shape[1][1],
shape[0][1],
kh,
kw
out3,
out4
)]
......@@ -524,26 +542,35 @@ class GpuDnnConvGradI(DnnBase, COp):
[CudaNdarrayType(broadcastable)()])
def infer_shape(self, node, shape):
    """Infer the output (image-gradient) shape of GpuDnnConvGradI.

    Parameters
    ----------
    node : Apply
        ``node.inputs[2]`` must be produced by a ``GpuDnnConvDesc`` op,
        which carries ``subsample`` and ``border_mode``.
    shape : list of tuple
        ``shape[0]`` appears to be the filter shape (nkern, nchan, kh, kw)
        and ``shape[1]`` the top-gradient shape (batch, nkern, th, tw),
        matching ``GpuDnnConvGradI()(kerns, top, desc)`` in the tests —
        TODO confirm against make_node.

    Returns
    -------
    list of tuple
        A single 4-tuple with the inferred gradient-image shape.

    Raises
    ------
    ShapeError
        If the stride is not (1, 1): several image sizes then map to the
        same output size, so the image shape cannot be recovered.
    """
    desc = node.inputs[2].owner.op
    sh, sw = desc.subsample
    # We don't have the information necessary, namely the image size, so
    # we cannot infer the shape when the stride is not 1.
    if sh != 1 or sw != 1:
        raise ShapeError(
            'Unable to infer shape for stride (%d, %d)' % (sh, sw)
        )
    padh = 0
    padw = 0
    if desc.border_mode == 'full':
        # 'full' convolution pads by (kernel - 1) on each side.
        padh = shape[0][2] - 1
        padw = shape[0][3] - 1
    elif isinstance(desc.border_mode, tuple):
        # BUG FIX: the explicit padding lives on the descriptor, not on
        # this op (self has no border_mode attribute).
        padh, padw = desc.border_mode
    else:
        assert desc.border_mode == 'valid'
    # Invert the forward output-size formula:
    # image = (top - 1) * stride + kernel - 2 * pad
    out2 = (shape[1][2] - 1) * sh + shape[0][2] - 2 * padh
    out3 = (shape[1][3] - 1) * sw + shape[0][3] - 2 * padw
    return [(
        shape[1][0],
        shape[0][1],
        out2,
        out3
    )]
......@@ -725,29 +752,15 @@ class GpuDnnPool(DnnBase):
[img.type()])
def infer_shape(self, node, shape):
    """Infer the output shape of GpuDnnPool.

    ``node.inputs[1]`` must be produced by a ``GpuDnnPoolDesc`` op,
    which carries the window size ``ws`` and the ``stride``.
    ``shape[0]`` is the image shape (batch, channels, h, w).

    Returns a list with one 4-tuple: the pooled output shape.
    """
    desc = node.inputs[1].owner.op
    kh, kw = desc.ws
    sh, sw = desc.stride
    # Standard un-padded pooling size: (in - window) // stride + 1.
    # Floor division keeps the dimensions integral on both Python 2
    # and 3 (plain `/` would yield floats under Python 3).
    return [(
        shape[0][0],
        shape[0][1],
        (shape[0][2] - kh) // sh + 1,
        (shape[0][3] - kw) // sw + 1
    )]
def c_support_code_struct(self, node, name):
return """
......@@ -1058,6 +1071,9 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
def c_code_cache_version(self):
    # Pair a local revision number with version() — presumably the
    # installed cuDNN/library version, verify against its definition —
    # so the compiled module is rebuilt when either changes.
    cache_key = (4, version())
    return cache_key
def infer_shape(self, node, shape):
    """The output has exactly the same shape as the first input."""
    out_shape = shape[0]
    return [out_shape]
def dnn_pool(img, ws, stride=(1, 1), mode='max'):
"""
......@@ -1111,10 +1127,10 @@ class GpuDnnSoftmaxBase(DnnBase):
self.tensor_4d_descs.append('softmax_output')
def infer_shape(self, node, shape):
    """Softmax is shape-preserving.

    Forward direction: the output matches the single input,
    ``shape[0]``.  Backward direction: the inputs are
    (softmax_gout, softmax_input) and the gradient matches the
    softmax input, ``shape[1]``.
    """
    if self.direction == 'forward':
        return [shape[0]]
    else:
        return [shape[1]]
def _define_tensor4d_desc(self, name, id):
return """
......@@ -1229,6 +1245,7 @@ if (CudaNdarray_prep_output(&%(outs)s, 4, CudaNdarray_HOST_DIMS(%(ins)s)) != 0)
class GpuDnnSoftmax(GpuDnnSoftmaxBase):
direction = 'forward'
softmax_inputs = ['softmax_input']
def make_node(self, x):
......@@ -1279,6 +1296,7 @@ err%(name)s = cudnnSoftmaxForward(
class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
direction = 'backward'
softmax_inputs = ['softmax_gout', 'softmax_input']
def make_node(self, dy, sm):
......
......@@ -14,6 +14,7 @@ from theano.sandbox.neighbours import images2neibs, neibs2images
from theano.tensor.signal.downsample import max_pool_2d
from theano.tensor.signal.downsample import DownsampleFactorMaxGrad
import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import gpu_contiguous
# Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda
......@@ -232,11 +233,11 @@ class TestDnnInferShapes(utt.InferShapeTester):
img = T.tensor4('img')
kerns = T.tensor4('kerns')
img_val = numpy.asarray(
numpy.random.rand(2, 3, 4, 5),
numpy.random.rand(3, 4, 5, 6),
dtype=theano.config.floatX
)
kern_vals = numpy.asarray(
numpy.random.rand(2, 3, 4, 5),
numpy.random.rand(3, 4, 5, 6),
dtype=theano.config.floatX
)
......@@ -245,12 +246,12 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(1, 1), (2, 2)],
['conv', 'cross']
):
conv = dnn.dnn_conv(img, kerns, params[0], params[1], params[2])
softmax = dnn.GpuDnnSoftmax(
'bc01',
'accurate',
'channel'
)
desc = dnn.GpuDnnConvDesc(
border_mode=params[0],
subsample=params[1],
conv_mode=params[2]
)(img.shape, kerns.shape)
conv = dnn.GpuDnnConv()(img_val, kern_vals, desc)
self._compile_and_check(
[img, kerns],
[conv],
......@@ -258,18 +259,144 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnConv
)
def test_conv_gradw(self):
    """Check GpuDnnConvGradW.infer_shape against the actual output.

    Only stride (1, 1) is exercised: infer_shape raises ShapeError
    for any other stride.
    """
    img = T.tensor4('img')
    kerns = T.tensor4('kerns')
    img_val = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    kern_vals = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    for params in product(
        ['valid', 'full'],
        [(1, 1)],  # strides besides (1, 1) are not inferable
        ['conv', 'cross']
    ):
        # GradW works on the transposed problem: swap batch and
        # channel axes, and flip the kernel for true convolution.
        temp_img = img.dimshuffle(1, 0, 2, 3)
        temp_kerns = kerns
        if params[2] == 'conv':
            temp_kerns = temp_kerns[:, :, ::-1, ::-1]
        temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
        # Expected weight shape for a 'valid' forward convolution.
        shape = theano.tensor.stack(
            temp_kerns.shape[1], temp_img.shape[1],
            temp_img.shape[2] - temp_kerns.shape[2] + 1,
            temp_img.shape[3] - temp_kerns.shape[3] + 1
        )
        desc = dnn.GpuDnnConvDesc(
            border_mode=params[0],
            subsample=params[1],
            conv_mode=params[2]
        )(temp_img.shape, shape)
        conv_grad_w = dnn.GpuDnnConvGradW()(temp_img, temp_kerns, desc)
        self._compile_and_check(
            [temp_img, temp_kerns],
            [conv_grad_w],
            [img_val, kern_vals],
            dnn.GpuDnnConvGradW
        )
def test_conv_gradi(self):
    """Check GpuDnnConvGradI.infer_shape against the actual output.

    Only 'valid' border mode and stride (1, 1) are exercised;
    infer_shape raises ShapeError for other strides.
    """
    img = T.tensor4('img')
    kerns = T.tensor4('kerns')
    img_val = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    kern_vals = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    for params in product(
        ['valid'],  # Should this work for 'full'?
        [(1, 1)],
        ['conv', 'cross']
    ):
        # NOTE: removed a leftover `print params` debug statement
        # (Python-2-only syntax, noisy in test output).
        temp_kerns = kerns.dimshuffle(1, 0, 2, 3)
        # Image shape that a 'valid' forward conv would have consumed.
        shape = theano.tensor.stack(
            img.shape[0], temp_kerns.shape[1],
            img.shape[2] + temp_kerns.shape[2] - 1,
            img.shape[3] + temp_kerns.shape[3] - 1
        )
        desc = dnn.GpuDnnConvDesc(
            border_mode=params[0],
            subsample=params[1],
            conv_mode=params[2]
        )(shape, temp_kerns.shape)
        conv_grad_i = dnn.GpuDnnConvGradI()(temp_kerns, img, desc)
        self._compile_and_check(
            [temp_kerns, img],
            [conv_grad_i],
            [kern_vals, img_val],
            dnn.GpuDnnConvGradI
        )
def test_pool(self):
    """Check GpuDnnPool.infer_shape over window/stride/mode combos."""
    img = T.tensor4('img')
    img_val = numpy.asarray(
        numpy.random.rand(2, 3, 4, 5),
        dtype=theano.config.floatX
    )
    for params in product(
        [(1, 1), (2, 2), (3, 3)],
        [(1, 1), (2, 2), (3, 3)],
        ['max', 'average']
    ):
        desc = dnn.GpuDnnPoolDesc(
            ws=params[0],
            stride=params[1],
            mode=params[2]
        )()
        self._compile_and_check(
            [img],
            [dnn.GpuDnnPool()(img, desc)],
            [img_val],
            dnn.GpuDnnPool
        )
def test_pool_grad(self):
    """Check GpuDnnPoolGrad.infer_shape over window/stride/mode combos."""
    img = T.tensor4('img')
    img_grad = T.tensor4('img_grad')
    out = T.tensor4('out')
    # Three independent (2, 3, 4, 5) float inputs for the checker.
    img_val, img_grad_val, out_val = [
        numpy.asarray(
            numpy.random.rand(2, 3, 4, 5),
            dtype=theano.config.floatX
        )
        for _ in range(3)
    ]
    sizes = [(1, 1), (2, 2), (3, 3)]
    for ws, stride, mode in product(sizes, sizes, ['max', 'average']):
        desc = dnn.GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode)()
        pool_grad = dnn.GpuDnnPoolGrad()(img, out, img_grad, desc)
        self._compile_and_check(
            [img, img_grad, out],
            [pool_grad],
            [img_val, img_grad_val, out_val],
            dnn.GpuDnnPoolGrad
        )
......
......@@ -248,7 +248,7 @@ class InferShapeTester(unittest.TestCase):
numeric_outputs = outputs_function(*numeric_inputs)
numeric_shapes = shapes_function(*numeric_inputs)
for out, shape in zip(numeric_outputs, numeric_shapes):
assert numpy.all(out.shape == shape)
assert numpy.all(out.shape == shape), (out.shape, shape)
def str_diagnostic(expected, value, rtol, atol):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论