提交 6e500132 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Enable merging of some operations for GpuConv and unbreak infer_shape tests.

上级 54d16f99
...@@ -21,6 +21,7 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable, ...@@ -21,6 +21,7 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax, from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
GpuDownsampleFactorMaxGrad) GpuDownsampleFactorMaxGrad)
from theano.sandbox.cuda.nnet import GpuSoftmax from theano.sandbox.cuda.nnet import GpuSoftmax
from theano.sandbox.cuda.opt_util import alpha_merge, output_merge
from theano.sandbox.cuda import gpu_seqopt, register_opt from theano.sandbox.cuda import gpu_seqopt, register_opt
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
...@@ -347,6 +348,8 @@ def ensure_float(val, default, name): ...@@ -347,6 +348,8 @@ def ensure_float(val, default, name):
return default.clone() return default.clone()
if not isinstance(val, Variable): if not isinstance(val, Variable):
val = constant(val) val = constant(val)
if hasattr(val, 'ndim') and val.ndim == 0:
val = as_scalar(val)
if not isinstance(val.type, theano.scalar.Scalar): if not isinstance(val.type, theano.scalar.Scalar):
raise TypeError("%s: expected a scalar value" % (name,)) raise TypeError("%s: expected a scalar value" % (name,))
if not val.type.dtype == 'float32': if not val.type.dtype == 'float32':
...@@ -1492,6 +1495,48 @@ if True: ...@@ -1492,6 +1495,48 @@ if True:
return return
return [GpuDnnConvGradI(inplace=True)(*node.inputs)] return [GpuDnnConvGradI(inplace=True)(*node.inputs)]
@register_opt('cudnn')
@alpha_merge(GpuDnnConv, alpha_in=4, nd=4)
def local_dnn_conv_alpha_merge(node, *inputs):
    """Rebuild a GpuDnnConv application for the alpha-merge rewrite.

    The ``alpha_merge`` decorator is configured for GpuDnnConv with
    ``alpha_in=4`` and ``nd=4``; presumably it supplies ``inputs`` with
    the scaling factor already folded into the alpha argument -- TODO
    confirm against ``opt_util``.

    Returns a one-element list with the output of a fresh GpuDnnConv that
    keeps the ``workmem`` setting of ``node.op``, or None when
    ``version()`` reports -1.
    """
    if version() != -1:
        # Carry the original op's workmem setting over to the new op.
        rebuilt_op = GpuDnnConv(workmem=node.op.workmem)
        return [rebuilt_op(*inputs)]
    return None
@register_opt('cudnn')
@alpha_merge(GpuDnnConvGradW, alpha_in=4, nd=4)
def local_dnn_convw_alpha_merge(node, *inputs):
    """Rebuild a GpuDnnConvGradW application for the alpha-merge rewrite.

    The ``alpha_merge`` decorator is configured for GpuDnnConvGradW with
    ``alpha_in=4`` and ``nd=4``; presumably it hands over ``inputs`` with
    the merged scaling factor in place -- TODO confirm against
    ``opt_util``.

    Returns a one-element list with the output of a default-constructed
    GpuDnnConvGradW applied to ``inputs``, or None when ``version()``
    reports -1.
    """
    dnn_version = version()
    if dnn_version == -1:
        return None
    grad_weights_op = GpuDnnConvGradW()
    return [grad_weights_op(*inputs)]
@register_opt('cudnn')
@alpha_merge(GpuDnnConvGradI, alpha_in=4, nd=4)
def local_dnn_convi_alpha_merge(node, *inputs):
    """Rebuild a GpuDnnConvGradI application for the alpha-merge rewrite.

    The ``alpha_merge`` decorator is configured for GpuDnnConvGradI with
    ``alpha_in=4`` and ``nd=4``; presumably it hands over ``inputs`` with
    the merged scaling factor in place -- TODO confirm against
    ``opt_util``.

    Returns a one-element list with the output of a default-constructed
    GpuDnnConvGradI applied to ``inputs``, or None when ``version()``
    reports -1.
    """
    if version() == -1:
        return None
    # The replacement must be the same kind of op the decorator matches
    # (gradient w.r.t. the inputs), not the gradient-w.r.t.-weights op.
    return [GpuDnnConvGradI()(*inputs)]
@register_opt('cudnn')
@output_merge(GpuDnnConv, alpha_in=4, out_in=2, nd=4)
def local_dnn_conv_output_merge(node, *inputs):
    """Rebuild a GpuDnnConv application for the output-merge rewrite.

    The ``output_merge`` decorator is configured for GpuDnnConv with
    ``alpha_in=4``, ``out_in=2`` and ``nd=4``; presumably it supplies
    ``inputs`` with the accumulated term placed in the output argument
    -- TODO confirm against ``opt_util``.

    Returns a one-element list with the output of a fresh GpuDnnConv that
    keeps the ``workmem`` setting of ``node.op``, or None when
    ``version()`` reports -1.
    """
    if version() != -1:
        # Carry the original op's workmem setting over to the new op.
        merged_conv = GpuDnnConv(workmem=node.op.workmem)
        return [merged_conv(*inputs)]
    return None
@register_opt('cudnn')
@output_merge(GpuDnnConvGradW, alpha_in=4, out_in=2, nd=4)
def local_dnn_convw_output_merge(node, *inputs):
    """Rebuild a GpuDnnConvGradW application for the output-merge rewrite.

    The ``output_merge`` decorator is configured for GpuDnnConvGradW with
    ``alpha_in=4``, ``out_in=2`` and ``nd=4``; presumably it supplies
    ``inputs`` with the accumulated term placed in the output argument
    -- TODO confirm against ``opt_util``.

    Returns a one-element list with the output of a default-constructed
    GpuDnnConvGradW applied to ``inputs``, or None when ``version()``
    reports -1.
    """
    dnn_version = version()
    if dnn_version == -1:
        return None
    grad_weights_op = GpuDnnConvGradW()
    return [grad_weights_op(*inputs)]
@register_opt('cudnn')
@output_merge(GpuDnnConvGradI, alpha_in=4, out_in=2, nd=4)
def local_dnn_convi_output_merge(node, *inputs):
    """Rebuild a GpuDnnConvGradI application for the output-merge rewrite.

    The ``output_merge`` decorator is configured for GpuDnnConvGradI with
    ``alpha_in=4``, ``out_in=2`` and ``nd=4``; presumably it supplies
    ``inputs`` with the accumulated term placed in the output argument
    -- TODO confirm against ``opt_util``.

    Returns a one-element list with the output of a default-constructed
    GpuDnnConvGradI applied to ``inputs``, or None when ``version()``
    reports -1.
    """
    if version() != -1:
        grad_inputs_op = GpuDnnConvGradI()
        return [grad_inputs_op(*inputs)]
    return None
@register_opt('cudnn') @register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMax]) @local_optimizer([GpuDownsampleFactorMax])
def local_pool_dnn(node): def local_pool_dnn(node):
......
...@@ -237,12 +237,13 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -237,12 +237,13 @@ class TestDnnInferShapes(utt.InferShapeTester):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img') img = T.ftensor4('img')
kerns = T.ftensor4('kerns') kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray( img_val = numpy.asarray(
numpy.random.rand(3, 4, 5, 6), numpy.random.rand(7, 2, 6, 4),
dtype='float32' dtype='float32'
) )
kern_vals = numpy.asarray( kern_vals = numpy.asarray(
numpy.random.rand(3, 4, 5, 6), numpy.random.rand(8, 2, 4, 3),
dtype='float32' dtype='float32'
) )
...@@ -251,16 +252,21 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -251,16 +252,21 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(1, 1), (2, 2)], [(1, 1), (2, 2)],
['conv', 'cross'] ['conv', 'cross']
): ):
out_vals = numpy.zeros(
dnn.GpuDnnConv.get_out_shape(img_val.shape, kern_vals.shape,
border_mode=params[0],
subsample=params[1]),
dtype='float32')
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=params[0], border_mode=params[0],
subsample=params[1], subsample=params[1],
conv_mode=params[2] conv_mode=params[2]
)(img.shape, kerns.shape) )(img.shape, kerns.shape)
conv = dnn.GpuDnnConv()(img_val, kern_vals, desc) conv = dnn.GpuDnnConv()(img, kerns, out, desc)
self._compile_and_check( self._compile_and_check(
[img, kerns], [img, kerns, out],
[conv], [conv],
[img_val, kern_vals], [img_val, kern_vals, out_vals],
dnn.GpuDnnConv dnn.GpuDnnConv
) )
...@@ -269,14 +275,16 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -269,14 +275,16 @@ class TestDnnInferShapes(utt.InferShapeTester):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img') img = T.ftensor4('img')
kerns = T.ftensor4('kerns') kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray( img_val = numpy.asarray(
numpy.random.rand(3, 4, 5, 6), numpy.random.rand(2, 5, 6, 8),
dtype='float32' dtype='float32'
) )
kern_vals = numpy.asarray( kern_vals = numpy.asarray(
numpy.random.rand(3, 4, 5, 6), numpy.random.rand(2, 1, 5, 6),
dtype='float32' dtype='float32'
) )
out_vals = numpy.zeros((3, 3, 1, 1), dtype='float32')
for params in product( for params in product(
['valid', 'full'], ['valid', 'full'],
...@@ -288,27 +296,27 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -288,27 +296,27 @@ class TestDnnInferShapes(utt.InferShapeTester):
if params[2] == 'conv': if params[2] == 'conv':
temp_kerns = temp_kerns[:, :, ::-1, ::-1] temp_kerns = temp_kerns[:, :, ::-1, ::-1]
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3) temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
shape = theano.tensor.stack( shape = (
temp_kerns.shape[1], temp_img.shape[1], kern_vals.shape[1], img_val.shape[1],
temp_img.shape[2] - temp_kerns.shape[2] + 1, img_val.shape[2] - kern_vals.shape[2] + 1,
temp_img.shape[3] - temp_kerns.shape[3] + 1 img_val.shape[3] - kern_vals.shape[3] + 1
) )
out_vals = numpy.zeros(shape, dtype='float32')
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=params[0], border_mode=params[0],
subsample=params[1], subsample=params[1],
conv_mode=params[2] conv_mode=params[2]
)(temp_img.shape, shape) )(temp_img.shape, out.shape)
conv_grad_w = dnn.GpuDnnConvGradW()( conv_grad_w = dnn.GpuDnnConvGradW()(
temp_img, temp_img,
temp_kerns, temp_kerns,
out,
desc, desc,
shape[2],
shape[3]
) )
self._compile_and_check( self._compile_and_check(
[temp_img, temp_kerns], [temp_img, temp_kerns, out],
[conv_grad_w], [conv_grad_w],
[img_val, kern_vals], [img_val, kern_vals, out_vals],
dnn.GpuDnnConvGradW dnn.GpuDnnConvGradW
) )
...@@ -317,6 +325,7 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -317,6 +325,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img') img = T.ftensor4('img')
kerns = T.ftensor4('kerns') kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray( img_val = numpy.asarray(
numpy.random.rand(3, 4, 5, 6), numpy.random.rand(3, 4, 5, 6),
dtype='float32' dtype='float32'
...@@ -331,29 +340,28 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -331,29 +340,28 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(1, 1)], [(1, 1)],
['conv', 'cross'] ['conv', 'cross']
): ):
print params
temp_kerns = kerns.dimshuffle(1, 0, 2, 3) temp_kerns = kerns.dimshuffle(1, 0, 2, 3)
shape = theano.tensor.stack( shape = (
img.shape[0], temp_kerns.shape[1], img_val.shape[0], kern_vals.shape[1],
img.shape[2] + temp_kerns.shape[2] - 1, img_val.shape[2] + kern_vals.shape[2] - 1,
img.shape[3] + temp_kerns.shape[3] - 1 img_val.shape[3] + kern_vals.shape[3] - 1
) )
out_vals = numpy.zeros(shape, dtype='float32')
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=params[0], border_mode=params[0],
subsample=params[1], subsample=params[1],
conv_mode=params[2] conv_mode=params[2]
)(shape, temp_kerns.shape) )(out.shape, temp_kerns.shape)
conv_grad_i = dnn.GpuDnnConvGradI()( conv_grad_i = dnn.GpuDnnConvGradI()(
temp_kerns, temp_kerns,
img, img,
out,
desc, desc,
shape[2],
shape[3]
) )
self._compile_and_check( self._compile_and_check(
[temp_kerns, img], [temp_kerns, img, out],
[conv_grad_i], [conv_grad_i],
[kern_vals, img_val], [kern_vals, img_val, out_vals],
dnn.GpuDnnConvGradI dnn.GpuDnnConvGradI
) )
...@@ -424,6 +432,65 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -424,6 +432,65 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnPoolGrad dnn.GpuDnnPoolGrad
) )
def test_dnn_conv_merge():
    """Check that the cuDNN merge optimizations do not change results.

    Builds three update-style expressions around a cuDNN convolution and
    its two gradients, compiles them once with the default GPU mode and
    once with the six ``local_dnn_conv*_merge`` optimizations excluded,
    and verifies that:

    * with the merges enabled, each graph output is produced by the
      matching cuDNN op (forward, grad-weights, grad-inputs);
    * with the merges excluded, it is not;
    * both compiled functions return numerically close values.
    """
    # Skip (rather than fail) when cuDNN is unavailable, like the other
    # cuDNN tests in this file.
    if not cuda.dnn.dnn_available():
        raise SkipTest(dnn.dnn_available.msg)

    img = T.ftensor4()
    kern = T.ftensor4()
    out = T.ftensor4()

    # Problem sizes: batch, channels, filters, image h/w, kernel h/w.
    b = 1
    c = 4
    f = 3
    ih = 2
    iw = 8
    kh = 2
    kw = 2
    img_val = numpy.random.random((b, c, ih, iw)).astype('float32')
    kern_val = numpy.random.random((f, c, kh, kw)).astype('float32')
    # Output height depends on the kernel *height* (kh), width on kw.
    out_val = numpy.random.random(
        (b, f, ih - kh + 1, iw - kw + 1)).astype('float32')

    conv = dnn.dnn_conv(img, kern)
    gw = theano.grad(conv.sum(), kern)
    gi = theano.grad(conv.sum(), img)

    lr = numpy.asarray(0.05, dtype='float32')

    fr = out - lr * conv
    wr = kern - lr * gw
    ir = img - lr * gi

    # One expected producing op per compiled output, in output order.
    expected_ops = (dnn.GpuDnnConv,
                    dnn.GpuDnnConvGradW,
                    dnn.GpuDnnConvGradI)

    f1 = theano.function([img, kern, out], [fr, wr, ir], mode=mode_with_gpu)
    # NOTE(review): in a GPU mode the graph outputs may be produced by a
    # transfer-to-host node wrapping the cuDNN op; if so these checks need
    # to look at ``owner.inputs[0].owner.op`` instead -- confirm on a GPU.
    for result, op_cls in zip(f1.maker.fgraph.outputs, expected_ops):
        assert isinstance(result.owner.op, op_cls)

    mode = mode_with_gpu
    for opt_name in ('local_dnn_conv_alpha_merge',
                     'local_dnn_convw_alpha_merge',
                     'local_dnn_convi_alpha_merge',
                     'local_dnn_conv_output_merge',
                     'local_dnn_convw_output_merge',
                     'local_dnn_convi_output_merge'):
        mode = mode.excluding(opt_name)

    f2 = theano.function([img, kern, out], [fr, wr, ir], mode=mode)

    # The negative checks must inspect the function compiled *without*
    # the merge optimizations (f2), not f1.
    for result, op_cls in zip(f2.maker.fgraph.outputs, expected_ops):
        assert not isinstance(result.owner.op, op_cls)

    out_f1 = f1(img_val, kern_val, out_val)
    out_f2 = f2(img_val, kern_val, out_val)

    assert len(out_f1) == len(out_f2)

    for v1, v2 in zip(out_f1, out_f2):
        utt.assert_allclose(v1, v2)
def test_version(): def test_version():
if not cuda.dnn.dnn_available(): if not cuda.dnn.dnn_available():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论