Commit a50d7907 authored by Gijs van Tulder

Add AbstractConv3d optimisations to gpuarray backend.

Parent 07bc5550
...@@ -19,6 +19,9 @@ from theano.tensor.nnet import LogSoftmax, SoftmaxGrad ...@@ -19,6 +19,9 @@ from theano.tensor.nnet import LogSoftmax, SoftmaxGrad
from theano.tensor.nnet.abstract_conv import (AbstractConv2d, from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs, AbstractConv2d_gradInputs,
AbstractConv3d,
AbstractConv3d_gradWeights,
AbstractConv3d_gradInputs,
get_conv_output_shape) get_conv_output_shape)
from theano.tensor.signal.pool import ( from theano.tensor.signal.pool import (
Pool, MaxPoolGrad, AveragePoolGrad) Pool, MaxPoolGrad, AveragePoolGrad)
...@@ -1919,31 +1922,85 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -1919,31 +1922,85 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
return [rval] return [rval]
@register_opt2([AbstractConv3d, AbstractConv3d_gradWeights,
                AbstractConv3d_gradInputs], 'fast_compile', 'conv_dnn', 'cudnn')
def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
    """Rewrite an abstract 3d convolution node to its cuDNN counterpart.

    Covers the forward pass and both gradient ops.  Returns None (no
    rewrite) when the op is not one of the 3d convolution ops or when it
    uses filter dilation, which this cuDNN path does not support.
    """
    if not isinstance(op, (AbstractConv3d,
                           AbstractConv3d_gradWeights,
                           AbstractConv3d_gradInputs)):
        return
    # No dilated convolution support in this cuDNN path.
    if op.filter_dilation != (1, 1, 1):
        return None

    img, kern = inputs[0], inputs[1]
    if not dnn_available(img.type.context_name):
        raise_no_cudnn()
    # filter_flip selects true convolution; otherwise cross-correlation.
    conv_mode = 'conv' if op.filter_flip else 'cross'

    if isinstance(op, AbstractConv3d):
        rval = dnn_conv3d(img, kern,
                          border_mode=op.border_mode,
                          subsample=op.subsample,
                          direction_hint='forward!',
                          conv_mode=conv_mode)
    elif isinstance(op, AbstractConv3d_gradWeights):
        # inputs[2] carries the three spatial dims of the filters.
        kerns_shp = (kern.shape[1], img.shape[1],
                     inputs[2][0], inputs[2][1], inputs[2][2])
        rval = dnn_gradweight3d(img, kern, kerns_shp,
                                border_mode=op.border_mode,
                                subsample=op.subsample,
                                conv_mode=conv_mode)
    else:  # AbstractConv3d_gradInputs, guaranteed by the isinstance guard
        # inputs[2] carries the three spatial dims of the input image.
        img_shp = (kern.shape[0], img.shape[1],
                   inputs[2][0], inputs[2][1], inputs[2][2])
        rval = dnn_gradinput3d(img, kern, img_shp,
                               border_mode=op.border_mode,
                               subsample=op.subsample,
                               conv_mode=conv_mode)
    return [rval]
@register_opt('fast_compile', 'conv_dnn', 'cudnn')
@local_optimizer([AbstractConv2d, AbstractConv3d])
def local_abstractconv_cudnn(node):
    """Lift a forward AbstractConv2d/AbstractConv3d on GPU inputs to cuDNN.

    Dispatches to the 2d or 3d graph rewriter depending on the op type;
    returns None when the first input is not already a GpuArray variable.
    (Reconstructed post-change definition: the diff view had fused the
    old and new text of each line together.)
    """
    ctx = infer_context_name(*node.inputs)
    if not isinstance(node.inputs[0].type, GpuArrayType):
        return
    if isinstance(node.op, AbstractConv2d):
        return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
    elif isinstance(node.op, AbstractConv3d):
        return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@register_opt('fast_compile', 'conv_dnn', 'cudnn')
@local_optimizer([AbstractConv2d_gradWeights, AbstractConv3d_gradWeights])
def local_abstractconv_gw_cudnn(node):
    """Lift a gradWeights abstract convolution on GPU inputs to cuDNN.

    Dispatches to the 2d or 3d graph rewriter depending on the op type;
    returns None when the first input is not already a GpuArray variable.
    (Reconstructed post-change definition: the diff view had fused the
    old and new text of each line together.)
    """
    ctx = infer_context_name(*node.inputs)
    if not isinstance(node.inputs[0].type, GpuArrayType):
        return
    if isinstance(node.op, AbstractConv2d_gradWeights):
        return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
    elif isinstance(node.op, AbstractConv3d_gradWeights):
        return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@register_opt('fast_compile', 'conv_dnn', 'cudnn')
@local_optimizer([AbstractConv2d_gradInputs, AbstractConv3d_gradInputs])
def local_abstractconv_gi_cudnn(node):
    """Lift a gradInputs abstract convolution on GPU inputs to cuDNN.

    Dispatches to the 2d or 3d graph rewriter depending on the op type;
    returns None when the first input is not already a GpuArray variable.
    (Reconstructed post-change definition: the diff view had fused the
    old and new text of each line together.)
    """
    ctx = infer_context_name(*node.inputs)
    if not isinstance(node.inputs[0].type, GpuArrayType):
        return
    if isinstance(node.op, AbstractConv2d_gradInputs):
        return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
    elif isinstance(node.op, AbstractConv3d_gradInputs):
        return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@inplace_allocempty(GpuDnnConv, 2) @inplace_allocempty(GpuDnnConv, 2)
......
...@@ -24,7 +24,10 @@ from theano.tensor.nnet.conv import ConvOp ...@@ -24,7 +24,10 @@ from theano.tensor.nnet.conv import ConvOp
from theano.tensor.nnet.blocksparse import SparseBlockGemv, SparseBlockOuter from theano.tensor.nnet.blocksparse import SparseBlockGemv, SparseBlockOuter
from theano.tensor.nnet.abstract_conv import (AbstractConv2d, from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs) AbstractConv2d_gradInputs,
AbstractConv3d,
AbstractConv3d_gradWeights,
AbstractConv3d_gradInputs)
from theano.tests.breakpoint import PdbBreakpoint from theano.tests.breakpoint import PdbBreakpoint
...@@ -1297,18 +1300,24 @@ def local_inplace_sparseblockouter(node): ...@@ -1297,18 +1300,24 @@ def local_inplace_sparseblockouter(node):
@register_opt('fast_compile', 'conv_dnn', 'cudnn')
@op_lifter([AbstractConv2d,
            AbstractConv2d_gradWeights,
            AbstractConv2d_gradInputs,
            AbstractConv3d,
            AbstractConv3d_gradWeights,
            AbstractConv3d_gradInputs])
def local_gpua_abstractconv(op, context_name, inputs, outputs):
    """Move an abstract convolution op (2d or 3d, any direction) to the GPU.

    Skips nodes whose output already lives on the GPU and otherwise
    delegates to the shared graph-lifting helper.
    (Reconstructed post-change definition: the diff view had fused the
    old and new text of each line together.)
    """
    if isinstance(outputs[0].type, GpuArrayType):
        # Don't handle this node here, it's already on the GPU.
        return
    return local_gpua_lift_abstractconv_graph(op, context_name, inputs, outputs)
@register_opt2([AbstractConv2d, @register_opt2([AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile', 'conv_dnn', 'cudnn') AbstractConv2d_gradInputs,
def local_gpua_lift_abstractconv2d_graph(op, context_name, inputs, outputs): AbstractConv3d,
AbstractConv3d_gradWeights,
AbstractConv3d_gradInputs], 'fast_compile', 'conv_dnn', 'cudnn')
def local_gpua_lift_abstractconv_graph(op, context_name, inputs, outputs):
inps = list(inputs) inps = list(inputs)
inps[0] = as_gpuarray_variable(inputs[0], inps[0] = as_gpuarray_variable(inputs[0],
context_name=context_name) context_name=context_name)
......
...@@ -47,6 +47,39 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d): ...@@ -47,6 +47,39 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
filter_flip=flip, target_op=GpuDnnConvGradI) filter_flip=flip, target_op=GpuDnnConvGradI)
class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
    """Exercise the cuDNN lowering of the abstract 3d convolution ops."""

    @classmethod
    def setup_class(cls):
        test_abstract_conv.BaseTestConv3d.setup_class()
        cls.shared = staticmethod(gpuarray_shared_constructor)
        # provide_shape is not used by the cuDNN implementation
        cls.provide_shape = [False]

    def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1, 1)):
        # Check forward, grad-weights and grad-inputs for one parameter set.
        if not dnn_available(test_ctx_name):
            raise SkipTest(dnn_available.msg)
        mode = mode_with_gpu
        if fd != (1, 1, 1):
            raise SkipTest("Doesn't have CUDNN implementation")
        o = self.get_output_shape(i, f, s, b, fd)
        # Keyword arguments shared by all three checks below.
        common = dict(inputs_shape=i, filters_shape=f, subsample=s,
                      verify_grad=True, mode=mode,
                      provide_shape=provide_shape, border_mode=b,
                      filter_flip=flip)
        self.run_fwd(target_op=GpuDnnConv, **common)
        self.run_gradweight(output_shape=o, target_op=GpuDnnConvGradW,
                            **common)
        self.run_gradinput(output_shape=o, target_op=GpuDnnConvGradI,
                           **common)
class TestDnnConvTypes(test_abstract_conv.TestConvTypes): class TestDnnConvTypes(test_abstract_conv.TestConvTypes):
def setUp(self): def setUp(self):
self.input = gpu_ftensor4() self.input = gpu_ftensor4()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论