Commit 1d9aff8a authored by Gijs van Tulder

Run AbstractConv3d with dnn_conv3 if cuDNN is available.

Parent 5c8876ba
...@@ -37,7 +37,10 @@ from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler ...@@ -37,7 +37,10 @@ from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
from theano.tensor.nnet.abstract_conv import (AbstractConv2d, from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs) AbstractConv2d_gradInputs,
AbstractConv3d,
AbstractConv3d_gradWeights,
AbstractConv3d_gradInputs)
def c_define_tensor_desc(desc): def c_define_tensor_desc(desc):
...@@ -3229,3 +3232,53 @@ def local_abstractconv_cudnn(node): ...@@ -3229,3 +3232,53 @@ def local_abstractconv_cudnn(node):
subsample=node.op.subsample, subsample=node.op.subsample,
conv_mode=conv_mode) conv_mode=conv_mode)
return [rval] return [rval]
@local_optimizer([AbstractConv3d,
                  AbstractConv3d_gradWeights,
                  AbstractConv3d_gradInputs])
def local_abstractconv3d_cudnn(node):
    """Lower an abstract 3d convolution Op to its cuDNN implementation.

    Applies only when both inputs already live on the GPU
    (CudaNdarrayType), cuDNN is available, and no filter dilation is
    requested; otherwise returns None so other optimizers can handle
    the node.
    """
    abstract_ops = (AbstractConv3d,
                    AbstractConv3d_gradWeights,
                    AbstractConv3d_gradInputs)
    if not isinstance(node.op, abstract_ops):
        return None
    # The cuDNN path implements no filter dilation.
    if node.op.filter_dilation != (1, 1, 1):
        return None
    inp1, inp2 = node.inputs[0], node.inputs[1]
    if not (isinstance(inp1.type, CudaNdarrayType) and
            isinstance(inp2.type, CudaNdarrayType)):
        return None
    if not dnn_available():
        return None
    op = node.op
    conv_mode = 'conv' if op.filter_flip else 'cross'
    if isinstance(op, AbstractConv3d):
        return [dnn_conv3d(inp1, inp2,
                           border_mode=op.border_mode,
                           subsample=op.subsample,
                           direction_hint='forward',
                           conv_mode=conv_mode)]
    if isinstance(op, AbstractConv3d_gradWeights):
        # First two dims from the symbolic inputs, last three (the
        # spatial kernel dims) from the shape argument node.inputs[2].
        kern_shape = (inp2.shape[1], inp1.shape[1],
                      node.inputs[2][0], node.inputs[2][1],
                      node.inputs[2][2])
        return [dnn_gradweight3d(inp1, inp2, kern_shape,
                                 border_mode=op.border_mode,
                                 subsample=op.subsample,
                                 conv_mode=conv_mode)]
    if isinstance(op, AbstractConv3d_gradInputs):
        # First two dims from the symbolic inputs, last three (the
        # spatial image dims) from the shape argument node.inputs[2].
        img_shape = (inp2.shape[0], inp1.shape[1],
                     node.inputs[2][0], node.inputs[2][1],
                     node.inputs[2][2])
        return [dnn_gradinput3d(inp1, inp2, img_shape,
                                border_mode=op.border_mode,
                                subsample=op.subsample,
                                conv_mode=conv_mode)]
...@@ -90,7 +90,10 @@ from theano.tests.breakpoint import PdbBreakpoint ...@@ -90,7 +90,10 @@ from theano.tests.breakpoint import PdbBreakpoint
from theano.tensor.nnet.abstract_conv import (BaseAbstractConv, from theano.tensor.nnet.abstract_conv import (BaseAbstractConv,
AbstractConv2d, AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs) AbstractConv2d_gradInputs,
AbstractConv3d,
AbstractConv3d_gradWeights,
AbstractConv3d_gradInputs)
from theano.tensor.opt import register_specialize_device from theano.tensor.opt import register_specialize_device
...@@ -2726,7 +2729,10 @@ optdb.register('local_inplace_gpu_sparse_block_outer', ...@@ -2726,7 +2729,10 @@ optdb.register('local_inplace_gpu_sparse_block_outer',
@local_optimizer([gpu_from_host, @local_optimizer([gpu_from_host,
AbstractConv2d, AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs]) AbstractConv2d_gradInputs,
AbstractConv3d,
AbstractConv3d_gradWeights,
AbstractConv3d_gradInputs])
def local_conv2d_gpu_conv(node): def local_conv2d_gpu_conv(node):
""" """
gpu_from_host(AbstractConv) -> AbstractConv(gpu_from_host) gpu_from_host(AbstractConv) -> AbstractConv(gpu_from_host)
...@@ -2899,6 +2905,10 @@ conv_groupopt.register('local_abstractconv_dnn', ...@@ -2899,6 +2905,10 @@ conv_groupopt.register('local_abstractconv_dnn',
dnn.local_abstractconv_cudnn, 20, dnn.local_abstractconv_cudnn, 20,
'conv_dnn', 'conv_dnn',
'gpu', 'fast_compile', 'fast_run', 'cudnn') 'gpu', 'fast_compile', 'fast_run', 'cudnn')
# Register the cuDNN 3d convolution optimizer at the same priority (20)
# as its 2d counterpart, so it is tried before the GEMM fallback below.
conv_groupopt.register('local_abstractconv3d_dnn',
                       dnn.local_abstractconv3d_cudnn, 20,
                       'conv_dnn',
                       'gpu', 'fast_compile', 'fast_run', 'cudnn')
# The GEMM-based convolution comes last to catch all remaining cases. # The GEMM-based convolution comes last to catch all remaining cases.
# It can be disabled by excluding 'conv_gemm'. # It can be disabled by excluding 'conv_gemm'.
conv_groupopt.register('local_abstractconv_gemm', local_abstractconv_gemm, 30, conv_groupopt.register('local_abstractconv_gemm', local_abstractconv_gemm, 30,
......
...@@ -7,7 +7,8 @@ from theano.sandbox.cuda import float32_shared_constructor as gpu_shared ...@@ -7,7 +7,8 @@ from theano.sandbox.cuda import float32_shared_constructor as gpu_shared
from theano.sandbox.cuda.dnn import ( from theano.sandbox.cuda.dnn import (
dnn_available, dnn_available,
GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI) GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI,
GpuDnnConv3d, GpuDnnConv3dGradW, GpuDnnConv3dGradI)
from theano.sandbox.cuda.blas import ( from theano.sandbox.cuda.blas import (
GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs) GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs)
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
...@@ -56,6 +57,40 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d): ...@@ -56,6 +57,40 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
filter_dilation=fd) filter_dilation=fd)
class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
    """Run the shared abstract 3d convolution test cases against the
    cuDNN Ops (GpuDnnConv3d / GpuDnnConv3dGradW / GpuDnnConv3dGradI)."""

    @classmethod
    def setup_class(cls):
        test_abstract_conv.BaseTestConv3d.setup_class()
        # provide_shape is not used by the cuDNN implementation
        cls.provide_shape = [False]
        # Allocate shared variables on the GPU so the inputs are
        # CudaNdarrays and the cuDNN optimizer applies.
        cls.shared = staticmethod(gpu_shared)

    def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1, 1)):
        """Check forward, grad-weights and grad-inputs for one test case.

        i/f are the input and filter shapes, s the subsample, b the
        border mode, flip the filter_flip flag and fd the filter
        dilation.  Skips when cuDNN is unavailable or when a dilation
        other than (1, 1, 1) is requested (unsupported by this path).
        """
        if fd != (1, 1, 1):
            raise SkipTest("No dilation implementation for cuDNN ConvOp.")
        if not dnn_available():
            raise SkipTest(cuda.dnn.dnn_available.msg)
        mode = mode_with_gpu
        # Expected output shape, needed by the two gradient checks.
        o = self.get_output_shape(i, f, s, b, fd)
        self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
                     verify_grad=True, mode=mode,
                     provide_shape=provide_shape, border_mode=b,
                     filter_flip=flip, target_op=GpuDnnConv3d,
                     filter_dilation=fd)
        self.run_gradweight(inputs_shape=i, filters_shape=f,
                            output_shape=o, subsample=s,
                            verify_grad=True, mode=mode,
                            provide_shape=provide_shape, border_mode=b,
                            filter_flip=flip, target_op=GpuDnnConv3dGradW,
                            filter_dilation=fd)
        self.run_gradinput(inputs_shape=i, filters_shape=f,
                           output_shape=o, subsample=s,
                           verify_grad=True, mode=mode,
                           provide_shape=provide_shape, border_mode=b,
                           filter_flip=flip, target_op=GpuDnnConv3dGradI,
                           filter_dilation=fd)
class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d): class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment