提交 401a4dbe authored 作者: Gijs van Tulder's avatar Gijs van Tulder

CPU implementation for Corr3DMM and gradients.

The new Corr3dMM opts are also used to optimise AbstractConv. The code is similar to the 2D version in corr_gemm.c.
上级 2681cd70
......@@ -234,7 +234,7 @@ def conv3d(input, filters, input_shape=None, filter_shape=None,
Notes
-----
If cuDNN is available, it will be used on the
GPU. Otherwise, it is the *CorrMM* convolution that will be used
GPU. Otherwise, it is the *Corr3dMM* convolution that will be used,
a "caffe style convolution".
This is only supported in Theano 0.8 or the development
......
......@@ -417,7 +417,7 @@ def conv3d_grad_wrt_inputs(output_grad,
-----
:note: If cuDNN is available, it will be used on the
GPU. Otherwise, it is the *CorrMM* convolution that will be used
GPU. Otherwise, it is the *Corr3dMM* convolution that will be used,
a "caffe style convolution".
:note: This is only supported in Theano 0.8 or the development
......@@ -670,7 +670,7 @@ def conv3d_grad_wrt_weights(input,
-----
:note: If cuDNN is available, it will be used on the
GPU. Otherwise, it is the *CorrMM* convolution that will be used
GPU. Otherwise, it is the *Corr3dMM* convolution that will be used,
a "caffe style convolution".
:note: This is only supported in Theano 0.8 or the development
......
差异被折叠。
差异被折叠。
......@@ -10,6 +10,8 @@ from theano.gof.opt import copy_stack_trace
from theano.tensor.nnet.corr import (
CorrMM, CorrMM_gradInputs, CorrMM_gradWeights)
from theano.tensor.nnet.corr3d import (
Corr3dMM, Corr3dMM_gradInputs, Corr3dMM_gradWeights)
from theano.tensor.nnet.blocksparse import (
SparseBlockGemv,
SparseBlockOuter,
......@@ -90,6 +92,28 @@ def local_abstractconv_gemm(node):
return [rval]
@local_optimizer([AbstractConv3d])
def local_abstractconv3d_gemm(node):
    """Rewrite an ``AbstractConv3d`` node into the CPU GEMM-based ``Corr3dMM``.

    Returns ``None`` (no rewrite) when a C++ compiler or BLAS is not
    available, when the node is not an ``AbstractConv3d``, or when either
    input is not a plain ``TensorType``.
    """
    # The GEMM path needs a working C++ compiler and BLAS link flags.
    if theano.config.cxx == "" or not theano.config.blas.ldflags:
        return
    if not isinstance(node.op, AbstractConv3d):
        return None
    img, kern = node.inputs
    if not (isinstance(img.type, TensorType) and
            isinstance(kern.type, TensorType)):
        return None
    # Corr3dMM computes a correlation; flipping the kernel along the three
    # spatial axes turns it into a true convolution when requested.
    if node.op.filter_flip:
        kern = kern[:, :, ::-1, ::-1, ::-1]
    corr_op = Corr3dMM(border_mode=node.op.border_mode,
                       subsample=node.op.subsample,
                       filter_dilation=node.op.filter_dilation)
    rval = corr_op(img, kern)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]
@local_optimizer([AbstractConv2d_gradWeights])
def local_abstractconv_gradweight_gemm(node):
if theano.config.cxx == "" or not theano.config.blas.ldflags:
......@@ -115,6 +139,31 @@ def local_abstractconv_gradweight_gemm(node):
return [rval]
@local_optimizer([AbstractConv3d_gradWeights])
def local_abstractconv3d_gradweight_gemm(node):
    """Rewrite ``AbstractConv3d_gradWeights`` into ``Corr3dMM_gradWeights``.

    Returns ``None`` (no rewrite) when a C++ compiler or BLAS is not
    available, when the node is not an ``AbstractConv3d_gradWeights``, or
    when the inputs are not plain ``TensorType`` variables.
    """
    # The GEMM path needs a working C++ compiler and BLAS link flags.
    if theano.config.cxx == "" or not theano.config.blas.ldflags:
        return
    if not isinstance(node.op, AbstractConv3d_gradWeights):
        return None
    img, topgrad, shape = node.inputs
    if not (isinstance(img.type, TensorType) and
            isinstance(topgrad.type, TensorType)):
        return None
    grad_op = Corr3dMM_gradWeights(border_mode=node.op.border_mode,
                                   subsample=node.op.subsample,
                                   filter_dilation=node.op.filter_dilation)
    rval = grad_op(img, topgrad, shape)
    copy_stack_trace(node.outputs[0], rval)
    # Corr3dMM_gradWeights produces an un-flipped kernel gradient; flip the
    # three spatial axes when the forward op used flipped kernels.
    if node.op.filter_flip:
        rval = rval[:, :, ::-1, ::-1, ::-1]
    rval = theano.tensor.patternbroadcast(rval,
                                          node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]
@local_optimizer([AbstractConv2d_gradInputs])
def local_abstractconv_gradinputs_gemm(node):
if theano.config.cxx == "" or not theano.config.blas.ldflags:
......@@ -138,6 +187,29 @@ def local_abstractconv_gradinputs_gemm(node):
return [rval]
@local_optimizer([AbstractConv3d_gradInputs])
def local_abstractconv3d_gradinputs_gemm(node):
    """Rewrite ``AbstractConv3d_gradInputs`` into ``Corr3dMM_gradInputs``.

    Returns ``None`` (no rewrite) when a C++ compiler or BLAS is not
    available, when the node is not an ``AbstractConv3d_gradInputs``, or
    when the inputs are not plain ``TensorType`` variables.
    """
    # The GEMM path needs a working C++ compiler and BLAS link flags.
    if theano.config.cxx == "" or not theano.config.blas.ldflags:
        return
    if not isinstance(node.op, AbstractConv3d_gradInputs):
        return None
    kern, topgrad, shape = node.inputs
    if not (isinstance(kern.type, TensorType) and
            isinstance(topgrad.type, TensorType)):
        return None
    # Match the forward op: flip the kernel's three spatial axes when the
    # forward convolution used flipped kernels.
    if node.op.filter_flip:
        kern = kern[:, :, ::-1, ::-1, ::-1]
    grad_op = Corr3dMM_gradInputs(border_mode=node.op.border_mode,
                                  subsample=node.op.subsample,
                                  filter_dilation=node.op.filter_dilation)
    rval = grad_op(kern, topgrad, shape)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]
@local_optimizer([AbstractConv2d])
def local_conv2d_cpu(node):
......@@ -481,6 +553,14 @@ conv_groupopt.register('local_abstractconv_gradweight_gemm',
conv_groupopt.register('local_abstractconv_gradinputs_gemm',
local_abstractconv_gradinputs_gemm, 30,
'conv_gemm', 'fast_compile', 'fast_run')
# Register the GEMM-based CPU optimizers for 3d convolution and both of its
# gradients at priority 30, tagged so they run under both fast_compile and
# fast_run (and can be excluded via the 'conv_gemm' tag).
conv_groupopt.register('local_abstractconv3d_gemm', local_abstractconv3d_gemm, 30,
'conv_gemm', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv3d_gradweight_gemm',
local_abstractconv3d_gradweight_gemm, 30,
'conv_gemm', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv3d_gradinputs_gemm',
local_abstractconv3d_gradinputs_gemm, 30,
'conv_gemm', 'fast_compile', 'fast_run')
# Legacy convolution
conv_groupopt.register('local_conv2d_cpu', local_conv2d_cpu, 40,
'fast_compile', 'fast_run')
......
......@@ -20,6 +20,8 @@ from theano.tensor.nnet.abstract_conv import bilinear_upsampling
from theano.tensor.nnet.conv import ConvOp
from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights,
CorrMM_gradInputs)
from theano.tensor.nnet.corr3d import (Corr3dMM, Corr3dMM_gradWeights,
Corr3dMM_gradInputs)
from theano.tensor.nnet.Conv3D import Conv3D
from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
from theano.tensor.nnet.ConvTransp3D import ConvTransp3D
......@@ -734,11 +736,9 @@ class TestCorrConv3d(BaseTestConv3d):
BaseTestConv3d.setup_class()
def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1, 1)):
if b not in ((0, 0, 0), 'valid'):
raise SkipTest("Only border_mode valid is implemented for basic cpu Conv3D.")
if fd != (1, 1, 1):
raise SkipTest("No dilation implementation for basic cpu Conv3D.")
o = self.get_output_shape(i, f, s, b, fd)
if fd != (1, 1, 1):
raise SkipTest("No reference implementation for 3D dilation.")
if (not theano.config.blas.ldflags or
not theano.config.cxx or
theano.config.mode == "FAST_COMPILE"):
......@@ -746,17 +746,17 @@ class TestCorrConv3d(BaseTestConv3d):
self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
verify_grad=True, provide_shape=provide_shape,
border_mode=b, filter_flip=flip,
target_op=Conv3D, check_trace=True,
target_op=Corr3dMM, check_trace=True,
filter_dilation=fd)
self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, verify_grad=True,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=ConvGrad3D,
filter_flip=flip, target_op=Corr3dMM_gradWeights,
check_trace=True, filter_dilation=fd)
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, verify_grad=True,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=ConvTransp3D,
filter_flip=flip, target_op=Corr3dMM_gradInputs,
check_trace=True, filter_dilation=fd)
......@@ -764,7 +764,6 @@ class TestCpuConv3d(BaseTestConv3d):
@classmethod
def setup(cls):
BaseTestConv3d.setup_class()
# TODO check how conv_gemm works for conv3d
cls.mode = theano.compile.mode.get_default_mode().excluding('conv_gemm')
cls.opt_err = theano.config.on_opt_error
theano.config.on_opt_error = 'ignore'
......
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论