提交 9592125c authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6286 from vikramnitin9/grouped_unshared

Implement Unshared Convolution
差异被折叠。
...@@ -3035,6 +3035,9 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -3035,6 +3035,9 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if version(raises=False) < 6000 and op.filter_dilation != (1, 1): if version(raises=False) < 6000 and op.filter_dilation != (1, 1):
return None return None
if op.unshared:
return None
inp1 = inputs[0] inp1 = inputs[0]
inp2 = inputs[1] inp2 = inputs[1]
...@@ -3129,6 +3132,8 @@ def local_abstractconv_cudnn(node): ...@@ -3129,6 +3132,8 @@ def local_abstractconv_cudnn(node):
ctx = infer_context_name(*node.inputs) ctx = infer_context_name(*node.inputs)
if not isinstance(node.inputs[0].type, GpuArrayType): if not isinstance(node.inputs[0].type, GpuArrayType):
return return
if node.op.unshared:
return None
if isinstance(node.op, AbstractConv2d): if isinstance(node.op, AbstractConv2d):
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs) return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
elif isinstance(node.op, AbstractConv3d): elif isinstance(node.op, AbstractConv3d):
...@@ -3143,6 +3148,8 @@ def local_abstractconv_cudnn_alt(node): ...@@ -3143,6 +3148,8 @@ def local_abstractconv_cudnn_alt(node):
if version(raises=False) < 6000 and node.op.filter_dilation != (1, 1): if version(raises=False) < 6000 and node.op.filter_dilation != (1, 1):
return None return None
if node.op.unshared:
return None
inp1 = node.inputs[0] inp1 = node.inputs[0]
inp2 = node.inputs[1] inp2 = node.inputs[1]
...@@ -3349,6 +3356,8 @@ def local_abstractconv_gw_cudnn(node): ...@@ -3349,6 +3356,8 @@ def local_abstractconv_gw_cudnn(node):
ctx = infer_context_name(*node.inputs) ctx = infer_context_name(*node.inputs)
if not isinstance(node.inputs[0].type, GpuArrayType): if not isinstance(node.inputs[0].type, GpuArrayType):
return return
if node.op.unshared:
return None
if isinstance(node.op, AbstractConv2d_gradWeights): if isinstance(node.op, AbstractConv2d_gradWeights):
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs) return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
elif isinstance(node.op, AbstractConv3d_gradWeights): elif isinstance(node.op, AbstractConv3d_gradWeights):
...@@ -3360,6 +3369,8 @@ def local_abstractconv_gi_cudnn(node): ...@@ -3360,6 +3369,8 @@ def local_abstractconv_gi_cudnn(node):
ctx = infer_context_name(*node.inputs) ctx = infer_context_name(*node.inputs)
if not isinstance(node.inputs[0].type, GpuArrayType): if not isinstance(node.inputs[0].type, GpuArrayType):
return return
if node.op.unshared:
return None
if isinstance(node.op, AbstractConv2d_gradInputs): if isinstance(node.op, AbstractConv2d_gradInputs):
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs) return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
elif isinstance(node.op, AbstractConv3d_gradInputs): elif isinstance(node.op, AbstractConv3d_gradInputs):
......
...@@ -1595,12 +1595,17 @@ def local_abstractconv_gemm(node): ...@@ -1595,12 +1595,17 @@ def local_abstractconv_gemm(node):
border_mode = node.op.border_mode border_mode = node.op.border_mode
subsample = node.op.subsample subsample = node.op.subsample
filter_dilation = node.op.filter_dilation filter_dilation = node.op.filter_dilation
num_groups = node.op.num_groups
unshared = node.op.unshared
if ((border_mode == 'full') and (subsample == (1, 1)) and node.op.num_groups == 1): flip = (slice(None),) * (kern.ndim - 2) + \
(slice(None, None, -1),) * 2
kern_axes = (1, 0) + tuple(i for i in range(2, kern.ndim))
if ((border_mode == 'full') and (subsample == (1, 1)) and num_groups == 1 and not unshared):
if not node.op.filter_flip: if not node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] kern = kern[flip]
# need to dimshuffle the kernel for full convolution # need to dimshuffle the kernel for full convolution
kern = kern.dimshuffle(1, 0, 2, 3) kern = kern.dimshuffle(kern_axes)
# call GpuCorrMM_gradInputs # call GpuCorrMM_gradInputs
rval = GpuCorrMM_gradInputs('valid', rval = GpuCorrMM_gradInputs('valid',
subsample, subsample,
...@@ -1609,13 +1614,14 @@ def local_abstractconv_gemm(node): ...@@ -1609,13 +1614,14 @@ def local_abstractconv_gemm(node):
else: else:
# need to flip the kernel if necessary # need to flip the kernel if necessary
if node.op.filter_flip: if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] kern = kern[flip]
# By default use GpuCorrMM # By default use GpuCorrMM
rval = GpuCorrMM(border_mode, rval = GpuCorrMM(border_mode,
subsample, subsample,
filter_dilation, filter_dilation,
node.op.num_groups)(gpu_contiguous(img), num_groups,
gpu_contiguous(kern)) unshared)(gpu_contiguous(img),
gpu_contiguous(kern))
# call GpuCorrMM_gradWeights if good # call GpuCorrMM_gradWeights if good
# (the latter is faster if batchsize * kernelHeight * kernelWidth # (the latter is faster if batchsize * kernelHeight * kernelWidth
...@@ -1628,11 +1634,12 @@ def local_abstractconv_gemm(node): ...@@ -1628,11 +1634,12 @@ def local_abstractconv_gemm(node):
(node.op.kshp is not None) and (node.op.kshp is not None) and
(None not in node.op.kshp) and (None not in node.op.kshp) and
border_mode != "half" and border_mode != "half" and
node.op.num_groups == 1): num_groups == 1 and
not unshared):
# we know the kernel and output size # we know the kernel and output size
prod1 = node.op.kshp[0] * node.op.kshp[1] prod1 = node.op.kshp[0] * node.op.kshp[-3]
prod2 = ((node.op.imshp[-2] - node.op.kshp[0] + 1) * prod2 = ((node.op.imshp[-2] - node.op.kshp[0] + 1) *
(node.op.imshp[-1] - node.op.kshp[1] + 1)) (node.op.imshp[-1] - node.op.kshp[-3] + 1))
if (None not in node.op.imshp[:1]): if (None not in node.op.imshp[:1]):
# we also know batchsize and input channels # we also know batchsize and input channels
prod1 *= node.op.imshp[0] prod1 *= node.op.imshp[0]
...@@ -1666,13 +1673,19 @@ def local_abstractconv_gemm_def(node): ...@@ -1666,13 +1673,19 @@ def local_abstractconv_gemm_def(node):
border_mode = node.op.border_mode border_mode = node.op.border_mode
subsample = node.op.subsample subsample = node.op.subsample
filter_dilation = node.op.filter_dilation filter_dilation = node.op.filter_dilation
num_groups = node.op.num_groups
unshared = node.op.unshared
if node.op.filter_flip: if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] flip = (slice(None),) * (kern.ndim - 2) + \
(slice(None, None, -1),) * 2
kern = kern[flip]
rval = GpuCorrMM(border_mode, rval = GpuCorrMM(border_mode,
subsample, subsample,
filter_dilation, filter_dilation,
node.op.num_groups)(gpu_contiguous(img), num_groups,
gpu_contiguous(kern)) unshared)(gpu_contiguous(img),
gpu_contiguous(kern))
return [rval] return [rval]
...@@ -1690,8 +1703,9 @@ def local_abstractconv_gemm_alt(node): ...@@ -1690,8 +1703,9 @@ def local_abstractconv_gemm_alt(node):
subsample = node.op.subsample subsample = node.op.subsample
filter_dilation = node.op.filter_dilation filter_dilation = node.op.filter_dilation
num_groups = node.op.num_groups num_groups = node.op.num_groups
unshared = node.op.unshared
if border_mode == 'full' and subsample == (1, 1) and num_groups == 1: if border_mode == 'full' and subsample == (1, 1) and num_groups == 1 and not unshared:
if not node.op.filter_flip: if not node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
...@@ -1702,7 +1716,7 @@ def local_abstractconv_gemm_alt(node): ...@@ -1702,7 +1716,7 @@ def local_abstractconv_gemm_alt(node):
gpu_contiguous(kern), gpu_contiguous(img)) gpu_contiguous(kern), gpu_contiguous(img))
elif (border_mode == 'valid' and subsample == (1, 1) and filter_dilation == (1, 1) and elif (border_mode == 'valid' and subsample == (1, 1) and filter_dilation == (1, 1) and
num_groups == 1): num_groups == 1 and not unshared):
if node.op.filter_flip: if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
...@@ -1896,10 +1910,13 @@ def local_abstractconv_gradweights_gemm(node): ...@@ -1896,10 +1910,13 @@ def local_abstractconv_gradweights_gemm(node):
rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode, rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
subsample=node.op.subsample, subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation, filter_dilation=node.op.filter_dilation,
num_groups=node.op.num_groups)( num_groups=node.op.num_groups,
unshared=node.op.unshared)(
gpu_contiguous(img), gpu_contiguous(topgrad), shape) gpu_contiguous(img), gpu_contiguous(topgrad), shape)
flip = (slice(None),) * (rval.ndim - 2) + \
(slice(None, None, -1),) * 2
if node.op.filter_flip: if node.op.filter_flip:
rval = rval[:, :, ::-1, ::-1] rval = rval[flip]
rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable) rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
rval = as_gpuarray_variable(rval, context_name=ctx) rval = as_gpuarray_variable(rval, context_name=ctx)
return [rval] return [rval]
...@@ -1918,9 +1935,10 @@ def local_abstractconv_gemm_gradweights_alt(node): ...@@ -1918,9 +1935,10 @@ def local_abstractconv_gemm_gradweights_alt(node):
subsample = node.op.subsample subsample = node.op.subsample
filter_dilation = node.op.filter_dilation filter_dilation = node.op.filter_dilation
num_groups = node.op.num_groups num_groups = node.op.num_groups
unshared = node.op.unshared
if(border_mode == 'valid' and subsample == (1, 1) and filter_dilation == (1, 1) and if(border_mode == 'valid' and subsample == (1, 1) and filter_dilation == (1, 1) and
num_groups == 1): num_groups == 1 and not unshared):
rval = GpuCorrMM(border_mode, rval = GpuCorrMM(border_mode,
subsample, subsample,
filter_dilation)( filter_dilation)(
...@@ -2001,12 +2019,15 @@ def local_abstractconv_gradinputs_gemm(node): ...@@ -2001,12 +2019,15 @@ def local_abstractconv_gradinputs_gemm(node):
return None return None
if node.op.filter_flip: if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] flip = (slice(None),) * (kern.ndim - 2) + \
(slice(None, None, -1),) * 2
kern = kern[flip]
rval = GpuCorrMM_gradInputs(border_mode=node.op.border_mode, rval = GpuCorrMM_gradInputs(border_mode=node.op.border_mode,
subsample=node.op.subsample, subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation, filter_dilation=node.op.filter_dilation,
num_groups=node.op.num_groups)( num_groups=node.op.num_groups,
unshared=node.op.unshared)(
gpu_contiguous(kern), gpu_contiguous(topgrad), shape) gpu_contiguous(kern), gpu_contiguous(topgrad), shape)
return [rval] return [rval]
...@@ -2023,8 +2044,9 @@ def local_abstractconv_gradinputs_gemm_alt(node): ...@@ -2023,8 +2044,9 @@ def local_abstractconv_gradinputs_gemm_alt(node):
subsample = node.op.subsample subsample = node.op.subsample
filter_dilation = node.op.filter_dilation filter_dilation = node.op.filter_dilation
num_groups = node.op.num_groups num_groups = node.op.num_groups
unshared = node.op.unshared
if border_mode == 'valid' and subsample == (1, 1) and num_groups == 1: if border_mode == 'valid' and subsample == (1, 1) and num_groups == 1 and not unshared:
if not node.op.filter_flip: if not node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
...@@ -2117,8 +2139,9 @@ class ConvMetaOptimizer(LocalMetaOptimizer): ...@@ -2117,8 +2139,9 @@ class ConvMetaOptimizer(LocalMetaOptimizer):
node.op.border_mode, node.op.border_mode,
node.op.subsample, node.op.subsample,
node.op.filter_dilation) node.op.filter_dilation)
convdim = img.ndim - 2
result[kshape] = theano.tensor.as_tensor_variable(node.op.kshp[2:]) result[kshape] = theano.tensor.as_tensor_variable(node.op.kshp[-convdim:])
for(var, shape) in zip((img, top), (node.op.imshp, tshp)): for(var, shape) in zip((img, top), (node.op.imshp, tshp)):
result[var] = theano.shared(np.random.random(shape).astype(var.dtype), result[var] = theano.shared(np.random.random(shape).astype(var.dtype),
......
...@@ -11,7 +11,7 @@ from theano.tensor.nnet.corr import CorrMM, CorrMM_gradWeights, CorrMM_gradInput ...@@ -11,7 +11,7 @@ from theano.tensor.nnet.corr import CorrMM, CorrMM_gradWeights, CorrMM_gradInput
from ..type import gpuarray_shared_constructor from ..type import gpuarray_shared_constructor
from ..blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs from ..blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs
from .config import mode_with_gpu, mode_without_gpu, ref_cast from .config import mode_with_gpu, mode_without_gpu, ref_cast
from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim, TestUnsharedConv
class TestCorrMM(unittest.TestCase): class TestCorrMM(unittest.TestCase):
...@@ -20,9 +20,13 @@ class TestCorrMM(unittest.TestCase): ...@@ -20,9 +20,13 @@ class TestCorrMM(unittest.TestCase):
border_mode='valid', border_mode='valid',
filter_dilation=(1, 1), filter_dilation=(1, 1),
subsample=(1, 1), subsample=(1, 1),
unshared=False,
verify_grad=False): verify_grad=False):
inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)] inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)]
filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)] if unshared:
filters_shape = [filters_shape[i] for i in (0, 1, 2, 5, 3, 4)]
else:
filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)]
inputs_val = np.random.random(inputs_shape).astype(config.floatX) inputs_val = np.random.random(inputs_shape).astype(config.floatX)
filters_val = np.random.random(filters_shape).astype(config.floatX) filters_val = np.random.random(filters_shape).astype(config.floatX)
...@@ -32,13 +36,15 @@ class TestCorrMM(unittest.TestCase): ...@@ -32,13 +36,15 @@ class TestCorrMM(unittest.TestCase):
conv_ref = CorrMM(border_mode=border_mode, conv_ref = CorrMM(border_mode=border_mode,
filter_dilation=filter_dilation, filter_dilation=filter_dilation,
subsample=subsample)(ref_cast(inputs), subsample=subsample,
ref_cast(filters)) unshared=unshared)(ref_cast(inputs),
ref_cast(filters))
f_ref = theano.function([], conv_ref, mode=mode_without_gpu) f_ref = theano.function([], conv_ref, mode=mode_without_gpu)
conv = GpuCorrMM(border_mode=border_mode, conv = GpuCorrMM(border_mode=border_mode,
filter_dilation=filter_dilation, filter_dilation=filter_dilation,
subsample=subsample)(inputs, filters) subsample=subsample,
unshared=unshared)(inputs, filters)
f = theano.function([], conv, mode=mode_with_gpu) f = theano.function([], conv, mode=mode_with_gpu)
res_ref = f_ref() res_ref = f_ref()
...@@ -48,7 +54,8 @@ class TestCorrMM(unittest.TestCase): ...@@ -48,7 +54,8 @@ class TestCorrMM(unittest.TestCase):
if verify_grad: if verify_grad:
utt.verify_grad(GpuCorrMM(border_mode=border_mode, utt.verify_grad(GpuCorrMM(border_mode=border_mode,
filter_dilation=filter_dilation, filter_dilation=filter_dilation,
subsample=subsample), subsample=subsample,
unshared=unshared),
[inputs_val, filters_val], mode=mode_with_gpu) [inputs_val, filters_val], mode=mode_with_gpu)
def test_valid(self): def test_valid(self):
...@@ -57,12 +64,6 @@ class TestCorrMM(unittest.TestCase): ...@@ -57,12 +64,6 @@ class TestCorrMM(unittest.TestCase):
self.run_conv_valid(inputs_shape=(16, 20, 12, 1), self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 6, 12, 1), filters_shape=(10, 6, 12, 1),
subsample=(2, 2)) subsample=(2, 2))
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 6, 12, 1),
subsample=(2, 2))
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 6, 12, 1),
subsample=(3, 3))
self.run_conv_valid(inputs_shape=(16, 20, 12, 1), self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 6, 12, 1), filters_shape=(10, 6, 12, 1),
subsample=(3, 3)) subsample=(3, 3))
...@@ -117,6 +118,41 @@ class TestCorrMM(unittest.TestCase): ...@@ -117,6 +118,41 @@ class TestCorrMM(unittest.TestCase):
border_mode=border_mode, border_mode=border_mode,
verify_grad=True) verify_grad=True)
def test_unshared(self):
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 15, 1, 6, 12, 1),
unshared=True)
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 8, 1, 6, 12, 1),
subsample=(2, 2), unshared=True)
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 5, 1, 6, 12, 1),
subsample=(3, 3), unshared=True)
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 5, 1, 6, 12, 1),
subsample=(3, 2), unshared=True)
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 15, 1, 6, 12, 1),
subsample=(1, 2), unshared=True)
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 15, 1, 6, 12, 1),
border_mode='valid', unshared=True)
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 21, 13, 6, 12, 1),
border_mode='half', unshared=True)
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 25, 23, 6, 12, 1),
border_mode='full', unshared=True)
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 15, 1, 6, 12, 1),
border_mode=(0, 0), unshared=True)
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 17, 5, 6, 12, 1),
border_mode=(1, 2), unshared=True)
self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
filters_shape=(10, 21, 5, 6, 12, 1),
border_mode=(3, 2), unshared=True)
def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape, def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape,
subsample=(1, 1)): subsample=(1, 1)):
inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)] inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)]
...@@ -227,3 +263,12 @@ class TestGroupGpuCorr2d(Grouped_conv_noOptim): ...@@ -227,3 +263,12 @@ class TestGroupGpuCorr2d(Grouped_conv_noOptim):
conv_op = GpuCorrMM conv_op = GpuCorrMM
conv_gradw_op = GpuCorrMM_gradWeights conv_gradw_op = GpuCorrMM_gradWeights
conv_gradi_op = GpuCorrMM_gradInputs conv_gradi_op = GpuCorrMM_gradInputs
flip_filter = True
is_dnn = False
class TestUnsharedGpuCorr2d(TestUnsharedConv):
    """Run the inherited ``TestUnsharedConv`` tests with the GpuCorrMM ops."""
    # Op classes the inherited tests look for in the compiled graph.
    conv2d_gradi_op = GpuCorrMM_gradInputs
    conv2d_gradw_op = GpuCorrMM_gradWeights
    conv2d_op = GpuCorrMM
    # Compilation mode handed to theano.function by the inherited tests.
    mode = mode_with_gpu
...@@ -37,7 +37,7 @@ from .abstract_conv import separable_conv2d ...@@ -37,7 +37,7 @@ from .abstract_conv import separable_conv2d
def conv2d(input, filters, input_shape=None, filter_shape=None, def conv2d(input, filters, input_shape=None, filter_shape=None,
border_mode='valid', subsample=(1, 1), filter_flip=True, border_mode='valid', subsample=(1, 1), filter_flip=True,
image_shape=None, filter_dilation=(1, 1), num_groups=1, **kwargs): image_shape=None, filter_dilation=(1, 1), num_groups=1, unshared=False, **kwargs):
""" """
This function will build the symbolic graph for convolving a mini-batch of a This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled stack of 2D inputs with a set of 2D filters. The implementation is modelled
...@@ -51,18 +51,22 @@ def conv2d(input, filters, input_shape=None, filter_shape=None, ...@@ -51,18 +51,22 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
(batch size, input channels, input rows, input columns). (batch size, input channels, input rows, input columns).
See the optional parameter ``input_shape``. See the optional parameter ``input_shape``.
filters: symbolic 4D tensor filters: symbolic 4D or 6D tensor
Set of filters used in CNN layer of shape Set of filters used in CNN layer of shape
(output channels, input channels, filter rows, filter columns). (output channels, input channels, filter rows, filter columns)
for normal convolution and
(output channels, output rows, output columns, input channels,
filter rows, filter columns)
for unshared convolution.
See the optional parameter ``filter_shape``. See the optional parameter ``filter_shape``.
input_shape: None, tuple/list of len 4 of int or Constant variable input_shape: None, tuple/list of len 4 of int or Constant variable
The shape of the input parameter. The shape of the input parameter.
Optional, possibly used to choose an optimal implementation. Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that this You can give ``None`` for any element of the list to specify that this
element is not known at compile time. element is not known at compile time.
filter_shape: None, tuple/list of len 4 of int or Constant variable filter_shape: None, tuple/list of len 4 or 6 of int or Constant variable
The shape of the filters parameter. The shape of the filters parameter.
Optional, possibly used to choose an optimal implementation. Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that this You can give ``None`` for any element of the list to specify that this
...@@ -105,6 +109,11 @@ def conv2d(input, filters, input_shape=None, filter_shape=None, ...@@ -105,6 +109,11 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
Divides the image, kernel and output tensors into num_groups Divides the image, kernel and output tensors into num_groups
separate groups, each of which carries out convolutions separately separate groups, each of which carries out convolutions separately
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
kwargs: Any other keyword arguments are accepted for backwards kwargs: Any other keyword arguments are accepted for backwards
compatibility, but will be ignored. compatibility, but will be ignored.
...@@ -154,12 +163,12 @@ def conv2d(input, filters, input_shape=None, filter_shape=None, ...@@ -154,12 +163,12 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
return abstract_conv2d(input, filters, input_shape, filter_shape, return abstract_conv2d(input, filters, input_shape, filter_shape,
border_mode, subsample, filter_flip, border_mode, subsample, filter_flip,
filter_dilation, num_groups) filter_dilation, num_groups, unshared)
def conv2d_transpose(input, filters, output_shape, filter_shape=None, def conv2d_transpose(input, filters, output_shape, filter_shape=None,
border_mode='valid', input_dilation=(1, 1), border_mode='valid', input_dilation=(1, 1),
filter_flip=True, filter_dilation=(1, 1), num_groups=1): filter_flip=True, filter_dilation=(1, 1), num_groups=1, unshared=False):
""" """
This function will build the symbolic graph for applying a transposed This function will build the symbolic graph for applying a transposed
convolution over a mini-batch of a stack of 2D inputs with a set of 2D convolution over a mini-batch of a stack of 2D inputs with a set of 2D
...@@ -215,6 +224,12 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None, ...@@ -215,6 +224,12 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
Divides the image, kernel and output tensors into num_groups Divides the image, kernel and output tensors into num_groups
separate groups, each of which carries out convolutions separately separate groups, each of which carries out convolutions separately
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
Grouped unshared convolution is supported.
Returns Returns
------- -------
Symbolic 4D tensor Symbolic 4D tensor
...@@ -242,4 +257,5 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None, ...@@ -242,4 +257,5 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
subsample=input_dilation, subsample=input_dilation,
filter_flip=filter_flip, filter_flip=filter_flip,
filter_dilation=filter_dilation, filter_dilation=filter_dilation,
num_groups=num_groups) num_groups=num_groups,
unshared=unshared)
差异被折叠。
...@@ -82,12 +82,14 @@ def local_abstractconv_gemm(node): ...@@ -82,12 +82,14 @@ def local_abstractconv_gemm(node):
# need to flip the kernel if necessary # need to flip the kernel if necessary
if node.op.filter_flip: if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] flip = (slice(None),) * (kern.ndim - 2) + \
(slice(None, None, -1),) * 2
kern = kern[flip]
rval = CorrMM(border_mode=node.op.border_mode, rval = CorrMM(border_mode=node.op.border_mode,
subsample=node.op.subsample, subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation, filter_dilation=node.op.filter_dilation,
num_groups=node.op.num_groups)(img, kern) num_groups=node.op.num_groups,
unshared=node.op.unshared)(img, kern)
copy_stack_trace(node.outputs[0], rval) copy_stack_trace(node.outputs[0], rval)
return [rval] return [rval]
...@@ -134,12 +136,15 @@ def local_abstractconv_gradweight_gemm(node): ...@@ -134,12 +136,15 @@ def local_abstractconv_gradweight_gemm(node):
rval = CorrMM_gradWeights(border_mode=node.op.border_mode, rval = CorrMM_gradWeights(border_mode=node.op.border_mode,
subsample=node.op.subsample, subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation, filter_dilation=node.op.filter_dilation,
num_groups=node.op.num_groups)(img, topgrad, shape) num_groups=node.op.num_groups,
unshared=node.op.unshared)(img, topgrad, shape)
copy_stack_trace(node.outputs[0], rval) copy_stack_trace(node.outputs[0], rval)
# need to flip the kernel if necessary # need to flip the kernel if necessary
if node.op.filter_flip: if node.op.filter_flip:
rval = rval[:, :, ::-1, ::-1] flip = (slice(None),) * (rval.ndim - 2) + \
(slice(None, None, -1),) * 2
rval = rval[flip]
rval = theano.tensor.patternbroadcast(rval, node.outputs[0].broadcastable) rval = theano.tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
copy_stack_trace(node.outputs[0], rval) copy_stack_trace(node.outputs[0], rval)
...@@ -189,12 +194,14 @@ def local_abstractconv_gradinputs_gemm(node): ...@@ -189,12 +194,14 @@ def local_abstractconv_gradinputs_gemm(node):
# need to flip the kernel if necessary # need to flip the kernel if necessary
if node.op.filter_flip: if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] flip = (slice(None),) * (kern.ndim - 2) + \
(slice(None, None, -1),) * 2
kern = kern[flip]
rval = CorrMM_gradInputs(border_mode=node.op.border_mode, rval = CorrMM_gradInputs(border_mode=node.op.border_mode,
subsample=node.op.subsample, subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation, filter_dilation=node.op.filter_dilation,
num_groups=node.op.num_groups)(kern, topgrad, num_groups=node.op.num_groups,
shape) unshared=node.op.unshared)(kern, topgrad, shape)
copy_stack_trace(node.outputs[0], rval) copy_stack_trace(node.outputs[0], rval)
return [rval] return [rval]
...@@ -242,7 +249,7 @@ def local_conv2d_cpu(node): ...@@ -242,7 +249,7 @@ def local_conv2d_cpu(node):
if not node.op.filter_flip: if not node.op.filter_flip:
# Not tested yet # Not tested yet
return None return None
if node.op.num_groups > 1: if node.op.num_groups > 1 or node.op.unshared:
return None return None
rval = conv2d(img, kern, rval = conv2d(img, kern,
...@@ -270,7 +277,7 @@ def local_conv2d_gradweight_cpu(node): ...@@ -270,7 +277,7 @@ def local_conv2d_gradweight_cpu(node):
if not node.op.filter_flip: if not node.op.filter_flip:
# Not tested yet # Not tested yet
return return
if node.op.num_groups > 1: if node.op.num_groups > 1 or node.op.unshared:
return None return None
if node.op.border_mode == 'valid' and \ if node.op.border_mode == 'valid' and \
...@@ -370,7 +377,7 @@ def local_conv2d_gradinputs_cpu(node): ...@@ -370,7 +377,7 @@ def local_conv2d_gradinputs_cpu(node):
if not node.op.filter_flip: if not node.op.filter_flip:
# Not tested yet # Not tested yet
return None return None
if node.op.num_groups > 1: if node.op.num_groups > 1 or node.op.unshared:
return None return None
# Conv 3d implementation, needed when subsample > 2 # Conv 3d implementation, needed when subsample > 2
......
...@@ -1744,3 +1744,154 @@ class Separable_conv(unittest.TestCase): ...@@ -1744,3 +1744,154 @@ class Separable_conv(unittest.TestCase):
fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN') fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
top = fun(x[:, :, :3, :3, :3], depthwise_filter, pointwise_filter) top = fun(x[:, :, :3, :3, :3], depthwise_filter, pointwise_filter)
utt.assert_allclose(top, precomp_output) utt.assert_allclose(top, precomp_output)
class TestUnsharedConv(unittest.TestCase):
    """Compare unshared 2D convolution ops against their regular counterparts.

    Each test builds an op with ``unshared=True`` and checks its result
    against the same op class built with ``unshared=False``, evaluated one
    output position at a time.  Subclasses may override ``mode`` and the
    ``*_op`` attributes.
    """
    # Op classes used to build the symbolic graphs.
    conv2d = theano.tensor.nnet.abstract_conv.AbstractConv2d
    conv2d_gradw = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights
    conv2d_gradi = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs
    # Op classes that must show up in the compiled function's graph.
    conv2d_op = theano.tensor.nnet.abstract_conv.AbstractConv2d
    conv2d_gradw_op = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights
    conv2d_gradi_op = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs

    mode = theano.compile.mode.Mode(optimizer='None')

    def setUp(self):
        """Define the parallel lists of per-case parameters used by the tests."""
        self.img_shape = [(2, 2, 4, 4), (3, 2, 4, 2), (3, 3, 5, 3), (3, 4, 4, 4)]
        self.kern_shape = [(2, 2, 2, 2, 3, 3), (2, 4, 2, 2, 4, 2), (3, 2, 1, 1, 3, 3), (4, 3, 3, 2, 4, 2)]
        self.topgrad_shape = [(2, 2, 2, 2), (3, 2, 4, 2), (3, 3, 2, 1), (3, 4, 3, 3)]
        self.border_mode = ['valid', 'full', 'valid', 'full']
        self.subsample = [(1, 1), (2, 2), (2, 1), (3, 2)]
        self.filter_dilation = (1, 1)
        self.num_groups = [1, 1, 3, 2]

        # If speed is a concern, the flags can be drawn at random instead:
        # self.verify_flags = np.random.choice([True, False], 4, [0.5, 0.5])
        self.verify_flags = [True] * 4

        self.ref_mode = 'FAST_RUN'
        if theano.config.cxx == "":
            raise SkipTest("CorrMM needs cxx")

    def _assert_op_in_graph(self, func, op_class):
        """Assert that the compiled graph of ``func`` holds an ``op_class`` node."""
        assert any(isinstance(node.op, op_class)
                   for node in func.maker.fgraph.toposort())

    def test_fwd(self):
        """Forward pass.

        For every output position ``(row, col)`` the unshared output must
        match a regular convolution that uses only the kernel slice stored
        for that position.
        """
        tensor6 = theano.tensor.TensorType(theano.config.floatX, (False,) * 6)
        img_sym = theano.tensor.tensor4('img')
        kern_sym = tensor6('kern')
        ref_kern_sym = theano.tensor.tensor4('ref_kern')

        cases = zip(self.img_shape, self.kern_shape, self.border_mode,
                    self.subsample, self.num_groups, self.verify_flags)
        for imshp, kshp, border, stride, groups, verify in cases:
            img = np.random.random(imshp).astype(theano.config.floatX)
            kern = np.random.random(kshp).astype(theano.config.floatX)

            unshared_conv_op = self.conv2d(border_mode=border, subsample=stride,
                                           filter_dilation=self.filter_dilation,
                                           num_groups=groups, unshared=True)
            unshared_func = theano.function([img_sym, kern_sym],
                                            unshared_conv_op(img_sym, kern_sym),
                                            mode=self.mode)
            self._assert_op_in_graph(unshared_func, self.conv2d_op)
            unshared_output = unshared_func(img, kern)

            # Shape of the kernel for a single output position: the two
            # per-position axes (1 and 2) are dropped.
            single_kshp = kshp[:1] + kshp[3:]

            ref_conv_op = self.conv2d(border_mode=border, subsample=stride,
                                      filter_dilation=self.filter_dilation,
                                      num_groups=groups, unshared=False)
            ref_func = theano.function([img_sym, ref_kern_sym],
                                       ref_conv_op(img_sym, ref_kern_sym),
                                       mode=self.mode)

            for row in range(kshp[1]):
                for col in range(kshp[2]):
                    single_kern = kern[:, row, col, ...].reshape(single_kshp)
                    ref_val = ref_func(img, single_kern)
                    utt.assert_allclose(ref_val[:, :, row, col],
                                        unshared_output[:, :, row, col])

            if verify:
                utt.verify_grad(unshared_conv_op, [img, kern], mode=self.mode, eps=1)

    def test_gradweight(self):
        """Gradient w.r.t. the weights.

        The unshared result at position ``(row, col)`` must match the regular
        gradWeights op fed a top gradient that is zero everywhere except at
        that position.
        """
        img_sym = theano.tensor.tensor4('img')
        top_sym = theano.tensor.tensor4('top')

        cases = zip(self.img_shape, self.kern_shape, self.topgrad_shape,
                    self.border_mode, self.subsample, self.num_groups,
                    self.verify_flags)
        for imshp, kshp, topshp, border, stride, groups, verify in cases:
            img = np.random.random(imshp).astype(theano.config.floatX)
            top = np.random.random(topshp).astype(theano.config.floatX)

            unshared_conv_op = self.conv2d_gradw(border_mode=border, subsample=stride,
                                                 filter_dilation=self.filter_dilation,
                                                 num_groups=groups, unshared=True)
            unshared_out_sym = unshared_conv_op(img_sym, top_sym,
                                                tensor.as_tensor_variable(kshp[-2:]))
            unshared_func = theano.function([img_sym, top_sym], unshared_out_sym,
                                            mode=self.mode)
            self._assert_op_in_graph(unshared_func, self.conv2d_gradw_op)
            unshared_output = unshared_func(img, top)

            single_kshp = kshp[:1] + kshp[3:]

            ref_conv_op = self.conv2d_gradw(border_mode=border, subsample=stride,
                                            filter_dilation=self.filter_dilation,
                                            num_groups=groups, unshared=False)
            ref_out_sym = ref_conv_op(img_sym, top_sym,
                                      tensor.as_tensor_variable(single_kshp[-2:]))
            ref_func = theano.function([img_sym, top_sym], ref_out_sym, mode=self.mode)

            for row in range(topshp[2]):
                for col in range(topshp[3]):
                    # Keep only the top gradient of the current position.
                    top_single = np.zeros_like(top)
                    top_single[:, :, row, col] = top[:, :, row, col]
                    ref_output = ref_func(img, top_single)
                    utt.assert_allclose(unshared_output[:, row, col, ...], ref_output)

            if verify:
                def conv_gradweight(inputs_val, output_val):
                    return unshared_conv_op(inputs_val, output_val,
                                            tensor.as_tensor_variable(kshp[-2:]))

                utt.verify_grad(conv_gradweight, [img, top], mode=self.mode, eps=1)

    def test_gradinput(self):
        """Gradient w.r.t. the inputs.

        Summing the regular gradInputs results over all output positions
        (each with that position's kernel slice and a top gradient masked to
        that position) must reproduce the unshared result.
        """
        tensor6 = theano.tensor.TensorType(theano.config.floatX, (False,) * 6)
        kern_sym = tensor6('kern')
        top_sym = theano.tensor.tensor4('top')
        ref_kern_sym = theano.tensor.tensor4('ref_kern')

        cases = zip(self.img_shape, self.kern_shape, self.topgrad_shape,
                    self.border_mode, self.subsample, self.num_groups,
                    self.verify_flags)
        for imshp, kshp, topshp, border, stride, groups, verify in cases:
            single_kshp = kshp[:1] + kshp[3:]

            kern = np.random.random(kshp).astype(theano.config.floatX)
            top = np.random.random(topshp).astype(theano.config.floatX)

            unshared_conv_op = self.conv2d_gradi(border_mode=border, subsample=stride,
                                                 filter_dilation=self.filter_dilation,
                                                 num_groups=groups, unshared=True)
            unshared_out_sym = unshared_conv_op(kern_sym, top_sym,
                                                tensor.as_tensor_variable(imshp[-2:]))
            unshared_func = theano.function([kern_sym, top_sym], unshared_out_sym,
                                            mode=self.mode)
            self._assert_op_in_graph(unshared_func, self.conv2d_gradi_op)
            unshared_output = unshared_func(kern, top)

            ref_conv_op = self.conv2d_gradi(border_mode=border, subsample=stride,
                                            filter_dilation=self.filter_dilation,
                                            num_groups=groups, unshared=False)
            ref_out_sym = ref_conv_op(ref_kern_sym, top_sym,
                                      tensor.as_tensor_variable(imshp[-2:]))
            ref_func = theano.function([ref_kern_sym, top_sym], ref_out_sym,
                                       mode=self.mode)

            # Accumulate the per-position contributions of the regular op.
            ref_output = np.zeros(imshp)
            for row in range(topshp[2]):
                for col in range(topshp[3]):
                    single_kern = kern[:, row, col, ...].reshape(single_kshp)
                    top_single = np.zeros_like(top)
                    top_single[:, :, row, col] = top[:, :, row, col]
                    ref_output += ref_func(single_kern, top_single)

            utt.assert_allclose(ref_output, unshared_output)

            if verify:
                def conv_gradinputs(filters_val, output_val):
                    return unshared_conv_op(filters_val, output_val,
                                            tensor.as_tensor_variable(imshp[-2:]))

                utt.verify_grad(conv_gradinputs, [kern, top], mode=self.mode, eps=1)
...@@ -10,7 +10,7 @@ import theano ...@@ -10,7 +10,7 @@ import theano
import theano.tensor as T import theano.tensor as T
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.nnet import corr, conv from theano.tensor.nnet import corr, conv
from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim, TestUnsharedConv
class TestCorr2D(utt.InferShapeTester): class TestCorr2D(utt.InferShapeTester):
...@@ -452,6 +452,16 @@ class TestGroupCorr2d(Grouped_conv_noOptim): ...@@ -452,6 +452,16 @@ class TestGroupCorr2d(Grouped_conv_noOptim):
utt.assert_allclose(gconv_output, conv_output) utt.assert_allclose(gconv_output, conv_output)
class TestUnsharedCorr2d(TestUnsharedConv):
    """Run the inherited ``TestUnsharedConv`` tests with the CPU CorrMM ops."""
    # Op classes the inherited tests look for in the compiled graph.
    conv2d_op = corr.CorrMM
    conv2d_gradw_op = corr.CorrMM_gradWeights
    conv2d_gradi_op = corr.CorrMM_gradInputs

    # When the configured mode is FAST_COMPILE, compile with FAST_RUN
    # (without 'gpuarray') instead; otherwise pass mode=None.
    # NOTE(review): presumably FAST_COMPILE would not introduce the CorrMM
    # ops the tests assert on -- confirm.
    mode = (theano.compile.get_mode("FAST_RUN").excluding('gpuarray')
            if theano.config.mode == "FAST_COMPILE" else None)
if __name__ == '__main__': if __name__ == '__main__':
t = TestCorr2D('setUp') t = TestCorr2D('setUp')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论