Commit 401a4dbe authored by Gijs van Tulder

CPU implementation for Corr3DMM and gradients.

The new Corr3dMM ops are also used to optimise AbstractConv. The code is similar to the 2D version in corr_gemm.c.
Parent 2681cd70
...@@ -234,7 +234,7 @@ def conv3d(input, filters, input_shape=None, filter_shape=None, ...@@ -234,7 +234,7 @@ def conv3d(input, filters, input_shape=None, filter_shape=None,
Notes Notes
----- -----
If cuDNN is available, it will be used on the If cuDNN is available, it will be used on the
GPU. Otherwise, it is the *CorrMM* convolution that will be used GPU. Otherwise, it is the *Corr3dMM* convolution that will be used
"caffe style convolution". "caffe style convolution".
This is only supported in Theano 0.8 or the development This is only supported in Theano 0.8 or the development
......
...@@ -417,7 +417,7 @@ def conv3d_grad_wrt_inputs(output_grad, ...@@ -417,7 +417,7 @@ def conv3d_grad_wrt_inputs(output_grad,
----- -----
:note: If cuDNN is available, it will be used on the :note: If cuDNN is available, it will be used on the
GPU. Otherwise, it is the *CorrMM* convolution that will be used GPU. Otherwise, it is the *Corr3dMM* convolution that will be used
"caffe style convolution". "caffe style convolution".
:note: This is only supported in Theano 0.8 or the development :note: This is only supported in Theano 0.8 or the development
...@@ -670,7 +670,7 @@ def conv3d_grad_wrt_weights(input, ...@@ -670,7 +670,7 @@ def conv3d_grad_wrt_weights(input,
----- -----
:note: If cuDNN is available, it will be used on the :note: If cuDNN is available, it will be used on the
GPU. Otherwise, it is the *CorrMM* convolution that will be used GPU. Otherwise, it is the *Corr3dMM* convolution that will be used
"caffe style convolution". "caffe style convolution".
:note: This is only supported in Theano 0.8 or the development :note: This is only supported in Theano 0.8 or the development
......
from __future__ import absolute_import, print_function, division
import os
import logging
from six import integer_types
import theano
from theano import Apply
from theano import gof
from theano.tensor import as_tensor_variable, TensorType
from theano.tensor.nnet.abstract_conv import get_conv_output_shape
from theano.tensor import blas_headers
from theano.tensor.blas import ldflags, blas_header_version
_logger = logging.getLogger(__name__)
class BaseCorr3dMM(gof.OpenMPOp):
    """
    Base class for `Corr3dMM`, `Corr3dMM_gradWeights` and
    `Corr3dMM_gradInputs`. Cannot be used directly.

    Parameters
    ----------
    border_mode : {'valid', 'full', 'half'}
        Additionally, the padding size could be directly specified by an
        integer or a tuple of three integers
    subsample
        Perform subsampling of the output (default: (1, 1, 1)).
    filter_dilation
        Perform dilated correlation (default: (1, 1, 1))

    """
    check_broadcast = False
    __props__ = ('border_mode', 'subsample', 'filter_dilation')

    def __init__(self, border_mode="valid", subsample=(1, 1, 1),
                 filter_dilation=(1, 1, 1), openmp=None):
        super(BaseCorr3dMM, self).__init__(openmp=openmp)
        # Normalize border_mode: an int or 3-tuple of non-negative ints
        # becomes an explicit (pad_h, pad_w, pad_d) tuple; otherwise it must
        # be one of the string shortcuts 'valid', 'full', 'half'.
        if isinstance(border_mode, integer_types):
            if border_mode < 0:
                raise ValueError(
                    'invalid border_mode {}, which must be a '
                    'non-negative integer'.format(border_mode))
            border_mode = (border_mode, border_mode, border_mode)
        if isinstance(border_mode, tuple):
            if len(border_mode) != 3 or min(border_mode) < 0:
                raise ValueError(
                    'invalid border_mode {}, which must be a tuple of '
                    'three non-negative integers'.format(border_mode))
            pad_h, pad_w, pad_d = map(int, border_mode)
            border_mode = (pad_h, pad_w, pad_d)
        if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
                border_mode in ('valid', 'full', 'half')):
            raise ValueError(
                'invalid border_mode {}, which must be either '
                '"valid", "full", "half", an integer or a tuple of three'
                ' integers'.format(border_mode))
        self.border_mode = border_mode
        if len(subsample) != 3:
            raise ValueError("subsample must have three elements")
        if len(filter_dilation) != 3:
            raise ValueError("filter_dilation must have three elements")
        self.subsample = tuple(subsample)
        self.filter_dilation = tuple(filter_dilation)
        # The generated C code calls BLAS gemm; remember which BLAS flavour
        # is linked so the support code can control its thread count.
        if not theano.config.blas.ldflags:
            raise NotImplementedError("C code for corrMM* classes need a blas library.")
        else:
            if 'openblas' in theano.config.blas.ldflags:
                self.blas_type = 'openblas'
            elif 'mkl' in theano.config.blas.ldflags:
                self.blas_type = 'mkl'
            else:
                self.blas_type = ''

    @property
    def pad(self):
        """Explicit padding tuple; (0, 0, 0) for border_mode='valid'."""
        if self.border_mode != 'valid':
            return self.border_mode
        return (0, 0, 0)

    def __str__(self):
        return '%s{%s, %s, %s}' % (
            self.__class__.__name__,
            self.border_mode,
            str(self.subsample),
            str(self.filter_dilation))

    @staticmethod
    def as_common_dtype(in1, in2):
        """
        Upcast input variables if neccesary.
        """
        dtype = theano.scalar.upcast(in1.dtype, in2.dtype)
        return in1.astype(dtype), in2.astype(dtype)

    def c_support_code(self):
        # BLAS prototypes, plus thread-control helpers for the linked BLAS.
        ccodes = blas_headers.blas_header_text()
        if self.blas_type == 'openblas':
            ccodes += blas_headers.openblas_threads_text()
        elif self.blas_type == 'mkl':
            ccodes += blas_headers.mkl_threads_text()
        return ccodes

    def c_libraries(self):
        return ldflags()

    def c_compile_args(self):
        compile_args = ldflags(libs=False, flags=True)
        compile_args += super(BaseCorr3dMM, self).c_compile_args()
        return compile_args

    def c_lib_dirs(self):
        return ldflags(libs=False, libs_dir=True)

    def c_header_dirs(self):
        return ldflags(libs=False, include_dir=True)

    def c_headers(self):
        headers = ['<stdio.h>']
        headers += super(BaseCorr3dMM, self).c_headers()
        return headers

    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
        # or the code templates returned by c_code_helper
        return (2, self.openmp, blas_header_version())

    def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of
        # these files
        sub = {}
        dtype = str(node.inputs[0].dtype)
        assert dtype in ('float32', 'float64')
        if dtype == 'float32':
            sub['gemm'] = 'sgemm_'
            sub['float_type'] = 'npy_float'
            sub['float_typenum'] = 'NPY_FLOAT'
            sub['n_bytes'] = 4
            sub['c_float_type'] = 'float'
        else:
            sub['gemm'] = 'dgemm_'
            sub['float_type'] = 'npy_double'
            sub['float_typenum'] = 'NPY_DOUBLE'
            sub['n_bytes'] = 8
            sub['c_float_type'] = 'double'
        # OpenMP / BLAS threading knobs used by corr3d_gemm.c
        if self.openmp:
            sub['omp_flags'] = '#pragma omp parallel for schedule(static)'
            sub['omp_get_max_threads'] = 'omp_get_max_threads()'
            sub['omp_get_thread_num'] = 'omp_get_thread_num()'
            if self.blas_type == 'openblas':
                sub['blas_set_num_threads'] = 'openblas_set_num_threads'
                sub['blas_get_num_threads'] = 'openblas_get_num_threads()'
            elif self.blas_type == 'mkl':
                sub['blas_set_num_threads'] = 'mkl_set_num_threads'
                sub['blas_get_num_threads'] = 'mkl_get_max_threads()'
            else:
                sub['blas_set_num_threads'] = ''
                sub['blas_get_num_threads'] = '0'
        else:
            sub['omp_flags'] = ''
            sub['omp_get_max_threads'] = '1'
            sub['omp_get_thread_num'] = '0'
            sub['blas_set_num_threads'] = ''
            sub['blas_get_num_threads'] = '0'
        files = ['corr3d_gemm.c']
        codes = [open(os.path.join(os.path.split(__file__)[0], f)).read()
                 for f in files]
        final_code = ''
        for code in codes:
            final_code += code
        return final_code % sub

    def c_code_helper(self, bottom, weights, top, direction, sub,
                      height=None, width=None, depth=None):
        """
        This generates the C code for Corr3dMM (direction="forward"),
        Corr3dMM_gradWeights (direction="backprop weights"), and
        Corr3dMM_gradInputs (direction="backprop inputs").
        Depending on the direction, one of bottom, weights, top will
        receive the output, while the other two serve as inputs.

        :param bottom: Variable name of the input images in the forward pass,
            or the gradient of the input images in backprop wrt. inputs
        :param weights: Variable name of the filters in the forward pass,
            or the gradient of the filters in backprop wrt. weights
        :param top: Variable name of the output images / feature maps in the
            forward pass, or the gradient of the outputs in the backprop passes
        :param direction: "forward" to correlate bottom with weights and store
            results in top,
            "backprop weights" to do a valid convolution of bottom with top
            (swapping the first two dimensions) and store results in weights,
            and "backprop inputs" to do a full convolution of top with weights
            (swapping the first two dimensions) and store results in bottom.
        :param sub: Dictionary of substitutions useable to help generating the
            C code.
        :param height: If self.subsample[0] != 1, a variable giving the height
            of the filters for direction="backprop weights" or the height of
            the input images for direction="backprop inputs".
            If self.border_mode == 'half', a variable giving the height of the
            filters for direction="backprop weights". Ignored otherwise.
        :param width: If self.subsample[1] != 1, a variable giving the width
            of the filters for direction="backprop weights" or the width of the
            input images for direction="backprop inputs".
            If self.border_mode == 'half', a variable giving the width of the
            filters for direction="backprop weights". Ignored otherwise.
        :param depth: If self.subsample[2] != 1, a variable giving the depth
            of the filters for direction="backprop weights" or the depth of the
            input images for direction="backprop inputs".
            If self.border_mode == 'half', a variable giving the depth of the
            filters for direction="backprop weights". Ignored otherwise.
        """
        dH, dW, dD = self.subsample
        dilH, dilW, dilD = self.filter_dilation
        # Encode the string border modes as negative pad values; the C code
        # resolves -1 (half) and -2 (full) once the kernel shape is known.
        if self.border_mode == "half":
            padH = padW = padD = -1
        elif self.border_mode == "full":
            padH = padW = padD = -2
        elif isinstance(self.border_mode, tuple):
            padH, padW, padD = self.border_mode
        else:
            assert self.border_mode == "valid"
            padH = padW = padD = 0
        if direction == "forward":
            direction = 0
            out = top
        elif direction == "backprop weights":
            direction = 1
            out = weights
        elif direction == "backprop inputs":
            direction = 2
            out = bottom
        else:
            raise ValueError("direction must be one of 'forward', "
                             "'backprop weights', 'backprop inputs'")
        # When subsampling, we cannot unambiguously infer the height and width
        # of bottom and weights from top, so we require them to be given.
        # Similarly, when border_mode="half", we cannot infer the weight size.
        if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
            if not height:
                raise ValueError("height must be given for backprop with vertical sampling or border_mode='half'")
            height = '(*(npy_int64 *)(PyArray_DATA(%s)))' % height
        else:
            height = '-1'
        if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
            if not width:
                raise ValueError("width must be given for backprop with horizontal sampling or border_mode='half'")
            width = '(*(npy_int64 *)(PyArray_DATA(%s)))' % width
        else:
            width = '-1'
        if ((direction != 0) and (dD != 1)) or ((direction == 1) and (padD == -1)):
            if not depth:
                raise ValueError("depth must be given for backprop with depth sampling or border_mode='half'")
            depth = '(*(npy_int64 *)(PyArray_DATA(%s)))' % depth
        else:
            depth = '-1'
        sub = sub.copy()
        sub.update(locals())

        return """
    // Mandatory args
    int direction = %(direction)s;  // forward, bprop weights, bprop inputs

    // Optional args
    int dH = %(dH)s;
    int dW = %(dW)s;
    int dD = %(dD)s;
    int dilH = %(dilH)s;
    int dilW = %(dilW)s;
    int dilD = %(dilD)s;
    int padH = %(padH)s;
    int padW = %(padW)s;
    int padD = %(padD)s;

    PyArrayObject * bottom = %(bottom)s;
    PyArrayObject * weights = %(weights)s;
    PyArrayObject * top = %(top)s;
    PyArrayObject * out2 = NULL;

    // Obtain or infer kernel width, height and depth
    // (we need to know it early to be able to handle auto-padding)
    int kH, kW, kD;
    if (direction != 1) {
        // weight is an input variable, we can just read its shape
        kH = PyArray_DIMS(weights)[2];
        kW = PyArray_DIMS(weights)[3];
        kD = PyArray_DIMS(weights)[4];
    }
    else {
        if ((dH != 1) || (padH == -1)) {
            // vertical subsampling or half padding, kernel height is specified
            kH = %(height)s;
        }
        else if (padH == -2) {
            // vertical full padding, we can infer the kernel height
            kH = (2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1;
        }
        else {
            // explicit padding, we can infer the kernel height
            kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
        }
        if ((dW != 1) || (padW == -1)) {
            kW = %(width)s;
        }
        else if (padW == -2) {
            kW = (2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }
        else {
            kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }
        if ((dD != 1) || (padD == -1)) {
            kD = %(depth)s;
        }
        else if (padD == -2) {
            kD = (2 - PyArray_DIMS(bottom)[4] + (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
        }
        else {
            kD = (PyArray_DIMS(bottom)[4] + 2*padD - (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
        }
    }

    // Implicit dilated kernel size
    int dil_kH = (kH - 1) * dilH + 1;
    int dil_kW = (kW - 1) * dilW + 1;
    int dil_kD = (kD - 1) * dilD + 1;

    // Auto-padding if requested
    if (padH == -1) {  // vertical half padding
        padH = dil_kH / 2;
    }
    else if (padH == -2) {  // vertical full padding
        padH = dil_kH - 1;
    }
    else if (padH < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padH must be >= -2");
        %(fail)s
    }
    if (padW == -1) {  // horizontal half padding
        padW = dil_kW / 2;
    }
    else if (padW == -2) {  // horizontal full padding
        padW = dil_kW - 1;
    }
    else if (padW < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padW must be >= -2");
        %(fail)s
    }
    if (padD == -1) {  // depth half padding
        padD = dil_kD / 2;
    }
    else if (padD == -2) {  // depth full padding
        padD = dil_kD - 1;
    }
    else if (padD < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padD must be >= -2");
        %(fail)s
    }

    // Infer output shape
    npy_intp out_dim[5];
    switch(direction) {
    case 0:  // forward pass
        // output is top: (batchsize, num_filters, height, width, depth)
        // height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
        out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
        out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1);
        out_dim[4] = (npy_intp)((PyArray_DIMS(bottom)[4] + 2*padD - ((PyArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1);
        break;
    case 1:  // backprop wrt. weights
        // output is weights: (num_filters, num_channels, height, width, depth)
        // height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
        out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
        out_dim[2] = (npy_intp)kH;  // already inferred further above
        out_dim[3] = (npy_intp)kW;  // how convenient
        out_dim[4] = (npy_intp)kD;
        break;
    case 2:  // backprop wrt. inputs
        // output is bottom: (batchsize, num_channels, height, width, depth)
        // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
        out_dim[2] = (npy_intp)((dH != 1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
        out_dim[3] = (npy_intp)((dW != 1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
        out_dim[4] = (npy_intp)((dD != 1) ? %(depth)s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
        break;
    default:
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2\\n");
        %(fail)s
    }

    // Prepare output array: reuse the existing 5D buffer when its shape
    // already matches, otherwise allocate a fresh zeroed array.
    // (All outputs of these ops are 5D, so the ndim check must be 5;
    // checking 4 would both reallocate on every call and read DIMS[4]
    // out of bounds on a genuine 4D array.)
    int typenum;
    if ( !(%(out)s
           && PyArray_NDIM(%(out)s)==5
           && PyArray_IS_C_CONTIGUOUS(%(out)s)
           && PyArray_DIMS(%(out)s)[0]==out_dim[0]
           && PyArray_DIMS(%(out)s)[1]==out_dim[1]
           && PyArray_DIMS(%(out)s)[2]==out_dim[2]
           && PyArray_DIMS(%(out)s)[3]==out_dim[3]
           && PyArray_DIMS(%(out)s)[4]==out_dim[4]))
    {
        Py_XDECREF(%(out)s);
        if (direction != 1) {
            typenum = PyArray_TYPE(weights);
        }
        else {
            typenum = PyArray_TYPE(bottom);
        }
        //Change to PyArray_ZEROS which is faster than PyArray_EMPTY.
        %(out)s = (PyArrayObject*)PyArray_ZEROS(5,
                                                out_dim,
                                                typenum,
                                                0);
        if (NULL == %(out)s)
        {
            PyErr_Format(PyExc_RuntimeError,
                         "BaseCorr3dMM: Failed to allocate output of %%lld x %%lld x %%lld x %%lld x %%lld",
                         (long long)out_dim[0], (long long)out_dim[1],
                         (long long)out_dim[2], (long long)out_dim[3], (long long)out_dim[4]);
            %(fail)s
        }
    }

    // Call corr3dMM code
    out2 = corr3dMM(%(bottom)s, %(weights)s, %(top)s, direction,
                    dH, dW, dD, dilH, dilW, dilD, padH, padW, padD);
    if (out2==NULL){
       %(fail)s
    }
    assert (out2 == %(out)s);
""" % sub
class Corr3dMM(BaseCorr3dMM):
    """
    CPU 3D correlation implemented via matrix multiplication (im2col + gemm).

    Parameters
    ----------
    border_mode
        Width of the implicit zero padding added to the input: a tuple of
        three integers (one per spatial axis, applied on both sides of that
        axis), a single integer used for all three axes, or one of the
        string shortcuts ``'valid'`` (no padding), ``'full'``
        (``kernel - 1`` on each axis) or ``'half'`` (``kernel // 2`` on each
        axis, giving a same-size output for odd kernels).
    subsample
        Output subsampling (stride) as a 3-tuple; `(1, 1, 1)` disables it.
    filter_dilation
        Filter dilation as a 3-tuple; `(1, 1, 1)` disables it.
    """

    def make_node(self, img, kern):
        """Create the Apply node for a forward 3D correlation of 5D inputs."""
        img = as_tensor_variable(img)
        kern = as_tensor_variable(kern)
        # Both operands must share a dtype for the gemm-based C code.
        img, kern = self.as_common_dtype(img, kern)
        if img.type.ndim != 5:
            raise TypeError('img must be 5D tensor')
        if kern.type.ndim != 5:
            raise TypeError('kern must be 5D tensor')
        # Output: (batch, filters, h, w, d); only the first two axes can
        # inherit broadcastability from the inputs.
        out_bcast = [img.type.broadcastable[0],
                     kern.type.broadcastable[0],
                     False, False, False]
        out_var = TensorType(img.type.dtype, out_bcast)()
        return Apply(self, [img, kern], [out_var])

    def infer_shape(self, node, input_shape):
        """Delegate output-shape inference to the shared conv helper."""
        imshp, kshp = input_shape
        out_shape = get_conv_output_shape(imshp,
                                          kshp,
                                          self.border_mode,
                                          self.subsample,
                                          self.filter_dilation)
        return [out_shape]

    def c_code(self, node, nodename, inp, out_, sub):
        # Forward pass: bottom and weights are inputs, top is the output.
        bottom, weights = inp
        top, = out_
        return super(Corr3dMM, self).c_code_helper(bottom, weights, top,
                                                   "forward", sub)

    def grad(self, inp, grads):
        """Gradients wrt. the image and the filters via the two backprop ops."""
        bottom, weights = inp
        top, = grads
        grad_inputs_op = Corr3dMM_gradInputs(self.border_mode,
                                             self.subsample,
                                             self.filter_dilation)
        grad_weights_op = Corr3dMM_gradWeights(self.border_mode,
                                               self.subsample,
                                               self.filter_dilation)
        d_bottom = grad_inputs_op(weights, top, bottom.shape[-3:])
        d_weights = grad_weights_op(bottom, top, weights.shape[-3:])
        return d_bottom, d_weights
class Corr3dMM_gradWeights(BaseCorr3dMM):
    """
    Gradient wrt. filters for `Corr3dMM`.

    Notes
    -----
    You will not want to use this directly, but rely on
    Theano's automatic differentiation or graph optimization to
    use it as needed.

    """

    def make_node(self, img, topgrad, shape=None):
        """Create the Apply node; `shape` gives the filter spatial shape
        when it cannot be inferred (subsampling or half padding)."""
        img = as_tensor_variable(img)
        topgrad = as_tensor_variable(topgrad)
        img, topgrad = self.as_common_dtype(img, topgrad)
        if img.type.ndim != 5:
            raise TypeError('img must be 5D tensor')
        if topgrad.type.ndim != 5:
            raise TypeError('topgrad must be 5D tensor')
        if self.subsample != (1, 1, 1) or self.border_mode == "half":
            if shape is None:
                raise ValueError('shape must be given if subsample != (1, 1, 1)'
                                 ' or border_mode == "half"')
            # Filter height/width/depth become extra scalar inputs.
            height_width_depth = [as_tensor_variable(shape[0]).astype('int64'),
                                  as_tensor_variable(shape[1]).astype('int64'),
                                  as_tensor_variable(shape[2]).astype('int64')]
        else:
            height_width_depth = []
        # Output is the filter gradient: (nFilters, nChannels, kH, kW, kD).
        broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1],
                         False, False, False]
        dtype = img.type.dtype
        return Apply(self, [img, topgrad] + height_width_depth,
                     [TensorType(dtype, broadcastable)()])

    def infer_shape(self, node, input_shape):
        """Infer the filter-gradient shape, mirroring the formulas used by
        the generated C code (including filter dilation)."""
        if self.border_mode == "half":
            padH = padW = padD = -1
        elif self.border_mode == "full":
            padH = padW = padD = -2
        elif isinstance(self.border_mode, tuple):
            padH, padW, padD = self.border_mode
        else:
            assert self.border_mode == "valid"
            padH = padW = padD = 0
        dH, dW, dD = self.subsample
        # The C code divides the inferred kernel extent by the dilation
        # (see c_code_helper); infer_shape must agree with it.
        dilH, dilW, dilD = self.filter_dilation
        imshp = input_shape[0]
        topshp = input_shape[1]
        ssize, imshp = imshp[1], list(imshp[2:])
        nkern, topshp = topshp[1], list(topshp[2:])
        height_width_depth = node.inputs[-3:]
        if ((dH != 1) or (padH == -1)):
            # vertical subsampling or half padding, kernel height is specified
            kH = height_width_depth[0]
        elif padH == -2:
            # vertical full padding, we can infer the kernel height
            kH = (2 - imshp[0] + (topshp[0] - 1) * dH - 1) // dilH + 1
        else:
            # explicit padding, we can infer the kernel height
            kH = (imshp[0] + 2 * padH - (topshp[0] - 1) * dH - 1) // dilH + 1
        if ((dW != 1) or (padW == -1)):
            kW = height_width_depth[1]
        elif (padW == -2):
            kW = (2 - imshp[1] + (topshp[1] - 1) * dW - 1) // dilW + 1
        else:
            kW = (imshp[1] + 2 * padW - (topshp[1] - 1) * dW - 1) // dilW + 1
        if ((dD != 1) or (padD == -1)):
            kD = height_width_depth[2]
        elif (padD == -2):
            kD = (2 - imshp[2] + (topshp[2] - 1) * dD - 1) // dilD + 1
        else:
            kD = (imshp[2] + 2 * padD - (topshp[2] - 1) * dD - 1) // dilD + 1
        return [(nkern, ssize, kH, kW, kD)]

    def c_code(self, node, nodename, inp, out_, sub):
        # Backprop wrt. weights: bottom and top are inputs, weights is output.
        bottom, top = inp[:2]
        height, width, depth = inp[2:] or (None, None, None)
        weights, = out_
        direction = "backprop weights"
        return super(Corr3dMM_gradWeights,
                     self).c_code_helper(bottom, weights, top, direction,
                                         sub, height, width, depth)

    def grad(self, inp, grads):
        """Gradients of this op's output wrt. its image and topgrad inputs;
        the optional shape inputs are disconnected."""
        bottom, top = inp[:2]
        weights, = grads
        d_bottom = Corr3dMM_gradInputs(self.border_mode,
                                       self.subsample,
                                       self.filter_dilation)(weights, top,
                                                             bottom.shape[-3:])
        d_top = Corr3dMM(self.border_mode,
                         self.subsample,
                         self.filter_dilation)(bottom, weights)
        d_height_width_depth = ((theano.gradient.DisconnectedType()(),) * 3
                                if len(inp) == 5 else ())
        return (d_bottom, d_top) + d_height_width_depth

    def connection_pattern(self, node):
        if node.nin == 2:
            return [[1], [1]]
        else:
            return [[1], [1], [0], [0], [0]]  # no connection to height, width, depth
class Corr3dMM_gradInputs(BaseCorr3dMM):
    """
    Gradient wrt. inputs for `Corr3dMM`.

    Notes
    -----
    You will not want to use this directly, but rely on
    Theano's automatic differentiation or graph optimization to
    use it as needed.

    """

    def make_node(self, kern, topgrad, shape=None):
        """Create the Apply node; `shape` gives the input spatial shape,
        required when subsampling makes it ambiguous."""
        kern = as_tensor_variable(kern)
        topgrad = as_tensor_variable(topgrad)
        kern, topgrad = self.as_common_dtype(kern, topgrad)
        if kern.type.ndim != 5:
            raise TypeError('kern must be 5D tensor')
        if topgrad.type.ndim != 5:
            raise TypeError('topgrad must be 5D tensor')
        if self.subsample != (1, 1, 1) and shape is None:
            raise ValueError('shape must be given if subsample != (1, 1, 1)')
        if self.subsample != (1, 1, 1):
            # Input height/width/depth become extra scalar inputs.
            height_width_depth = [as_tensor_variable(shape[0]).astype('int64'),
                                  as_tensor_variable(shape[1]).astype('int64'),
                                  as_tensor_variable(shape[2]).astype('int64')]
        else:
            height_width_depth = []
        # Output is the input gradient: (batch, nChannels, h, w, d).
        broadcastable = [topgrad.type.broadcastable[0], kern.type.broadcastable[1],
                         False, False, False]
        dtype = kern.type.dtype
        return Apply(self, [kern, topgrad] + height_width_depth,
                     [TensorType(dtype, broadcastable)()])

    def infer_shape(self, node, input_shape):
        """Infer the input-gradient shape, mirroring the formulas used by
        the generated C code (including filter dilation)."""
        if self.border_mode == "half":
            padH = padW = padD = -1
        elif self.border_mode == "full":
            padH = padW = padD = -2
        elif isinstance(self.border_mode, tuple):
            padH, padW, padD = self.border_mode
        else:
            assert self.border_mode == "valid"
            padH = padW = padD = 0
        dH, dW, dD = self.subsample
        dilH, dilW, dilD = self.filter_dilation
        kshp = input_shape[0]
        topshp = input_shape[1]
        ssize, kshp = kshp[1], list(kshp[2:])
        bsize, topshp = topshp[0], list(topshp[2:])
        height_width_depth = node.inputs[-3:]
        # Implicit dilated kernel size, as computed by the C code
        # (dil_kH = (kH - 1) * dilH + 1).
        dil_kH = (kshp[0] - 1) * dilH + 1
        dil_kW = (kshp[1] - 1) * dilW + 1
        dil_kD = (kshp[2] - 1) * dilD + 1
        # Resolve half (-1) and full (-2) padding from the dilated kernel.
        if padH == -1:
            padH = dil_kH // 2
        elif padH == -2:
            padH = dil_kH - 1
        elif padH < -2:
            raise ValueError('Corr3dMM_gradInputs: border_mode must be >= 0.')
        if padW == -1:
            padW = dil_kW // 2
        elif padW == -2:
            padW = dil_kW - 1
        elif padW < -2:
            raise ValueError('Corr3dMM_gradInputs: border_mode must be >= 0.')
        if padD == -1:
            padD = dil_kD // 2
        elif padD == -2:
            padD = dil_kD - 1
        elif padD < -2:
            raise ValueError('Corr3dMM_gradInputs: border_mode must be >= 0.')
        # bottom = (top - 1) * sample + dilated_kernel - 2 * pad; with
        # subsampling the size is ambiguous and given explicitly instead.
        if dH != 1:
            out_shp0 = height_width_depth[0]
        else:
            out_shp0 = (topshp[0] - 1) * dH + dil_kH - 2 * padH
        if dW != 1:
            out_shp1 = height_width_depth[1]
        else:
            out_shp1 = (topshp[1] - 1) * dW + dil_kW - 2 * padW
        if dD != 1:
            out_shp2 = height_width_depth[2]
        else:
            out_shp2 = (topshp[2] - 1) * dD + dil_kD - 2 * padD
        out_shp = (out_shp0, out_shp1, out_shp2)
        return [(bsize, ssize) + out_shp]

    def c_code(self, node, nodename, inp, out_, sub):
        # Backprop wrt. inputs: weights and top are inputs, bottom is output.
        weights, top = inp[:2]
        height, width, depth = inp[2:] or (None, None, None)
        bottom, = out_
        direction = "backprop inputs"
        return super(Corr3dMM_gradInputs,
                     self).c_code_helper(bottom, weights, top, direction, sub,
                                         height, width, depth)

    def grad(self, inp, grads):
        """Gradients of this op's output wrt. its kernel and topgrad inputs;
        the optional shape inputs are disconnected."""
        weights, top = inp[:2]
        bottom, = grads
        d_weights = Corr3dMM_gradWeights(self.border_mode,
                                         self.subsample,
                                         self.filter_dilation)(bottom,
                                                               top,
                                                               weights.shape[-3:])
        d_top = Corr3dMM(self.border_mode,
                         self.subsample,
                         self.filter_dilation)(bottom, weights)
        d_height_width_depth = ((theano.gradient.DisconnectedType()(),) * 3
                                if len(inp) == 5 else ())
        return (d_weights, d_top) + d_height_width_depth

    def connection_pattern(self, node):
        if node.nin == 2:
            return [[1], [1]]
        else:
            return [[1], [1], [0], [0], [0]]  # no connection to height, width, depth
// This uses a lot of code from Caffe (http://caffe.berkeleyvision.org/);
// sources are clearly marked. Below we reproduce the original license of
// the Caffe software.
/*
Copyright (c) 2014, The Regents of the University of California (Regents)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cpp)
// Loops for fast unfold + copy
// Unfold a single 3D image (channels x height x width x depth) into a
// column matrix data_col of shape
//   (channels * kernel_h * kernel_w * kernel_d) x
//   (height_col * width_col * depth_col),
// where each column holds one receptive field, so the correlation can be
// done as a single gemm. Positions that fall into the implicit zero
// padding are written as 0.
// NOTE: this is a template consumed by Python %-substitution:
// %(float_type)s is replaced with the element type and %% is a literal
// '%' (the C modulo operator).
void im3d2col(const %(float_type)s* data_im, const int channels,
              const int height, const int width, const int depth,
              const int kernel_h, const int kernel_w, const int kernel_d,
              const int dilation_h, const int dilation_w, const int dilation_d,
              const int pad_h, const int pad_w, const int pad_d,
              const int stride_h, const int stride_w, const int stride_d,
              %(float_type)s* data_col) {
  // Implicit dilated kernel size
  int dil_kernel_h = (kernel_h - 1) * dilation_h + 1;
  int dil_kernel_w = (kernel_w - 1) * dilation_w + 1;
  int dil_kernel_d = (kernel_d - 1) * dilation_d + 1;
  // Output (column) spatial extents for the given stride/pad/dilation.
  int height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1;
  int width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1;
  int depth_col = (depth + 2 * pad_d - dil_kernel_d) / stride_d + 1;
  int channels_col = channels * kernel_h * kernel_w * kernel_d;
  for (int c = 0; c < channels_col; ++c) {
    // Decompose the column-channel index into (channel, kh, kw, kd) offsets.
    int d_offset = c %% kernel_d;
    int w_offset = (c / kernel_d) %% kernel_w;
    int h_offset = (c / kernel_w / kernel_d) %% kernel_h;
    int c_im = c / kernel_h / kernel_w / kernel_d;
    for (int h = 0; h < height_col; ++h) {
      // Source coordinate in the padded image for this output position.
      int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
      for (int w = 0; w < width_col; ++w) {
        int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
        for (int d = 0; d < depth_col; ++d) {
          int d_pad = d * stride_d - pad_d + d_offset * dilation_d;
          if (h_pad >= 0 && h_pad < height
              && w_pad >= 0 && w_pad < width
              && d_pad >= 0 && d_pad < depth)
            // Inside the image: copy the pixel.
            data_col[(npy_intp)((c * height_col + h) * width_col + w) * depth_col + d] =
              data_im[(npy_intp)((c_im * height + h_pad) * width + w_pad) * depth + d_pad];
          else
            // Inside the zero padding.
            data_col[(npy_intp)((c * height_col + h) * width_col + w) * depth_col + d] = 0.;
        }
      }
    }
  }
}
// Unlike the Caffe and Theano GPU verions, the data_im array is set to zero
// before the col2im call rather than doing it here. So, the result is just
// accumulated into data_im.
// Inverse of im3d2col: scatter-add the column matrix data_col back into
// the image buffer data_im, accumulating overlapping contributions.
// Positions that map into the implicit zero padding are skipped.
// NOTE: this is a template consumed by Python %-substitution:
// %(float_type)s is replaced with the element type and %% is a literal
// '%' (the C modulo operator).
void col2im3d(const %(float_type)s* data_col, const int channels,
              const int height, const int width, const int depth,
              const int patch_h, const int patch_w, const int patch_d,
              const int dilation_h, const int dilation_w, const int dilation_d,
              const int pad_h, const int pad_w, const int pad_d,
              const int stride_h, const int stride_w, const int stride_d,
              %(float_type)s* data_im) {
  // Implicit dilated patch
  int dil_patch_h = (patch_h - 1) * dilation_h + 1;
  int dil_patch_w = (patch_w - 1) * dilation_w + 1;
  int dil_patch_d = (patch_d - 1) * dilation_d + 1;
  // Column spatial extents for the given stride/pad/dilation.
  int height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1;
  int width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1;
  int depth_col = (depth + 2 * pad_d - dil_patch_d) / stride_d + 1;
  int channels_col = channels * patch_h * patch_w * patch_d;
  for (int c = 0; c < channels_col; ++c) {
    // Decompose the column-channel index into (channel, kh, kw, kd) offsets.
    int d_offset = c %% patch_d;
    int w_offset = (c / patch_d) %% patch_w;
    int h_offset = (c / patch_w / patch_d) %% patch_h;
    int c_im = c / patch_h / patch_w / patch_d;
    for (int h = 0; h < height_col; ++h) {
      // Target coordinate in the padded image for this column position.
      int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
      for (int w = 0; w < width_col; ++w) {
        int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
        for (int d = 0; d < depth_col; ++d) {
          int d_pad = d * stride_d - pad_d + d_offset * dilation_d;
          if (h_pad >= 0 && h_pad < height
              && w_pad >= 0 && w_pad < width
              && d_pad >= 0 && d_pad < depth)
            // Accumulate; data_im is zeroed by the caller (see note above).
            data_im[(npy_intp)((c_im * height + h_pad) * width + w_pad) * depth + d_pad] +=
              data_col[(npy_intp)((c * height_col + h) * width_col + w) * depth_col + d];
        }
      }
    }
  }
}
// Theano op code
// GPU version authors: Arjun Jain, Frederic Bastien, Jan Schlueter
// Reference code: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
// and https://github.com/torch/cunn/blob/master/SpatialConvolutionMM.cu
// CPU version author: Jesse Livezey
// CPU version adapted from GPU version
//
// Compute a 3D correlation (or one of its gradients) as im3d2col + GEMM.
//   direction == 0: forward pass          -- result written into top
//   direction == 1: gradient wrt weights  -- result written into weight
//   direction == 2: gradient wrt inputs   -- result written into bottom
// dH, dW, dD are the subsampling strides; dilH, dilW, dilD the filter
// dilations; padH, padW, padD the zero-paddings, one per spatial axis.
// Returns an alias of the array that was written to, or NULL with a
// Python exception set on error. The refcount of the returned array is
// not changed here; output (re)allocation and refcounting are done by
// BaseCorr3dMM.c_code_helper().
PyArrayObject* corr3dMM(PyArrayObject* bottom,
                        PyArrayObject* weight,
                        PyArrayObject* top,
                        const int direction,
                        const int dH = 1,
                        const int dW = 1,
                        const int dD = 1,
                        const int dilH = 1,
                        const int dilW = 1,
                        const int dilD = 1,
                        const int padH = 0,
                        const int padW = 0,
                        const int padD = 0)
{
    // Validate ranks and dtypes before taking any new references, so
    // these early returns need no cleanup.
    if (PyArray_NDIM(bottom) != 5)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM requires bottom of 5D");
        return NULL;
    }
    if (PyArray_TYPE(bottom) != %(float_typenum)s)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM received bottom with wrong type.");
        return NULL;
    }
    if (PyArray_NDIM(weight) != 5)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM requires weight of 5D");
        return NULL;
    }
    if (PyArray_TYPE(weight) != %(float_typenum)s)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM received weight with wrong type.");
        return NULL;
    }
    if (PyArray_NDIM(top) != 5)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM requires top of 5D");
        return NULL;
    }
    if (PyArray_TYPE(top) != %(float_typenum)s)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM received top with wrong type.");
        return NULL;
    }

    // Ensure data is contiguous. Each call returns a NEW reference that
    // must be released on every exit path from here on.
    bottom = PyArray_GETCONTIGUOUS(bottom);
    weight = PyArray_GETCONTIGUOUS(weight);
    top = PyArray_GETCONTIGUOUS(top);

    // Extract some shape information for later and check shape consistency
    // bottom: (batchSize, nChannels, bottomHeight, bottomWidth, bottomDepth)
    const int batchSize = PyArray_DIMS(bottom)[0];
    const int nChannels = PyArray_DIMS(bottom)[1];
    const int bottomHeight = PyArray_DIMS(bottom)[2];
    const int bottomWidth = PyArray_DIMS(bottom)[3];
    const int bottomDepth = PyArray_DIMS(bottom)[4];
    // weights: (nFilters, nChannels, rows, columns, slices)
    const int nFilters = PyArray_DIMS(weight)[0];
    const int kH = PyArray_DIMS(weight)[2];
    const int kW = PyArray_DIMS(weight)[3];
    const int kD = PyArray_DIMS(weight)[4];
    if (nChannels != PyArray_DIMS(weight)[1]) {
        PyErr_SetString(PyExc_ValueError,
                "Corr3dMM images and kernel must have the same stack size\n");
        // release the references acquired by PyArray_GETCONTIGUOUS
        Py_DECREF(bottom);
        Py_DECREF(weight);
        Py_DECREF(top);
        return NULL;
    }
    // implicit dilated filter
    const int dil_kH = (kH - 1) * dilH + 1;
    const int dil_kW = (kW - 1) * dilW + 1;
    const int dil_kD = (kD - 1) * dilD + 1;
    // top: (batchSize, nFilters, topHeight, topWidth, topDepth)
    const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
    const int topWidth  = (bottomWidth + 2*padW - dil_kW) / dW + 1;
    const int topDepth  = (bottomDepth + 2*padD - dil_kD) / dD + 1;
    if (batchSize != PyArray_DIMS(top)[0] ||
            nFilters != PyArray_DIMS(top)[1] ||
            topHeight != PyArray_DIMS(top)[2] ||
            topWidth != PyArray_DIMS(top)[3] ||
            topDepth != PyArray_DIMS(top)[4]) {
        PyErr_Format(PyExc_ValueError,
                "Corr3dMM shape inconsistency:\n"
                "  bottom shape: %%d %%d %%d %%d %%d\n"
                "  weight shape: %%d %%d %%d %%d %%d\n"
                "  top shape: %%ld %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d %%d)\n",
                batchSize, nChannels, bottomHeight, bottomWidth, bottomDepth,
                nFilters, nChannels, kH, kW, kD,
                PyArray_DIMS(top)[0], PyArray_DIMS(top)[1],
                PyArray_DIMS(top)[2], PyArray_DIMS(top)[3], PyArray_DIMS(top)[4],
                batchSize, nFilters, topHeight, topWidth, topDepth);
        // release the references acquired by PyArray_GETCONTIGUOUS
        Py_DECREF(bottom);
        Py_DECREF(weight);
        Py_DECREF(top);
        return NULL;
    }

    // Create temporary columns: one im3d2col buffer per OpenMP thread,
    // but never more buffers than batch items.
    int max_threads = %(omp_get_max_threads)s;
    if (batchSize < max_threads) {
        max_threads = batchSize;
    }
    npy_intp col_dim[3];
    col_dim[0] = (npy_intp)max_threads;
    col_dim[1] = (npy_intp)(nChannels * kW * kH * kD);
    col_dim[2] = (npy_intp)(topHeight * topWidth * topDepth);
    // Zero-initialised so every element is well-defined before use.
    PyArrayObject* col = (PyArrayObject*)PyArray_ZEROS(3,
            col_dim,
            PyArray_TYPE(top),
            0);
    if (NULL == col) {
        PyErr_Format(PyExc_RuntimeError,
                "Corr3dMM failed to allocate working memory of"
                " %%ld x %%ld x %%ld\n",
                col_dim[0], col_dim[1], col_dim[2]);
        // release the references acquired by PyArray_GETCONTIGUOUS
        Py_DECREF(bottom);
        Py_DECREF(weight);
        Py_DECREF(top);
        return NULL;
    }

    // Define some useful variables
    const int bottom_stride = PyArray_STRIDES(bottom)[0]/%(n_bytes)f;
    const int top_stride = PyArray_STRIDES(top)[0]/%(n_bytes)f;
    const int K_ = col_dim[1];
    const int N_ = col_dim[2];
    const int col_stride = (K_ * N_);
    const int M_ = nFilters;
    const %(c_float_type)s one = 1.0;
    const %(c_float_type)s zero = 0.0;
    char NTrans = 'N';
    char Trans = 'T';
    PyArrayObject *output;
    if (direction == 0) {  // forward pass
        output = top;
        // valid correlation: im3d2col, then gemm
        // Iterate over batch
        int blas_threads_saved = %(blas_get_num_threads)s;
        // Always force gemm to one thread when OpenMP is enabled, for best
        // and stable performance (parallelism comes from the batch loop).
        %(blas_set_num_threads)s(1);
        %(omp_flags)s
        for (int n = 0; n < batchSize; ++n) {
            int tid = %(omp_get_thread_num)s;
            // First, im3d2col
            im3d2col((%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride, nChannels,
                     bottomHeight, bottomWidth, bottomDepth,
                     kH, kW, kD, dilH, dilW, dilD, padH, padW, padD, dH, dW, dD,
                     (%(float_type)s*)PyArray_DATA(col)+ tid * col_stride);
            // Second, gemm
            %(gemm)s(&NTrans, &NTrans,
                     &N_, &M_, &K_,
                     &one,
                     (%(float_type)s*)PyArray_DATA(col)+ tid * col_stride, &N_,
                     (%(float_type)s*)PyArray_DATA(weight), &K_,
                     &zero,
                     (%(float_type)s*)PyArray_DATA(top) + n * top_stride, &N_);
        }
        // Restore to previous blas threads
        %(blas_set_num_threads)s(blas_threads_saved);
    }
    else if (direction == 1) {  // backprop wrt. weights
        output = weight;
        // Per-thread accumulation buffers, reduced into weight afterwards.
        npy_intp weight_dim[2];
        weight_dim[0] = (npy_intp)max_threads;
        weight_dim[1] = (npy_intp)(M_ * K_);
        PyArrayObject* local_weight = (PyArrayObject*)PyArray_ZEROS(2,
                weight_dim, PyArray_TYPE(weight), 0);
        if (NULL == local_weight)
        {
            PyErr_Format(PyExc_RuntimeError,
                    "Corr3dMM failed to allocate weight memory of %%ld x %%ld\n",
                    weight_dim[0], weight_dim[1]);
            // release the working buffer and the contiguous references
            Py_DECREF(col);
            Py_DECREF(bottom);
            Py_DECREF(weight);
            Py_DECREF(top);
            return NULL;
        }
        // valid convolution: im3d2col, then gemm
        // Iterate over batch
        int blas_threads_saved = %(blas_get_num_threads)s;
        // Always force gemm to one thread when OpenMP is enabled, for best
        // and stable performance (parallelism comes from the batch loop).
        %(blas_set_num_threads)s(1);
        // OMP for batch-level parallelization
        %(omp_flags)s
        for (int n = 0; n < batchSize; ++n) {
            int tid = %(omp_get_thread_num)s;
            // First, im3d2col
            im3d2col((%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride, nChannels,
                     bottomHeight, bottomWidth, bottomDepth,
                     kH, kW, kD, dilH, dilW, dilD, padH, padW, padD, dH, dW, dD,
                     (%(float_type)s*)PyArray_DATA(col)+ tid * col_stride);
            // Second, gemm
            // Note that we accumulate into weight. We do so by setting beta = 0
            // for the first iteration and beta = 1 for subsequent ones. (This
            // is faster than setting weight to all zeros before the loop.)
            %(gemm)s(&Trans, &NTrans,
                     &K_, &M_, &N_,
                     &one,
                     (%(float_type)s*)PyArray_DATA(col) + tid * col_stride, &N_,
                     (%(float_type)s*)PyArray_DATA(top) + n * top_stride, &N_,
                     (n == 0) ? &zero : &one,
                     (%(float_type)s*)PyArray_DATA(local_weight) +
                     tid * weight_dim[1], &K_);
        }
        // Restore to previous blas threads
        %(blas_set_num_threads)s(blas_threads_saved);
        // aggregate the per-thread partial weight gradients
        memset((%(float_type)s*)PyArray_DATA(weight), 0, M_ * K_*sizeof(%(float_type)s));
        /*
         * Put index "j" into the outer loop to get the
         * correct result when openmp is used (each j is owned
         * by exactly one thread, so no write races).
         */
        %(omp_flags)s
        for(int j = 0; j < weight_dim[1]; ++j){
            for(int i = 0; i < max_threads; ++i){
                ((%(float_type)s*)PyArray_DATA(weight))[j] +=
                    *((%(float_type)s*)PyArray_DATA(local_weight) +
                      i * weight_dim[1] + j);
            }
        }
        Py_DECREF(local_weight);
    }
    else if (direction == 2) {  // backprop wrt. inputs
        output = bottom;
        // bottom is set to zero here rather than inside of col2im
        PyArray_FILLWBYTE(bottom, 0);
        // full convolution: gemm, then col2im3d
        // Iterate over batch
        int blas_threads_saved = %(blas_get_num_threads)s;
        // Always force gemm to one thread when OpenMP is enabled, for best
        // and stable performance (parallelism comes from the batch loop).
        %(blas_set_num_threads)s(1);
        %(omp_flags)s
        for (int n = 0; n < batchSize; ++n) {
            // gemm into columns
            int tid = %(omp_get_thread_num)s;
            %(gemm)s(&NTrans, &Trans,
                     &N_, &K_, &M_,
                     &one,
                     (%(float_type)s*)PyArray_DATA(top) + n * top_stride, &N_,
                     (%(float_type)s*)PyArray_DATA(weight), &K_,
                     &zero,
                     (%(float_type)s*)PyArray_DATA(col) + tid * col_stride, &N_);
            // col2im back to the data
            col2im3d((%(float_type)s*)PyArray_DATA(col) + tid * col_stride, nChannels,
                     bottomHeight, bottomWidth, bottomDepth,
                     kH, kW, kD, dilH, dilW, dilD, padH, padW, padD, dH, dW, dD,
                     (%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride);
        }
        // Restore to previous blas threads
        %(blas_set_num_threads)s(blas_threads_saved);
    }
    else {
        // defensive: previously an invalid direction returned an
        // uninitialized pointer; now it is reported as an error
        PyErr_SetString(PyExc_ValueError,
                "Corr3dMM: direction must be 0, 1 or 2\n");
        output = NULL;
    }
    // Free temporary columns
    Py_DECREF(col);
    // decref from contiguous check
    Py_DECREF(bottom);
    Py_DECREF(weight);
    Py_DECREF(top);
    // Note that we don't change the refcount of the output matrix here. Output
    // (re)allocation and refcounting is done in BaseCorr3dMM.c_code_helper();
    // in here output is just aliased to one of bottom, weights, or top.
    return output;
}
...@@ -10,6 +10,8 @@ from theano.gof.opt import copy_stack_trace ...@@ -10,6 +10,8 @@ from theano.gof.opt import copy_stack_trace
from theano.tensor.nnet.corr import ( from theano.tensor.nnet.corr import (
CorrMM, CorrMM_gradInputs, CorrMM_gradWeights) CorrMM, CorrMM_gradInputs, CorrMM_gradWeights)
from theano.tensor.nnet.corr3d import (
Corr3dMM, Corr3dMM_gradInputs, Corr3dMM_gradWeights)
from theano.tensor.nnet.blocksparse import ( from theano.tensor.nnet.blocksparse import (
SparseBlockGemv, SparseBlockGemv,
SparseBlockOuter, SparseBlockOuter,
...@@ -90,6 +92,28 @@ def local_abstractconv_gemm(node): ...@@ -90,6 +92,28 @@ def local_abstractconv_gemm(node):
return [rval] return [rval]
@local_optimizer([AbstractConv3d])
def local_abstractconv3d_gemm(node):
    """Replace an AbstractConv3d node with the BLAS-based Corr3dMM op."""
    # The GEMM-based op needs a C++ compiler and a BLAS library.
    if theano.config.cxx == "" or not theano.config.blas.ldflags:
        return
    op = node.op
    if not isinstance(op, AbstractConv3d):
        return None
    img, kern = node.inputs
    if not (isinstance(img.type, TensorType) and
            isinstance(kern.type, TensorType)):
        return None
    if op.filter_flip:
        # Corr3dMM performs correlation, so flip the kernel on each
        # spatial axis to obtain a convolution.
        kern = kern[:, :, ::-1, ::-1, ::-1]
    rval = Corr3dMM(border_mode=op.border_mode,
                    subsample=op.subsample,
                    filter_dilation=op.filter_dilation)(img, kern)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]
@local_optimizer([AbstractConv2d_gradWeights]) @local_optimizer([AbstractConv2d_gradWeights])
def local_abstractconv_gradweight_gemm(node): def local_abstractconv_gradweight_gemm(node):
if theano.config.cxx == "" or not theano.config.blas.ldflags: if theano.config.cxx == "" or not theano.config.blas.ldflags:
...@@ -115,6 +139,31 @@ def local_abstractconv_gradweight_gemm(node): ...@@ -115,6 +139,31 @@ def local_abstractconv_gradweight_gemm(node):
return [rval] return [rval]
@local_optimizer([AbstractConv3d_gradWeights])
def local_abstractconv3d_gradweight_gemm(node):
    """Replace AbstractConv3d_gradWeights with Corr3dMM_gradWeights."""
    # The GEMM-based op needs a C++ compiler and a BLAS library.
    if theano.config.cxx == "" or not theano.config.blas.ldflags:
        return
    op = node.op
    if not isinstance(op, AbstractConv3d_gradWeights):
        return None
    img, topgrad, shape = node.inputs
    if not (isinstance(img.type, TensorType) and
            isinstance(topgrad.type, TensorType)):
        return None
    rval = Corr3dMM_gradWeights(border_mode=op.border_mode,
                                subsample=op.subsample,
                                filter_dilation=op.filter_dilation)(img, topgrad,
                                                                    shape)
    copy_stack_trace(node.outputs[0], rval)
    if op.filter_flip:
        # Corr3dMM computed a correlation gradient; flip it back so it
        # matches the convolution kernel layout expected by callers.
        rval = rval[:, :, ::-1, ::-1, ::-1]
    rval = theano.tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]
@local_optimizer([AbstractConv2d_gradInputs]) @local_optimizer([AbstractConv2d_gradInputs])
def local_abstractconv_gradinputs_gemm(node): def local_abstractconv_gradinputs_gemm(node):
if theano.config.cxx == "" or not theano.config.blas.ldflags: if theano.config.cxx == "" or not theano.config.blas.ldflags:
...@@ -138,6 +187,29 @@ def local_abstractconv_gradinputs_gemm(node): ...@@ -138,6 +187,29 @@ def local_abstractconv_gradinputs_gemm(node):
return [rval] return [rval]
@local_optimizer([AbstractConv3d_gradInputs])
def local_abstractconv3d_gradinputs_gemm(node):
    """Replace AbstractConv3d_gradInputs with Corr3dMM_gradInputs."""
    # The GEMM-based op needs a C++ compiler and a BLAS library.
    if theano.config.cxx == "" or not theano.config.blas.ldflags:
        return
    op = node.op
    if not isinstance(op, AbstractConv3d_gradInputs):
        return None
    kern, topgrad, shape = node.inputs
    if not (isinstance(kern.type, TensorType) and
            isinstance(topgrad.type, TensorType)):
        return None
    if op.filter_flip:
        # Corr3dMM performs correlation, so flip the kernel on each
        # spatial axis to obtain a convolution.
        kern = kern[:, :, ::-1, ::-1, ::-1]
    rval = Corr3dMM_gradInputs(border_mode=op.border_mode,
                               subsample=op.subsample,
                               filter_dilation=op.filter_dilation)(kern, topgrad,
                                                                   shape)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]
@local_optimizer([AbstractConv2d]) @local_optimizer([AbstractConv2d])
def local_conv2d_cpu(node): def local_conv2d_cpu(node):
...@@ -481,6 +553,14 @@ conv_groupopt.register('local_abstractconv_gradweight_gemm', ...@@ -481,6 +553,14 @@ conv_groupopt.register('local_abstractconv_gradweight_gemm',
conv_groupopt.register('local_abstractconv_gradinputs_gemm', conv_groupopt.register('local_abstractconv_gradinputs_gemm',
local_abstractconv_gradinputs_gemm, 30, local_abstractconv_gradinputs_gemm, 30,
'conv_gemm', 'fast_compile', 'fast_run') 'conv_gemm', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv3d_gemm', local_abstractconv3d_gemm, 30,
'conv_gemm', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv3d_gradweight_gemm',
local_abstractconv3d_gradweight_gemm, 30,
'conv_gemm', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv3d_gradinputs_gemm',
local_abstractconv3d_gradinputs_gemm, 30,
'conv_gemm', 'fast_compile', 'fast_run')
# Legacy convolution # Legacy convolution
conv_groupopt.register('local_conv2d_cpu', local_conv2d_cpu, 40, conv_groupopt.register('local_conv2d_cpu', local_conv2d_cpu, 40,
'fast_compile', 'fast_run') 'fast_compile', 'fast_run')
......
...@@ -20,6 +20,8 @@ from theano.tensor.nnet.abstract_conv import bilinear_upsampling ...@@ -20,6 +20,8 @@ from theano.tensor.nnet.abstract_conv import bilinear_upsampling
from theano.tensor.nnet.conv import ConvOp from theano.tensor.nnet.conv import ConvOp
from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights, from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights,
CorrMM_gradInputs) CorrMM_gradInputs)
from theano.tensor.nnet.corr3d import (Corr3dMM, Corr3dMM_gradWeights,
Corr3dMM_gradInputs)
from theano.tensor.nnet.Conv3D import Conv3D from theano.tensor.nnet.Conv3D import Conv3D
from theano.tensor.nnet.ConvGrad3D import ConvGrad3D from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
from theano.tensor.nnet.ConvTransp3D import ConvTransp3D from theano.tensor.nnet.ConvTransp3D import ConvTransp3D
...@@ -734,11 +736,9 @@ class TestCorrConv3d(BaseTestConv3d): ...@@ -734,11 +736,9 @@ class TestCorrConv3d(BaseTestConv3d):
BaseTestConv3d.setup_class() BaseTestConv3d.setup_class()
def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1, 1)): def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1, 1)):
if b not in ((0, 0, 0), 'valid'):
raise SkipTest("Only border_mode valid is implemented for basic cpu Conv3D.")
if fd != (1, 1, 1):
raise SkipTest("No dilation implementation for basic cpu Conv3D.")
o = self.get_output_shape(i, f, s, b, fd) o = self.get_output_shape(i, f, s, b, fd)
if fd != (1, 1, 1):
raise SkipTest("No reference implementation for 3D dilation.")
if (not theano.config.blas.ldflags or if (not theano.config.blas.ldflags or
not theano.config.cxx or not theano.config.cxx or
theano.config.mode == "FAST_COMPILE"): theano.config.mode == "FAST_COMPILE"):
...@@ -746,17 +746,17 @@ class TestCorrConv3d(BaseTestConv3d): ...@@ -746,17 +746,17 @@ class TestCorrConv3d(BaseTestConv3d):
self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s, self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
verify_grad=True, provide_shape=provide_shape, verify_grad=True, provide_shape=provide_shape,
border_mode=b, filter_flip=flip, border_mode=b, filter_flip=flip,
target_op=Conv3D, check_trace=True, target_op=Corr3dMM, check_trace=True,
filter_dilation=fd) filter_dilation=fd)
self.run_gradweight(inputs_shape=i, filters_shape=f, self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, verify_grad=True, output_shape=o, subsample=s, verify_grad=True,
provide_shape=provide_shape, border_mode=b, provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=ConvGrad3D, filter_flip=flip, target_op=Corr3dMM_gradWeights,
check_trace=True, filter_dilation=fd) check_trace=True, filter_dilation=fd)
self.run_gradinput(inputs_shape=i, filters_shape=f, self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, verify_grad=True, output_shape=o, subsample=s, verify_grad=True,
provide_shape=provide_shape, border_mode=b, provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=ConvTransp3D, filter_flip=flip, target_op=Corr3dMM_gradInputs,
check_trace=True, filter_dilation=fd) check_trace=True, filter_dilation=fd)
...@@ -764,7 +764,6 @@ class TestCpuConv3d(BaseTestConv3d): ...@@ -764,7 +764,6 @@ class TestCpuConv3d(BaseTestConv3d):
@classmethod @classmethod
def setup(cls): def setup(cls):
BaseTestConv3d.setup_class() BaseTestConv3d.setup_class()
# TODO check how conv_gemm works for conv3d
cls.mode = theano.compile.mode.get_default_mode().excluding('conv_gemm') cls.mode = theano.compile.mode.get_default_mode().excluding('conv_gemm')
cls.opt_err = theano.config.on_opt_error cls.opt_err = theano.config.on_opt_error
theano.config.on_opt_error = 'ignore' theano.config.on_opt_error = 'ignore'
......
from __future__ import absolute_import, print_function, division
from nose.plugins.skip import SkipTest
from nose.plugins.attrib import attr
from nose.tools import assert_equals
import numpy
from six import integer_types
import theano
import theano.tensor as T
from theano.tests import unittest_tools as utt
from theano.tensor.nnet import corr3d, conv
class TestCorr3D(utt.InferShapeTester):
    """Tests for the CPU ``Corr3dMM`` ops and their gradients.

    Each correlation is checked against a naive numpy reference
    implementation built inside :meth:`validate`; shape inference is
    exercised through ``InferShapeTester._compile_and_check``.
    """
    # FAST_COMPILE does not compile the C ops, so force FAST_RUN there.
    if theano.config.mode == "FAST_COMPILE":
        mode = theano.compile.get_mode("FAST_RUN")
    else:
        mode = None
    dtype = theano.config.floatX
    def setUp(self):
        """Create the symbolic 5D inputs shared by all tests and skip
        when the required build tools are unavailable."""
        super(TestCorr3D, self).setUp()
        self.input = T.tensor5('input', dtype=self.dtype)
        self.input.name = 'default_V'
        self.filters = T.tensor5('filters', dtype=self.dtype)
        self.filters.name = 'default_filters'
        if not conv.imported_scipy_signal and theano.config.cxx == "":
            raise SkipTest("Corr3dMM tests need SciPy or a c++ compiler")
        if not theano.config.blas.ldflags:
            raise SkipTest("Corr3dMM tests need a BLAS")
    def validate(self, image_shape, filter_shape,
                 border_mode='valid', subsample=(1, 1, 1),
                 input=None, filters=None, verify_grad=True,
                 non_contiguous=False, filter_dilation=(1, 1, 1)):
        """
        Run Corr3dMM and compare it against a naive numpy reference.

        :param image_shape: The constant shape info passed to corr3dMM.
        :param filter_shape: The constant shape info passed to corr3dMM.
        :param border_mode: 'valid', 'full', 'half', an int, or a 3-tuple
            of per-axis paddings.
        :param subsample: strides along the three spatial axes.
        :param verify_grad: also check gradients with ``utt.verify_grad``.
        :param non_contiguous: feed non-C-contiguous inputs to the op.
        :param filter_dilation: filter dilation along the spatial axes.
        """
        N_image_shape = [T.get_scalar_constant_value(T.as_tensor_variable(x))
                         for x in image_shape]
        N_filter_shape = [T.get_scalar_constant_value(T.as_tensor_variable(x))
                          for x in filter_shape]
        if input is None:
            input = self.input
        if filters is None:
            filters = self.filters
        # THEANO IMPLEMENTATION
        # we create a symbolic function so that verify_grad can work
        def sym_Corr3dMM(input, filters):
            # define theano graph and function
            input.name = 'input'
            filters.name = 'filters'
            rval = corr3d.Corr3dMM(border_mode, subsample,
                                   filter_dilation)(input, filters)
            rval.name = 'corr_output'
            return rval
        output = sym_Corr3dMM(input, filters)
        output.name = 'Corr3dMM()(%s,%s)' % (input.name, filters.name)
        theano_corr = theano.function([input, filters], output, mode=self.mode)
        # initialize input and compute result
        image_data = numpy.random.random(N_image_shape).astype(self.dtype)
        filter_data = numpy.random.random(N_filter_shape).astype(self.dtype)
        image_data /= 10
        filter_data /= 10
        if non_contiguous:
            # A double transpose keeps the values but produces
            # non-C-contiguous arrays.
            image_data = numpy.transpose(image_data, axes=(0, 1, 4, 3, 2))
            image_data = image_data.copy()
            image_data = numpy.transpose(image_data, axes=(0, 1, 4, 3, 2))
            filter_data = numpy.transpose(filter_data, axes=(0, 1, 4, 3, 2))
            filter_data = filter_data.copy()
            filter_data = numpy.transpose(filter_data, axes=(0, 1, 4, 3, 2))
            assert not image_data.flags['CONTIGUOUS']
            assert not filter_data.flags['CONTIGUOUS']
        theano_output = theano_corr(image_data, filter_data)
        # REFERENCE IMPLEMENTATION
        # Testing correlation, not convolution. Reverse filters.
        filter_data_corr = numpy.array(filter_data[:, :, ::-1, ::-1, ::-1],
                                       copy=True,
                                       order='C')
        orig_image_data = image_data
        img_shape3d = numpy.array(N_image_shape[-3:])
        fil_shape3d = numpy.array(N_filter_shape[-3:])
        dil_shape3d = numpy.array(filter_dilation)
        # effective kernel extent once dilation is applied
        dil_fil_shape3d = (fil_shape3d - 1) * dil_shape3d + 1
        subsample3d = numpy.array(subsample)
        # translate border_mode into a per-axis zero-padding amount
        if border_mode == 'full':
            padHWD = (dil_fil_shape3d - 1)
        elif border_mode == 'valid':
            padHWD = numpy.array([0, 0, 0])
        elif border_mode == 'half':
            padHWD = numpy.floor(dil_fil_shape3d / 2).astype('int32')
        elif isinstance(border_mode, tuple):
            padHWD = numpy.array(border_mode)
        elif isinstance(border_mode, integer_types):
            padHWD = numpy.array([border_mode, border_mode, border_mode])
        else:
            raise NotImplementedError('Unsupported border_mode {}'.format(border_mode))
        out_shape3d = numpy.floor((img_shape3d + 2 * (padHWD) - dil_fil_shape3d) / subsample3d) + 1
        # avoid numpy deprecation
        out_shape3d = out_shape3d.astype('int32')
        out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape3d)
        ref_output = numpy.zeros(out_shape)
        # loop over output feature maps
        ref_output.fill(0)
        # zero-pad the input so the loops below can index freely
        image_data2 = numpy.zeros((N_image_shape[0], N_image_shape[1],
                                   N_image_shape[2] + 2 * padHWD[0],
                                   N_image_shape[3] + 2 * padHWD[1],
                                   N_image_shape[4] + 2 * padHWD[2]))
        image_data2[:, :,
                    padHWD[0]:padHWD[0] + N_image_shape[2],
                    padHWD[1]:padHWD[1] + N_image_shape[3],
                    padHWD[2]:padHWD[2] + N_image_shape[4]] = image_data
        image_data = image_data2
        N_image_shape = image_data.shape
        # naive 6-level correlation loop; filter3d is re-flipped inside
        # the sum, so the net effect is correlation with the original
        # filter values
        for bb in range(N_image_shape[0]):
            for nn in range(N_filter_shape[0]):
                for im0 in range(N_image_shape[1]):
                    filter3d = filter_data_corr[nn, im0, :, :, :]
                    image3d = image_data[bb, im0, :, :, :]
                    for row in range(ref_output.shape[2]):
                        irow = row * subsample[0]  # image row
                        for col in range(ref_output.shape[3]):
                            icol = col * subsample[1]  # image col
                            for slc in range(ref_output.shape[4]):
                                islc = slc * subsample[2]  # image slice
                                ref_output[bb, nn, row, col, slc] += (image3d[
                                    irow:irow + dil_fil_shape3d[0]:filter_dilation[0],
                                    icol:icol + dil_fil_shape3d[1]:filter_dilation[1],
                                    islc:islc + dil_fil_shape3d[2]:filter_dilation[2]
                                ] * filter3d[::-1, ::-1, ::-1]
                                ).sum()
        utt.assert_allclose(theano_output, ref_output)
        # TEST GRADIENT
        if verify_grad:
            utt.verify_grad(sym_Corr3dMM, [orig_image_data, filter_data],
                            mode=self.mode)
    @attr('slow')
    def test_basic(self):
        """
        Tests that basic correlations work for odd and even
        dimensions of image and filter shapes, as well as rectangular
        images and filters.
        """
        border_modes = ['valid', 'full', 'half', (1, 1, 1),
                        (2, 1, 1), (1, 2, 1), (1, 1, 2),
                        (3, 3, 3), 1]
        img_shapes = [(2, 2, 3, 3, 3), (3, 2, 8, 8, 8), (3, 2, 7, 5, 5), (3, 2, 7, 5, 5),
                      (1, 2, 8, 8, 8), (1, 2, 7, 5, 5)]
        fil_shapes = [(2, 2, 2, 2, 2), (1, 2, 5, 5, 5), (2, 2, 2, 3, 2), (2, 2, 3, 2, 2),
                      (1, 2, 5, 5, 5), (1, 2, 2, 3, 3)]
        for border_mode in border_modes:
            for img, fil in zip(img_shapes, fil_shapes):
                self.validate(img, fil, border_mode, verify_grad=False)
        # Very slow on with 'full' or 'half'
        self.validate((1, 10, 213, 129, 129), (46, 10, 212, 1, 1), 'valid', verify_grad=False)
    def test_img_kernel_same_shape(self):
        """Correlation where the image and the kernel have equal shapes."""
        self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), 'full')
        self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), 'valid')
        self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), 'half')
        self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), (1, 1, 1))
        self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), 1)
    @attr('slow')
    def test_subsample(self):
        """
        Tests correlation where subsampling != (1,1,1)
        """
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), 'valid', subsample=(2, 2, 2))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), 'valid', subsample=(2, 1, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), 'valid', subsample=(3, 3, 3))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), 'full', subsample=(2, 2, 2))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), 'full', subsample=(2, 1, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), 'full', subsample=(3, 3, 3))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), 'half', subsample=(2, 2, 2))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), 'half', subsample=(2, 1, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), 'half', subsample=(3, 3, 3))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (1, 1, 1), subsample=(2, 2, 2))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (2, 1, 1), subsample=(2, 1, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), (1, 2, 2), subsample=(3, 3, 3))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), 1, subsample=(3, 3, 3))
    def test_filter_dilation(self):
        """
        Tests correlation where filter dilation != (1,1,1)
        """
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), 'valid', filter_dilation=(2, 2, 2))
        self.validate((3, 2, 14, 10, 10), (2, 2, 2, 3, 3), 'valid', filter_dilation=(3, 1, 1))
        self.validate((1, 1, 14, 14, 14), (1, 1, 3, 3, 3), 'valid', filter_dilation=(2, 3, 3))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), 'full', filter_dilation=(2, 2, 2))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), 'full', filter_dilation=(3, 1, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), 'full', filter_dilation=(2, 3, 3))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), 'half', filter_dilation=(2, 2, 2))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), 'half', filter_dilation=(3, 1, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), 'half', filter_dilation=(2, 3, 3))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (1, 1, 1), filter_dilation=(2, 2, 2))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (2, 1, 1), filter_dilation=(2, 1, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), (1, 2, 1), filter_dilation=(1, 2, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), (1, 1, 2), filter_dilation=(1, 1, 2))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), 1, subsample=(3, 3, 3), filter_dilation=(2, 2, 2))
    @attr('slow')
    def test_shape_Constant_tensor(self):
        """
        Tests correlation where the {image,filter}_shape is a Constant tensor.
        """
        as_t = T.as_tensor_variable
        border_modes = ['valid', 'full', 'half', (1, 1, 1), (2, 1, 1),
                        (1, 2, 1), (1, 1, 2), (3, 3, 3), 1]
        for border_mode in border_modes:
            self.validate((as_t(3), as_t(2), as_t(7), as_t(5), as_t(5)),
                          (5, 2, 2, 3, 3), border_mode)
            self.validate(as_t([3, 2, 7, 5, 5]), (5, 2, 2, 3, 3), border_mode)
            self.validate(as_t((3, 2, 7, 5, 5)), (5, 2, 2, 3, 3), border_mode)
            self.validate((3, 2, 7, 5, 5), (as_t(5), as_t(2), as_t(2),
                          as_t(3), as_t(3)), 'valid')
            self.validate((3, 2, 7, 5, 5), as_t([5, 2, 2, 3, 3]), border_mode)
            self.validate(as_t([3, 2, 7, 5, 5]), as_t([5, 2, 2, 3, 3]), border_mode)
    def test_invalid_filter_shape(self):
        """
        Tests scenario where filter_shape[1] != input_shape[1]
        """
        self.assertRaises(ValueError, self.validate,
                          (3, 2, 8, 8, 8), (4, 3, 5, 5, 8),
                          'valid')
    def test_full_mode(self):
        """
        Tests basic correlation in full mode and case where filter
        is larger than the input image.
        """
        self.validate((3, 2, 5, 5, 5), (4, 2, 8, 8, 8), 'full')
        def f():
            # 'valid' mode must reject a filter larger than the image
            self.validate((3, 2, 5, 5, 5), (4, 2, 8, 8, 8), 'valid')
        self.assertRaises(Exception, f)
    def test_wrong_input(self):
        """
        Make sure errors are raised when image and kernel are not 5D tensors
        """
        self.assertRaises(Exception, self.validate, (3, 2, 8, 8, 8), (4, 2, 5, 5, 5),
                          'valid', input=T.dmatrix())
        self.assertRaises(Exception, self.validate, (3, 2, 8, 8, 8), (4, 2, 5, 5, 5),
                          'valid', filters=T.dvector())
        self.assertRaises(Exception, self.validate, (3, 2, 8, 8, 8), (4, 2, 5, 5, 5),
                          'valid', input=T.dtensor3())
        self.assertRaises(Exception, self.validate, (3, 2, 8, 8, 8), (4, 2, 5, 5, 5),
                          'valid', input=T.dtensor4())
    def test_dtype_upcast(self):
        """
        Checks dtype upcast for Corr3dMM methods.
        """
        def rand(shape, dtype='float64'):
            # uniform values in [-1, 1)
            r = numpy.asarray(numpy.random.rand(*shape), dtype=dtype)
            return r * 2 - 1
        ops = [corr3d.Corr3dMM, corr3d.Corr3dMM_gradWeights, corr3d.Corr3dMM_gradInputs]
        a_shapes = [[4, 5, 6, 3, 3], [1, 5, 6, 3, 3], [1, 5, 6, 3, 3]]
        b_shapes = [[7, 5, 3, 2, 2], [1, 5, 3, 1, 1], [7, 1, 3, 1, 1]]
        dtypes = ['float32', 'float64']
        for op, a_shape, b_shape in zip(ops, a_shapes, b_shapes):
            for a_dtype in dtypes:
                for b_dtype in dtypes:
                    c_dtype = theano.scalar.upcast(a_dtype, b_dtype)
                    a_tens = T.tensor5(dtype=a_dtype)
                    b_tens = T.tensor5(dtype=b_dtype)
                    a_tens_val = rand(a_shape, dtype=a_dtype)
                    b_tens_val = rand(b_shape, dtype=b_dtype)
                    c_tens = op()(a_tens, b_tens)
                    f = theano.function([a_tens, b_tens], c_tens, mode=self.mode)
                    assert_equals(f(a_tens_val, b_tens_val).dtype, c_dtype)
    @attr('slow')
    def test_infer_shape_forward(self):
        """Shape inference for the forward Corr3dMM op."""
        if theano.config.mode == "FAST_COMPILE":
            raise SkipTest("Corr3dMM don't work in FAST_COMPILE")
        def rand(*shape):
            r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
            return r * 2 - 1
        corr3dMM = corr3d.Corr3dMM
        adtens = T.dtensor5()
        bdtens = T.dtensor5()
        aivec_vals = [[4, 5, 6, 3, 3], [6, 2, 8, 3, 3], [3, 6, 7, 5, 5],
                      [3, 6, 7, 5, 5], [5, 2, 4, 3, 3]]
        bivec_vals = [[7, 5, 3, 2, 2], [4, 2, 5, 3, 3], [5, 6, 3, 2, 2],
                      [5, 6, 2, 3, 3], [6, 2, 4, 3, 3]]
        modes = ['valid', 'full', 'half', (1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2), 1]
        subsamples = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2)]
        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # Corr3dMM
                    cdtens = corr3dMM(border_mode=mode, subsample=subsample)(adtens, bdtens)
                    self._compile_and_check([adtens, bdtens],
                                            [cdtens],
                                            [adtens_val, bdtens_val], corr3dMM,
                                            warn=False)
    @attr('slow')
    def test_infer_shape_gradW(self):
        """Shape inference for Corr3dMM_gradWeights."""
        if theano.config.mode == "FAST_COMPILE":
            raise SkipTest("Corr3dMM don't work in FAST_COMPILE")
        def rand(*shape):
            r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
            return r * 2 - 1
        corr3dMM = corr3d.Corr3dMM
        gradW = corr3d.Corr3dMM_gradWeights
        adtens = T.dtensor5()
        bdtens = T.dtensor5()
        aivec_vals = [[1, 5, 6, 3, 3], [8, 2, 7, 3, 3], [1, 6, 9, 4, 4],
                      [9, 6, 8, 5, 5], [9, 1, 6, 8, 8]]
        bivec_vals = [[7, 5, 3, 1, 1], [4, 2, 5, 3, 3], [12, 6, 3, 2, 2],
                      [5, 6, 1, 3, 3], [11, 1, 3, 3, 3]]
        modes = ['valid', 'full', 'half', (1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2), 1]
        subsamples = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2)]
        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # Corr3dMM: compute a forward output to feed the gradient op
                    cdtens = corr3dMM(border_mode=mode, subsample=subsample)(adtens, bdtens)
                    f = theano.function([adtens, bdtens], cdtens)
                    cdtens_val = f(adtens_val, bdtens_val)
                    # Corr3dMM_gradWeights
                    shape = (theano.shared(bivec_val[2]), theano.shared(bivec_val[3]),
                             theano.shared(bivec_val[4]))
                    bdtens_g = gradW(border_mode=mode,
                                     subsample=subsample)(adtens, cdtens, shape=shape)
                    self._compile_and_check([adtens, cdtens],
                                            [bdtens_g],
                                            [adtens_val, cdtens_val], gradW,
                                            warn=False)
    @attr('slow')
    def test_infer_shape_gradI(self):
        """Shape inference for Corr3dMM_gradInputs."""
        if theano.config.mode == "FAST_COMPILE":
            raise SkipTest("Corr3dMM don't work in FAST_COMPILE")
        def rand(*shape):
            r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
            return r * 2 - 1
        corr3dMM = corr3d.Corr3dMM
        gradI = corr3d.Corr3dMM_gradInputs
        adtens = T.dtensor5()
        bdtens = T.dtensor5()
        aivec_vals = [[1, 5, 6, 3, 3], [8, 2, 7, 3, 3], [1, 6, 9, 4, 4],
                      [9, 6, 8, 5, 5], [9, 1, 6, 8, 8]]
        bivec_vals = [[7, 5, 3, 1, 1], [4, 2, 5, 3, 3], [12, 6, 3, 2, 2],
                      [5, 6, 1, 3, 3], [7, 1, 3, 4, 4]]
        modes = ['valid', 'full', 'half', (1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2), 1]
        subsamples = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2)]
        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # Corr3dMM: compute a forward output to feed the gradient op
                    cdtens = corr3dMM(border_mode=mode, subsample=subsample)(adtens, bdtens)
                    f = theano.function([adtens, bdtens], cdtens)
                    cdtens_val = f(adtens_val, bdtens_val)
                    # Corr3dMM_gradInputs
                    shape = (theano.shared(aivec_val[2]), theano.shared(aivec_val[3]),
                             theano.shared(aivec_val[4]))
                    adtens_g = gradI(border_mode=mode,
                                     subsample=subsample)(bdtens, cdtens, shape=shape)
                    self._compile_and_check([bdtens, cdtens],
                                            [adtens_g],
                                            [bdtens_val, cdtens_val], gradI,
                                            warn=False)
    def test_non_contiguous(self):
        """Correlation with non-C-contiguous input and filter arrays."""
        self.validate((2, 2, 3, 3, 3), (2, 2, 2, 2, 2), 'valid', non_contiguous=True)
        self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), 'valid', non_contiguous=True)
        self.validate((3, 2, 7, 5, 5), (5, 2, 2, 3, 3), 'valid', non_contiguous=True)
        self.validate((3, 2, 7, 5, 5), (5, 2, 3, 2, 2), 'valid', non_contiguous=True)
        self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), 'full', non_contiguous=True)
        self.validate((3, 2, 7, 5, 5), (5, 2, 2, 3, 3), 'full', non_contiguous=True)
        self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), 'half', non_contiguous=True)
        self.validate((3, 2, 7, 5, 5), (5, 2, 2, 3, 3), 'half', non_contiguous=True)
        self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), (1, 1, 1), non_contiguous=True)
        self.validate((3, 2, 7, 5, 5), (5, 2, 2, 3, 3), (1, 1, 2), non_contiguous=True)
        self.validate((3, 2, 7, 5, 5), (5, 2, 2, 3, 3), (1, 2, 1), non_contiguous=True)
        self.validate((3, 2, 7, 5, 5), (5, 2, 2, 3, 3), (2, 1, 1), non_contiguous=True)
        self.validate((3, 2, 7, 5, 5), (5, 2, 2, 3, 3), 2, non_contiguous=True)
if __name__ == '__main__':
    # Quick smoke test when the file is executed directly.
    t = TestCorr3D('setUp')
    t.setUp()
    # Bug fix: the class defines no ``test_infer_shape`` method (it was
    # split into forward/gradW/gradI variants), so the old call raised
    # AttributeError. Run the forward shape-inference test instead.
    t.test_infer_shape_forward()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论