提交 0c599015 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #3285 from JesseLivezey/cpu_corr3

[WIP] CpuCorrMM closes #3026 - redux
...@@ -120,6 +120,13 @@ TODO: Give examples on how to use these things! They are pretty complicated.
    available. To explicitly disable the graph optimizer, set
    ``THEANO_FLAGS=optimizer_excluding=conv_gemm`` in your environment.
    If using it, please see the warning about a bug in CUDA 5.0 to 6.0 below.
- :func:`CorrMM <theano.tensor.nnet.corr.CorrMM>`
This is a CPU-only 2d correlation implementation taken from
`caffe <https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cpp>`_
and also used by Torch. It does not flip the kernel. As it provides a gradient,
you can use it as a replacement for nnet.conv2d. There is currently no
optimization to move this to GPU. This will be added when the new convolution
interface is finished.
- :func:`dnn_conv <theano.sandbox.cuda.dnn.dnn_conv>` GPU-only
    convolution using NVIDIA's cuDNN library. This requires that you have
    cuDNN installed and available, which in turn requires CUDA 6.5 and a GPU
......
import os
import logging
import theano
from theano import Apply
from theano import gof
from theano.tensor import as_tensor_variable, TensorType
from theano.tensor.blas_headers import blas_header_text
from theano.tensor.blas import ldflags
_logger = logging.getLogger(__name__)
class BaseCorrMM(gof.Op):
    """
    Base class for `CorrMM`, `CorrMM_gradWeights` and
    `CorrMM_gradInputs`. Cannot be used directly.

    Parameters
    ----------
    border_mode : {'valid', 'full', 'half'}
        Additionally, the padding size could be directly specified by an
        integer or a pair of integers.
    subsample
        Perform subsampling of the output (default: (1, 1)).
    """
    # The C implementation does not use broadcasting information.
    check_broadcast = False
    __props__ = ('border_mode', 'subsample')

    def __init__(self, border_mode="valid", subsample=(1, 1)):
        # A single integer means the same padding on both dimensions.
        if isinstance(border_mode, int):
            if border_mode < 0:
                raise ValueError(
                    'invalid border_mode {}, which must be a '
                    'non-negative integer'.format(border_mode))
            border_mode = (border_mode, border_mode)
        if isinstance(border_mode, tuple):
            if len(border_mode) != 2 or border_mode[0] < 0 or border_mode[1] < 0:
                raise ValueError(
                    'invalid border_mode {}, which must be a '
                    'pair of non-negative integers'.format(border_mode))
            # Normalize to a pair of plain ints so __props__ hashing is stable.
            pad_h, pad_w = map(int, border_mode)
            border_mode = (pad_h, pad_w)
        if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
                border_mode in ('valid', 'full', 'half')):
            raise ValueError(
                'invalid border_mode {}, which must be either '
                '"valid", "full", "half", an integer or a pair of'
                ' integers'.format(border_mode))
        self.border_mode = border_mode
        if len(subsample) != 2:
            raise ValueError("subsample must have two elements")
        self.subsample = subsample

    @property
    def pad(self):
        """Explicit padding: the border_mode pair, or (0, 0) for 'valid'."""
        if self.border_mode != 'valid':
            return self.border_mode
        return (0, 0)

    def __str__(self):
        return '%s{%s, %s}' % (
            self.__class__.__name__,
            self.border_mode,
            str(self.subsample))

    def c_support_code(self):
        return blas_header_text()

    def c_libraries(self):
        return ldflags()

    def c_compile_args(self):
        return ldflags(libs=False, flags=True)

    def c_lib_dirs(self):
        return ldflags(libs=False, libs_dir=True)

    def c_header_dirs(self):
        return ldflags(libs=False, include_dir=True)

    def c_headers(self):
        return ['<stdio.h>']

    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
        return (1, 0)

    def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of
        # these files
        sub = {}
        # Use the public `inputs` attribute rather than reaching into
        # node.__dict__ (they resolve to the same list).
        dtype = str(node.inputs[0].dtype)
        assert dtype in ('float32', 'float64')
        # Select the BLAS routine and C types matching the input dtype.
        if dtype == 'float32':
            sub['gemm'] = 'sgemm_'
            sub['float_type'] = 'npy_float'
            sub['float_typenum'] = 'NPY_FLOAT'
            sub['n_bytes'] = 4
            sub['c_float_type'] = 'float'
        else:
            sub['gemm'] = 'dgemm_'
            sub['float_type'] = 'npy_double'
            sub['float_typenum'] = 'NPY_DOUBLE'
            sub['n_bytes'] = 8
            sub['c_float_type'] = 'double'
        # Load the C template(s) shipped next to this module and fill in
        # the dtype-dependent placeholders.
        files = ['corr_gemm.c']
        codes = [open(os.path.join(os.path.split(__file__)[0], f)).read()
                 for f in files]
        final_code = ''
        for code in codes:
            final_code += code
        return final_code % sub

    def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None):
        """
        This generates the C code for CorrMM (direction="forward"),
        CorrMM_gradWeights (direction="backprop weights"), and
        CorrMM_gradInputs (direction="backprop inputs").
        Depending on the direction, one of bottom, weights, top will
        receive the output, while the other two serve as inputs.

        :param bottom: Variable name of the input images in the forward pass,
            or the gradient of the input images in backprop wrt. inputs
        :param weights: Variable name of the filters in the forward pass,
            or the gradient of the filters in backprop wrt. weights
        :param top: Variable name of the output images / feature maps in the
            forward pass, or the gradient of the outputs in the backprop passes
        :param direction: "forward" to correlate bottom with weights and store
            results in top,
            "backprop weights" to do a valid convolution of bottom with top
            (swapping the first two dimensions) and store results in weights,
            and "backprop inputs" to do a full convolution of top with weights
            (swapping the first two dimensions) and store results in bottom.
        :param sub: Dictionary of substitutions useable to help generating the
            C code.
        :param height: If self.subsample[0] != 1, a variable giving the height
            of the filters for direction="backprop weights" or the height of
            the input images for direction="backprop inputs".
            If self.border_mode == 'half', a variable giving the height of the
            filters for direction="backprop weights". Ignored otherwise.
        :param width: If self.subsample[1] != 1, a variable giving the width
            of the filters for direction="backprop weights" or the width of the
            input images for direction="backprop inputs".
            If self.border_mode == 'half', a variable giving the width of the
            filters for direction="backprop weights". Ignored otherwise.
        """
        dH, dW = self.subsample
        # Encode border_mode into padding markers the C code understands:
        # -1 means "half", -2 means "full", >= 0 is an explicit amount.
        if self.border_mode == "half":
            padH = padW = -1
        elif self.border_mode == "full":
            padH = padW = -2
        elif isinstance(self.border_mode, tuple):
            padH, padW = self.border_mode
        else:
            assert self.border_mode == "valid"
            padH = padW = 0
        # Map the direction string to the integer code used in C, and
        # pick which variable receives the result.
        if direction == "forward":
            direction = 0
            out = top
        elif direction == "backprop weights":
            direction = 1
            out = weights
        elif direction == "backprop inputs":
            direction = 2
            out = bottom
        else:
            raise ValueError("direction must be one of 'forward', "
                             "'backprop weights', 'backprop inputs'")
        # When subsampling, we cannot unambiguously infer the height and width
        # of bottom and weights from top, so we require them to be given.
        # Similarly, when border_mode="half", we cannot infer the weight size.
        if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
            if not height:
                raise ValueError("height must be given for backprop with vertical sampling or border_mode='half'")
            height = '(*(npy_int*)(PyArray_DATA(%s)))' % height
        else:
            height = 'NULL'
        if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
            if not width:
                raise ValueError("width must be given for backprop with horizontal sampling or border_mode='half'")
            width = '(*(npy_int*)(PyArray_DATA(%s)))' % width
        else:
            width = 'NULL'
        sub = sub.copy()
        sub.update(locals())

        return """
    // Mandatory args
    int direction = %(direction)s;  // forward, bprop weights, bprop inputs

    // Optional args
    int dH = %(dH)s;
    int dW = %(dW)s;
    int padH = %(padH)s;
    int padW = %(padW)s;

    PyArrayObject * bottom = %(bottom)s;
    PyArrayObject * weights = %(weights)s;
    PyArrayObject * top = %(top)s;
    PyArrayObject * out2 = NULL;

    // Obtain or infer kernel width and height
    // (we need to know it early to be able to handle auto-padding)
    int kH, kW;
    if (direction != 1) {
        // weight is an input variable, we can just read its shape
        kH = PyArray_DIMS(weights)[2];
        kW = PyArray_DIMS(weights)[3];
    }
    else {
        if ((dH != 1) || (padH == -1)) {
            // vertical subsampling or half padding, kernel height is specified
            kH = %(height)s;
        }
        else if (padH == -2) {
            // vertical full padding, we can infer the kernel height
            kH = 2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH;
        }
        else {
            // explicit padding, we can infer the kernel height
            kH = PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH;
        }
        if ((dW != 1) || (padW == -1)) {
            kW = %(width)s;
        }
        else if (padW == -2) {
            kW = 2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW;
        }
        else {
            kW = PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW;
        }
    }

    // Auto-padding if requested
    if (padH == -1) {  // vertical half padding
        padH = kH / 2;
    }
    else if (padH == -2) {  // vertical full padding
        padH = kH - 1;
    }
    else if (padH < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padH must be >= -2");
        %(fail)s
    }
    if (padW == -1) {  // horizontal half padding
        padW = kW / 2;
    }
    else if (padW == -2) {  // horizontal full padding
        padW = kW - 1;
    }
    else if (padW < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padW must be >= -2");
        %(fail)s
    }

    // Infer output shape
    npy_intp out_dim[4];
    switch(direction) {
    case 0:  // forward pass
        // output is top: (batchsize, num_filters, height, width)
        // height and width: top = (bottom + 2*pad - weight) / sample + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
        out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - PyArray_DIMS(weights)[2]) / dH + 1);
        out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - PyArray_DIMS(weights)[3]) / dW + 1);
        break;
    case 1:  // backprop wrt. weights
        // output is weights: (num_filters, num_channels, height, width)
        // height and width: weights = bottom + 2*pad - (top - 1) * sample
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
        out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
        out_dim[2] = (npy_intp)kH;  // already inferred further above
        out_dim[3] = (npy_intp)kW;  // how convenient
        break;
    case 2:  // backprop wrt. inputs
        // output is bottom: (batchsize, num_channels, height, width)
        // height and width: bottom = (top - 1) * sample + weights - 2*pad
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
        out_dim[2] = (npy_intp)((dH != 1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + PyArray_DIMS(weights)[2] - 2*padH);
        out_dim[3] = (npy_intp)((dW != 1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + PyArray_DIMS(weights)[3] - 2*padW);
        break;
    default:
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2\\n");
        %(fail)s
    }

    // Prepare output array
    int typenum;
    if ( !(%(out)s
           && PyArray_NDIM(%(out)s)==4
           && PyArray_IS_C_CONTIGUOUS(%(out)s)
           && PyArray_DIMS(%(out)s)[0]==out_dim[0]
           && PyArray_DIMS(%(out)s)[1]==out_dim[1]
           && PyArray_DIMS(%(out)s)[2]==out_dim[2]
           && PyArray_DIMS(%(out)s)[3]==out_dim[3]))
    {
        Py_XDECREF(%(out)s);
        if (direction != 1) {
            typenum = PyArray_TYPE(weights);
        }
        else {
            typenum = PyArray_TYPE(bottom);
        }
        %(out)s = (PyArrayObject*)PyArray_EMPTY(4,
                                                out_dim,
                                                typenum,
                                                0);
        if (NULL == %(out)s)
        {
            PyErr_Format(PyExc_RuntimeError,
                         "BaseCorrMM: Failed to allocate output of %%d x %%d x %%d x %%d",
                         out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
            %(fail)s
        }
    }

    // Call corrMM code
    out2 = corrMM(%(bottom)s, %(weights)s, %(top)s, direction, dH, dW, padH, padW);
    if (out2==NULL){
        %(fail)s
    }
    assert (out2 == %(out)s);
""" % sub
class CorrMM(BaseCorrMM):
    """
    CPU correlation implementation using Matrix Multiplication.

    Parameters
    ----------
    border_mode
        Implicit zero padding of the input: a pair of non-negative
        integers giving the number of rows and columns of padding, a
        single integer used for both dimensions, or one of the string
        shortcuts ``'valid'`` (no padding), ``'full'``
        (``kernel_rows - 1, kernel_columns - 1``) or ``'half'``
        (``kernel_rows // 2, kernel_columns // 2``, a "same"
        convolution for odd-sized kernels). Each amount is applied on
        both sides of the corresponding dimension.
    subsample
        Output subsampling as a pair ``(sv, sh)``; equivalent to
        ``CorrMM(...)(...)[:, :, ::sv, ::sh]`` but computed directly.
        Use ``(1, 1)`` to disable subsampling.
    """

    def __init__(self, border_mode="valid", subsample=(1, 1)):
        super(CorrMM, self).__init__(border_mode, subsample)

    def make_node(self, img, kern):
        # Coerce to Theano variables and check rank.
        img = as_tensor_variable(img)
        kern = as_tensor_variable(kern)
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        # Output inherits batch/filter broadcastability; spatial axes
        # are never broadcastable.
        out_bcast = [img.type.broadcastable[0],
                     kern.type.broadcastable[0],
                     False, False]
        out_type = TensorType(img.type.dtype, out_bcast)
        return Apply(self, [img, kern], [out_type()])

    def infer_shape(self, node, input_shape):
        # Translate border_mode into padding markers (-1: half, -2: full).
        mode = self.border_mode
        if mode == "half":
            pad_h = pad_w = -1
        elif mode == "full":
            pad_h = pad_w = -2
        elif isinstance(mode, tuple):
            pad_h, pad_w = mode
        else:
            assert mode == "valid"
            pad_h = pad_w = 0
        d_h, d_w = self.subsample
        bsize = input_shape[0][0]
        imshp = list(input_shape[0][2:])
        nkern = input_shape[1][0]
        kshp = list(input_shape[1][2:])
        k_h, k_w = kshp
        # Resolve the markers now that the kernel size is known.
        if pad_h == -1:
            pad_h = k_h // 2
        elif pad_h == -2:
            pad_h = k_h - 1
        elif pad_h < 0:
            raise ValueError("CorrMM: border_mode must be >= 0")
        if pad_w == -1:
            pad_w = k_w // 2
        elif pad_w == -2:
            pad_w = k_w - 1
        elif pad_w < 0:
            raise ValueError("CorrMM: border_mode must be >= 0")
        # top = (bottom + 2*pad - kernel) // stride + 1
        out_r = (imshp[0] + 2 * pad_h - kshp[0]) // d_h + 1
        out_c = (imshp[1] + 2 * pad_w - kshp[1]) // d_w + 1
        return [(bsize, nkern, out_r, out_c)]

    def c_code(self, node, nodename, inp, out_, sub):
        bottom, weights = inp
        top, = out_
        return super(CorrMM, self).c_code_helper(bottom, weights, top,
                                                 "forward", sub)

    def grad(self, inp, grads):
        bottom, weights = inp
        top, = grads
        # Gradients are computed by the companion backprop ops.
        grad_inputs_op = CorrMM_gradInputs(self.border_mode, self.subsample)
        grad_weights_op = CorrMM_gradWeights(self.border_mode, self.subsample)
        d_bottom = grad_inputs_op(weights, top, bottom.shape[-2:])
        d_weights = grad_weights_op(bottom, top, weights.shape[-2:])
        return d_bottom, d_weights
class CorrMM_gradWeights(BaseCorrMM):
    """
    Gradient wrt. filters for `CorrMM`.

    Notes
    -----
    Not intended for direct use; Theano's automatic differentiation
    and graph optimizations insert it where needed.
    """

    def __init__(self, border_mode="valid", subsample=(1, 1)):
        super(CorrMM_gradWeights, self).__init__(border_mode, subsample)

    def make_node(self, img, topgrad, shape=None):
        img = as_tensor_variable(img)
        topgrad = as_tensor_variable(topgrad)
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')
        # With subsampling or half padding the kernel shape cannot be
        # inferred from img/topgrad, so it must be passed explicitly.
        needs_shape = self.subsample != (1, 1) or self.border_mode == "half"
        if needs_shape and shape is None:
            raise ValueError('shape must be given if subsample != (1, 1)'
                             ' or border_mode == "half"')
        height_width = [shape[0], shape[1]] if needs_shape else []
        # Output is the filter gradient: (num_filters, num_channels, kH, kW).
        out_bcast = [topgrad.type.broadcastable[1],
                     img.type.broadcastable[1],
                     False, False]
        out_type = TensorType(img.type.dtype, out_bcast)
        return Apply(self, [img, topgrad] + height_width, [out_type()])

    def infer_shape(self, node, input_shape):
        # Translate border_mode into padding markers (-1: half, -2: full).
        mode = self.border_mode
        if mode == "half":
            pad_h = pad_w = -1
        elif mode == "full":
            pad_h = pad_w = -2
        elif isinstance(mode, tuple):
            pad_h, pad_w = mode
        else:
            assert mode == "valid"
            pad_h = pad_w = 0
        d_h, d_w = self.subsample
        nchannels = input_shape[0][1]
        imshp = list(input_shape[0][2:])
        nkern = input_shape[1][1]
        topshp = list(input_shape[1][2:])
        height_width = node.inputs[-2:]
        # Kernel height: given explicitly, or inferred from the shapes.
        if d_h != 1 or pad_h == -1:
            k_h = height_width[0]
        elif pad_h == -2:
            k_h = 2 - imshp[0] + (topshp[0] - 1) * d_h
        else:
            k_h = imshp[0] + 2 * pad_h - (topshp[0] - 1) * d_h
        # Same for the kernel width.
        if d_w != 1 or pad_w == -1:
            k_w = height_width[1]
        elif pad_w == -2:
            k_w = 2 - imshp[1] + (topshp[1] - 1) * d_w
        else:
            k_w = imshp[1] + 2 * pad_w - (topshp[1] - 1) * d_w
        return [(nkern, nchannels, k_h, k_w)]

    def c_code(self, node, nodename, inp, out_, sub):
        bottom, top = inp[:2]
        height, width = inp[2:] if len(inp) > 2 else (None, None)
        weights, = out_
        return super(CorrMM_gradWeights, self).c_code_helper(
            bottom, weights, top, "backprop weights", sub, height, width)

    def grad(self, inp, grads):
        bottom, top = inp[:2]
        weights, = grads
        d_bottom = CorrMM_gradInputs(self.border_mode, self.subsample)(
            weights, top, bottom.shape[-2:])
        d_top = CorrMM(self.border_mode, self.subsample)(bottom, weights)
        if len(inp) == 4:
            # The height/width inputs carry no gradient.
            return (d_bottom, d_top,
                    theano.gradient.DisconnectedType()(),
                    theano.gradient.DisconnectedType()())
        return (d_bottom, d_top)

    def connection_pattern(self, node):
        # height/width inputs (when present) are disconnected.
        if node.nin == 2:
            return [[1], [1]]
        return [[1], [1], [0], [0]]
class CorrMM_gradInputs(BaseCorrMM):
    """
    Gradient wrt. inputs for `CorrMM`.

    Notes
    -----
    Not intended for direct use; Theano's automatic differentiation
    and graph optimizations insert it where needed.
    """

    def __init__(self, border_mode="valid", subsample=(1, 1)):
        super(CorrMM_gradInputs, self).__init__(border_mode, subsample)

    def make_node(self, kern, topgrad, shape=None):
        kern = as_tensor_variable(kern)
        topgrad = as_tensor_variable(topgrad)
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')
        # With subsampling the input image shape cannot be inferred
        # from kern/topgrad, so it must be passed explicitly.
        needs_shape = self.subsample != (1, 1)
        if needs_shape and shape is None:
            raise ValueError('shape must be given if subsample != (1, 1)')
        height_width = [shape[0], shape[1]] if needs_shape else []
        # Output is the image gradient: (batchsize, num_channels, H, W).
        out_bcast = [topgrad.type.broadcastable[0],
                     kern.type.broadcastable[1],
                     False, False]
        out_type = TensorType(kern.type.dtype, out_bcast)
        return Apply(self, [kern, topgrad] + height_width, [out_type()])

    def infer_shape(self, node, input_shape):
        # Translate border_mode into padding markers (-1: half, -2: full).
        mode = self.border_mode
        if mode == "half":
            pad_h = pad_w = -1
        elif mode == "full":
            pad_h = pad_w = -2
        elif isinstance(mode, tuple):
            pad_h, pad_w = mode
        else:
            assert mode == "valid"
            pad_h = pad_w = 0
        d_h, d_w = self.subsample
        nchannels = input_shape[0][1]
        kshp = list(input_shape[0][2:])
        bsize = input_shape[1][0]
        topshp = list(input_shape[1][2:])
        height_width = node.inputs[-2:]
        # Resolve the markers using the (known) kernel shape.
        if pad_h == -1:
            pad_h = kshp[0] // 2
        elif pad_h == -2:
            pad_h = kshp[0] - 1
        elif pad_h < -2:
            raise ValueError('CorrMM_gradInputs: border_mode must be >= 0.')
        if pad_w == -1:
            pad_w = kshp[1] // 2
        elif pad_w == -2:
            pad_w = kshp[1] - 1
        elif pad_w < -2:
            raise ValueError('CorrMM_gradInputs: border_mode must be >= 0.')
        # bottom = (top - 1) * stride + kernel - 2*pad, unless given.
        if d_h != 1:
            out_r = height_width[0]
        else:
            out_r = (topshp[0] - 1) * d_h + kshp[0] - 2 * pad_h
        if d_w != 1:
            out_c = height_width[1]
        else:
            out_c = (topshp[1] - 1) * d_w + kshp[1] - 2 * pad_w
        return [(bsize, nchannels, out_r, out_c)]

    def c_code(self, node, nodename, inp, out_, sub):
        weights, top = inp[:2]
        height, width = inp[2:] if len(inp) > 2 else (None, None)
        bottom, = out_
        return super(CorrMM_gradInputs, self).c_code_helper(
            bottom, weights, top, "backprop inputs", sub, height, width)

    def grad(self, inp, grads):
        weights, top = inp[:2]
        bottom, = grads
        d_weights = CorrMM_gradWeights(self.border_mode, self.subsample)(
            bottom, top, weights.shape[-2:])
        d_top = CorrMM(self.border_mode, self.subsample)(bottom, weights)
        if len(inp) == 4:
            # The height/width inputs carry no gradient.
            return (d_weights, d_top,
                    theano.gradient.DisconnectedType()(),
                    theano.gradient.DisconnectedType()())
        return (d_weights, d_top)

    def connection_pattern(self, node):
        # height/width inputs (when present) are disconnected.
        if node.nin == 2:
            return [[1], [1]]
        return [[1], [1], [0], [0]]
// This uses a lot of code from Caffe (http://caffe.berkeleyvision.org/);
// sources are clearly marked. Below we reproduce the original license of
// the Caffe software.
/*
Copyright (c) 2014, The Regents of the University of California (Regents)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cpp)
// Loops for fast unfold + copy
//
// Unfolds the (channels, height, width) image data_im into the
// (channels * kernel_h * kernel_w) x (height_col * width_col) matrix
// data_col: one column per output position, with positions that fall
// into the zero-padding area filled with 0.
// This file is a Python template; occurrences of "(float_type)s" after
// a percent sign and doubled percent signs are placeholders resolved by
// BaseCorrMM.c_support_code_apply before compilation.
void im2col(const %(float_type)s* data_im, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w,
    const int stride_h, const int stride_w,
    %(float_type)s* data_col) {
  // Spatial extent of the output feature map.
  int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
  int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
  int channels_col = channels * kernel_h * kernel_w;
  for (int c = 0; c < channels_col; ++c) {
    // Decompose the row index into (channel, kernel row, kernel column).
    int w_offset = c %% kernel_w;
    int h_offset = (c / kernel_w) %% kernel_h;
    int c_im = c / kernel_h / kernel_w;
    for (int h = 0; h < height_col; ++h) {
      for (int w = 0; w < width_col; ++w) {
        int h_pad = h * stride_h - pad_h + h_offset;
        int w_pad = w * stride_w - pad_w + w_offset;
        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
          data_col[(c * height_col + h) * width_col + w] =
            data_im[(c_im * height + h_pad) * width + w_pad];
        else
          data_col[(c * height_col + h) * width_col + w] = 0.;
      }
    }
  }
}
// Unlike the Caffe and Theano GPU versions, the data_im array is set to zero
// before the col2im call rather than doing it here. So, the result is just
// accumulated into data_im.
//
// Inverse of im2col: folds the column matrix data_col back into the
// (channels, height, width) image data_im, summing contributions of
// overlapping patches. Entries that came from the padding area are
// simply skipped.
void col2im(const %(float_type)s* data_col, const int channels,
    const int height, const int width, const int patch_h, const int patch_w,
    const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, %(float_type)s* data_im) {
  int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
  int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
  int num_kernels = channels * height * width;
  int channels_col = channels * patch_h * patch_w;
  for (int c = 0; c < channels_col; ++c) {
    // Decompose the row index into (channel, patch row, patch column).
    int w_offset = c %% patch_w;
    int h_offset = (c / patch_w) %% patch_h;
    int c_im = c / patch_h / patch_w;
    for (int h = 0; h < height_col; ++h) {
      for (int w = 0; w < width_col; ++w) {
        int h_pad = h * stride_h - pad_h + h_offset;
        int w_pad = w * stride_w - pad_w + w_offset;
        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
          data_im[(c_im * height + h_pad) * width + w_pad] +=
            data_col[(c * height_col + h) * width_col + w];
      }
    }
  }
}
// Theano op code
// GPU version authors: Arjun Jain, Frederic Bastien, Jan Schlueter
// Reference code: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
// and https://github.com/torch/cunn/blob/master/SpatialConvolutionMM.cu
// CPU version author: Jesse Livezey
// CPU version adapted from GPU version
//
// Runs one of three GEMM-based 2D correlation passes, selected by
// direction: 0 writes into top (forward), 1 accumulates into weight
// (gradient wrt. weights), 2 writes into bottom (gradient wrt. inputs).
// Returns the array that received the result, which is an alias of one
// of the three arguments (no new reference is created for it), or NULL
// with a Python exception set on error.
PyArrayObject* corrMM(PyArrayObject* bottom,
                      PyArrayObject* weight,
                      PyArrayObject* top,
                      const int direction,
                      const int dH = 1,
                      const int dW = 1,
                      const int padH = 0,
                      const int padW = 0)
{
    // Validate rank and dtype of all three arrays up front.
    if (PyArray_NDIM(bottom) != 4)
    {
        PyErr_SetString(PyExc_ValueError, "CorrMM requires bottom of 4D");
        return NULL;
    }
    if (PyArray_TYPE(bottom) != %(float_typenum)s)
    {
        PyErr_SetString(PyExc_ValueError, "CorrMM received bottom with wrong type.");
        return NULL;
    }
    if (PyArray_NDIM(weight) != 4)
    {
        PyErr_SetString(PyExc_ValueError, "CorrMM requires weight of 4D");
        return NULL;
    }
    if (PyArray_TYPE(weight) != %(float_typenum)s)
    {
        PyErr_SetString(PyExc_ValueError, "CorrMM received weight with wrong type.");
        return NULL;
    }
    if (PyArray_NDIM(top) != 4)
    {
        PyErr_SetString(PyExc_ValueError, "CorrMM requires top of 4D");
        return NULL;
    }
    if (PyArray_TYPE(top) != %(float_typenum)s)
    {
        PyErr_SetString(PyExc_ValueError, "CorrMM received top with wrong type.");
        return NULL;
    }

    // Ensure data is contiguous
    // (each call returns a new reference, released at the end)
    bottom = PyArray_GETCONTIGUOUS(bottom);
    weight = PyArray_GETCONTIGUOUS(weight);
    top = PyArray_GETCONTIGUOUS(top);

    // Extract some shape information for later and check shape consistency
    // bottom: (batchSize, nChannels, bottomHeight, bottomWidth)
    const int batchSize = PyArray_DIMS(bottom)[0];
    const int nChannels = PyArray_DIMS(bottom)[1];
    const int bottomHeight = PyArray_DIMS(bottom)[2];
    const int bottomWidth = PyArray_DIMS(bottom)[3];
    // weights: (nFilters, nChannels, rows, columns)
    const int nFilters = PyArray_DIMS(weight)[0];
    const int kH = PyArray_DIMS(weight)[2];
    const int kW = PyArray_DIMS(weight)[3];
    if (nChannels != PyArray_DIMS(weight)[1]) {
        PyErr_SetString(PyExc_ValueError,
                "CorrMM images and kernel must have the same stack size\n");
        return NULL;
    }
    // top: (batchSize, nFilters, topHeight, topWidth)
    const int topHeight = (bottomHeight + 2*padH - kH) / dH + 1;
    const int topWidth  = (bottomWidth + 2*padW - kW) / dW + 1;
    if (batchSize != PyArray_DIMS(top)[0] ||
            nFilters != PyArray_DIMS(top)[1] ||
            topHeight != PyArray_DIMS(top)[2] ||
            topWidth != PyArray_DIMS(top)[3]) {
        PyErr_Format(PyExc_ValueError,
                "CorrMM shape inconsistency:\n"
                "  bottom shape: %%d %%d %%d %%d\n"
                "  weight shape: %%d %%d %%d %%d\n"
                "  top shape: %%d %%d %%d %%d (expected %%d %%d %%d %%d)\n",
                batchSize, nChannels, bottomHeight, bottomWidth,
                nFilters, nChannels, kH, kW,
                PyArray_DIMS(top)[0], PyArray_DIMS(top)[1],
                PyArray_DIMS(top)[2], PyArray_DIMS(top)[3],
                batchSize, nFilters, topHeight, topWidth);
        return NULL;
    }

    // Create temporary columns
    // (the im2col workspace, shared by all three directions)
    npy_intp col_dim[2];
    col_dim[0] = (npy_intp)(nChannels * kW * kH);
    col_dim[1] = (npy_intp)(topHeight * topWidth);
    PyArrayObject* col = (PyArrayObject*)PyArray_EMPTY(2,
            col_dim,
            PyArray_TYPE(top),
            0);
    if (NULL == col)
    {
        PyErr_Format(PyExc_RuntimeError,
                "CorrMM failed to allocate working memory of %%d x %%d\n",
                col_dim[0], col_dim[1]);
        return NULL;
    }

    // Define some useful variables
    // Per-sample strides in elements, not bytes.
    // NOTE(review): n_bytes is substituted as a floating-point literal,
    // so this divides as a double and truncates back to int; looks
    // intentional but worth confirming.
    const int bottom_stride = PyArray_STRIDES(bottom)[0]/%(n_bytes)f;
    const int top_stride = PyArray_STRIDES(top)[0]/%(n_bytes)f;
    // GEMM dimensions: K_ rows of col, N_ output positions, M_ filters.
    const int K_ = col_dim[0];
    const int N_ = col_dim[1];
    const int M_ = nFilters;
    const %(c_float_type)s one = 1.0;
    const %(c_float_type)s zero = 0.0;
    char NTrans = 'N';
    char Trans = 'T';
    PyArrayObject *output;

    if (direction == 0) {  // forward pass
        output = top;
        // valid correlation: im2col, then gemm
        // Iterate over batch
        for (int n = 0; n < batchSize; n++) {
            // First, im2col
            im2col((%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride, nChannels, bottomHeight,
                   bottomWidth, kH, kW, padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(col));
            // Second, gemm
            %(gemm)s(&NTrans, &NTrans,
                   &N_, &M_, &K_,
                   &one,
                   (%(float_type)s*)PyArray_DATA(col), &N_,
                   (%(float_type)s*)PyArray_DATA(weight), &K_,
                   &zero,
                   (%(float_type)s*)PyArray_DATA(top) + n * top_stride, &N_);
        }
        /*
        // Original caffe code for comparison
        // Note that this code was translated from the Theano GPU code,
        // not the Caffe CPU code.
        // https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
        // Note that this is for grouped convolution; we can ignore groups here,
        // but the group-related offsets help explain what M_, N_ and K_ are
        int weight_offset = M_ * K_;
        int col_offset = K_ * N_;
        int top_offset = M_ * N_;
        for (int n = 0; n < num_; ++n) {
          // First, im2col
          im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
              width_, kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
              col_data);
          // Second, innerproduct with groups
          for (int g = 0; g < group_; ++g) {
            caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
              (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
              (Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g);
            == (see https://github.com/BVLC/caffe/blob/master/src/caffe/util/math_functions.cu#L16)
            cublasSgemm(CUBLAS_OP_N, CUBLAS_OP_N,
              N_, M_, K_,
              1.,
              col_data + col_offset * g, N_,
              weight + weight_offset * g, K_,
              0.,
              top_data + (*top)[i]->offset(n) + top_offset * g, N_);
          }
        }
        */
    }
    else if (direction == 1) {  // backprop wrt. weights
        output = weight;
        // valid convolution: im2col, then gemm
        // Iterate over batch
        for (int n = 0; n < batchSize; n++) {
            // First, im2col
            im2col((%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride, nChannels, bottomHeight,
                   bottomWidth, kH, kW, padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(col));
            // Second, gemm
            // Note that we accumulate into weight. We do so by setting beta = 0
            // for the first iteration and beta = 1 for subsequent ones. (This
            // is faster than setting weight to all zeros before the loop.)
            %(gemm)s(&Trans, &NTrans,
                   &K_, &M_, &N_,
                   &one,
                   (%(float_type)s*)PyArray_DATA(col), &N_,
                   (%(float_type)s*)PyArray_DATA(top) + n * top_stride, &N_,
                   (n == 0) ? &zero : &one,
                   (%(float_type)s*)PyArray_DATA(weight), &K_);
        }
        /*
        // Original caffe code for comparison
        // Note that this code was translated from the Theano GPU code,
        // not the Caffe CPU code.
        // https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
        // Note that this is for grouped convolution; we can ignore groups
        for (int n = 0; n < num_; ++n) {
          // Since we saved memory in the forward pass by not storing all col
          // data, we will need to recompute them.
          im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
                     width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
                     stride_h_, stride_w_, col_data);
          // gradient w.r.t. weight. Note that we will accumulate diffs.
          for (int g = 0; g < group_; ++g) {
            caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
                (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
                col_data + col_offset * g, (Dtype)1.,
                weight_diff + weight_offset * g);
            == (see https://github.com/BVLC/caffe/blob/master/src/caffe/util/math_functions.cu#L16)
            cublasSgemm(CUBLAS_OP_T, CUBLAS_OP_N, K_, M_, N_,
                1.0,
                col_data + col_offset * g, N_,
                top_diff + top[i]->offset(n) + top_offset * g, N_,
                1.0,
                weight_diff + weight_offset * g, K_);
          }
        }
        */
    }
    else if (direction == 2) {  // backprop wrt. inputs
        output = bottom;
        // bottom is set to zero here rather than inside of col2im
        PyArray_FILLWBYTE(bottom, 0);
        // full convolution: gemm, then col2im
        // Iterate over batch
        for (int n = 0; n < batchSize; n++) {
            // gemm into columns
            %(gemm)s(&NTrans, &Trans,
                   &N_, &K_, &M_,
                   &one,
                   (%(float_type)s*)PyArray_DATA(top) + n * top_stride, &N_,
                   (%(float_type)s*)PyArray_DATA(weight), &K_,
                   &zero,
                   (%(float_type)s*)PyArray_DATA(col), &N_);
            // col2im back to the data
            col2im((%(float_type)s*)PyArray_DATA(col), nChannels, bottomHeight, bottomWidth,
                   kH, kW, padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride);
        }
        /*
        // Original caffe code for comparison
        // Note that this code was translated from the Theano GPU code,
        // not the Caffe CPU code.
        // https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
        for (int n = 0; n < num_; ++n) {
          // gradient w.r.t. bottom data, if necessary
          if (propagate_down[i]) {
            for (int g = 0; g < group_; ++g) {
              caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
                  (Dtype)1., weight + weight_offset * g,
                  top_diff + top[i]->offset(n) + top_offset * g,
                  (Dtype)0., col_diff + col_offset * g);
              == (see https://github.com/BVLC/caffe/blob/master/src/caffe/util/math_functions.cu#L16)
              cublasSgemm(CUBLAS_OP_N, CUBLAS_OP_T, N_, K_, M_,
                  1.,
                  top_diff + top[i]->offset(n) + top_offset * g, N_,
                  weight + weight_offset * g, K_,
                  0.,
                  col_diff + col_offset * g, N_);
            }
            // col2im back to the data
            col2im_gpu(col_diff, channels_, height_, width_,
                kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
                bottom_diff + (*bottom)[i]->offset(n));
          }
        }
        */
    }

    // Free temporary columns
    Py_DECREF(col);
    // decref from contiguous check
    Py_DECREF(bottom);
    Py_DECREF(weight);
    Py_DECREF(top);

    // Note that we don't change the refcount of the output matrix here. Output
    // (re)allocation and refcounting is done in BaseCorrMM.c_code_helper();
    // in here output is just aliased to one of bottom, weights, or top.
    return output;
}
from nose.plugins.skip import SkipTest
from nose.plugins.attrib import attr
import numpy
import theano
import theano.tensor as T
from theano.tests import unittest_tools as utt
from theano.tensor.nnet import corr, conv
from theano.tensor.basic import _allclose
class TestCorr2D(utt.InferShapeTester):
    """
    Tests for the CPU CorrMM op family (theano.tensor.nnet.corr).

    Each functional test compares the output of ``corr.CorrMM`` against a
    pure-NumPy reference correlation computed in ``validate``; the
    ``test_infer_shape_*`` methods check shape inference for the forward op
    and both gradient ops.
    """
    # CorrMM is a C-only op: even when the suite runs under FAST_COMPILE,
    # compile these graphs with FAST_RUN so the C code is actually used.
    if theano.config.mode == "FAST_COMPILE":
        mode = theano.compile.get_mode("FAST_RUN")
    else:
        mode = None
    # All test data is created in the configured floatX dtype.
    dtype = theano.config.floatX

    def setUp(self):
        """Create the symbolic 4D input/filter variables shared by all tests."""
        super(TestCorr2D, self).setUp()
        self.input = T.tensor4('input', dtype=self.dtype)
        self.input.name = 'default_V'
        self.filters = T.tensor4('filters', dtype=self.dtype)
        self.filters.name = 'default_filters'
        # NOTE(review): the reference implementation in validate() is pure
        # NumPy, so the SciPy half of this condition looks vestigial; the
        # C++ compiler is the real requirement for CorrMM — confirm.
        if not conv.imported_scipy_signal and theano.config.cxx == "":
            raise SkipTest("CorrMM tests need SciPy or a c++ compiler")

    def validate(self, image_shape, filter_shape,
                 border_mode='valid', subsample=(1, 1),
                 input=None, filters=None,
                 verify_grad=True, non_contiguous=False):
        """
        Compare CorrMM against a NumPy reference correlation.

        :param image_shape: The constant shape info passed to corrMM.
        :param filter_shape: The constant shape info passed to corrMM.
        :param border_mode: 'valid', 'full', 'half', an int, or an (h, w)
            tuple of paddings, forwarded to CorrMM.
        :param subsample: (row, col) stride pair forwarded to CorrMM.
        :param input: optional symbolic input to use instead of self.input.
        :param filters: optional symbolic filters instead of self.filters.
        :param verify_grad: if True, also run utt.verify_grad on the op.
        :param non_contiguous: if True, feed non-C-contiguous arrays.
        """
        N_image_shape = [T.get_scalar_constant_value(T.as_tensor_variable(x))
                         for x in image_shape]
        N_filter_shape = [T.get_scalar_constant_value(T.as_tensor_variable(x))
                          for x in filter_shape]
        if input is None:
            input = self.input
        if filters is None:
            filters = self.filters

        # THEANO IMPLEMENTATION
        # we create a symbolic function so that verify_grad can work
        def sym_CorrMM(input, filters):
            # define theano graph and function
            input.name = 'input'
            filters.name = 'filters'
            rval = corr.CorrMM(border_mode, subsample)(input, filters)
            rval.name = 'corr_output'
            return rval

        output = sym_CorrMM(input, filters)
        output.name = 'CorrMM()(%s,%s)' % (input.name, filters.name)
        theano_corr = theano.function([input, filters], output, mode=self.mode)

        # initialize input and compute result
        image_data = numpy.random.random(N_image_shape).astype(self.dtype)
        filter_data = numpy.random.random(N_filter_shape).astype(self.dtype)
        if non_contiguous:
            # transpose -> copy -> transpose back produces arrays with the
            # original shape but non-C-contiguous memory layout
            image_data = numpy.transpose(image_data, axes=(0, 1, 3, 2))
            image_data = image_data.copy()
            image_data = numpy.transpose(image_data, axes=(0, 1, 3, 2))
            filter_data = numpy.transpose(filter_data, axes=(0, 1, 3, 2))
            filter_data = filter_data.copy()
            filter_data = numpy.transpose(filter_data, axes=(0, 1, 3, 2))
            assert not image_data.flags['CONTIGUOUS']
            assert not filter_data.flags['CONTIGUOUS']

        theano_output = theano_corr(image_data, filter_data)

        # REFERENCE IMPLEMENTATION
        # Testing correlation, not convolution. Reverse filters.
        filter_data_corr = numpy.array(filter_data[:, :, ::-1, ::-1],
                                       copy=True,
                                       order='C')
        # keep the unpadded image around for the gradient check below
        orig_image_data = image_data
        img_shape2d = numpy.array(N_image_shape[-2:])
        fil_shape2d = numpy.array(N_filter_shape[-2:])
        subsample2d = numpy.array(subsample)
        if border_mode == 'full':
            padHW = (fil_shape2d - 1)
        elif border_mode == 'valid':
            padHW = numpy.array([0, 0])
        elif border_mode == 'half':
            # NOTE(review): numpy.floor returns a float array here; the
            # slice indices built from padHW below rely on old NumPy
            # accepting non-integer indices — confirm on the target NumPy.
            padHW = numpy.floor(fil_shape2d / 2)
        elif isinstance(border_mode, tuple):
            padHW = numpy.array(border_mode)
        elif isinstance(border_mode, int):
            padHW = numpy.array([border_mode, border_mode])
        else:
            raise NotImplementedError('Unsupported border_mode {}'.format(border_mode))
        # standard output-size formula: floor((in + 2*pad - filter)/stride) + 1
        out_shape2d = numpy.floor((img_shape2d + 2 * (padHW) - fil_shape2d) / subsample2d) + 1
        # out_shape is (batch, output channels, out rows, out cols)
        out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape2d)
        ref_output = numpy.zeros(out_shape)

        # loop over output feature maps
        ref_output.fill(0)
        # zero-pad the image so every border_mode reduces to a 'valid' loop
        image_data2 = numpy.zeros((N_image_shape[0], N_image_shape[1],
                                   N_image_shape[2] + 2 * padHW[0],
                                   N_image_shape[3] + 2 * padHW[1]))
        image_data2[:, :, padHW[0]:padHW[0] + N_image_shape[2],
                    padHW[1]:padHW[1] + N_image_shape[3]] = image_data
        image_data = image_data2
        N_image_shape = image_data.shape
        for bb in range(N_image_shape[0]):
            for nn in range(N_filter_shape[0]):
                for im0 in range(N_image_shape[1]):
                    filter2d = filter_data_corr[nn, im0, :, :]
                    image2d = image_data[bb, im0, :, :]
                    for row in range(ref_output.shape[2]):
                        irow = row * subsample[0]  # image row
                        for col in range(ref_output.shape[3]):
                            icol = col * subsample[1]  # image col
                            # filter2d was flipped once above, so flipping it
                            # back here yields a correlation with filter_data
                            ref_output[bb, nn, row, col] += (image2d[
                                irow:irow + N_filter_shape[2],
                                icol:icol + N_filter_shape[3]] * filter2d[::-1, ::-1]
                            ).sum()

        self.assertTrue(_allclose(theano_output, ref_output))

        # TEST GRADIENT
        if verify_grad:
            utt.verify_grad(sym_CorrMM, [orig_image_data, filter_data])

    @attr('slow')
    def test_basic(self):
        """
        Tests that basic correlations work for odd and even
        dimensions of image and filter shapes, as well as rectangular
        images and filters.
        """
        border_modes = ['valid', 'full', 'half', (1, 1), (2, 1), (1, 2),
                        (3, 3), 1]
        img_shapes = [(2, 2, 3, 3), (3, 2, 8, 8), (3, 2, 7, 5), (3, 2, 7, 5),
                      (3, 2, 8, 8), (3, 2, 7, 5)]
        fil_shapes = [(2, 2, 2, 2), (4, 2, 5, 5), (5, 2, 2, 3), (5, 2, 3, 2),
                      (4, 2, 5, 5), (5, 2, 2, 3)]
        for border_mode in border_modes:
            for img, fil in zip(img_shapes, fil_shapes):
                self.validate(img, fil, border_mode, verify_grad=False)
        # Very slow on with 'full' or 'half'
        self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False)

    def test_img_kernel_same_shape(self):
        """Correlation where the kernel is exactly as large as the image."""
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), 'full')
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), 'valid')
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), 'half')
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), (1, 1))
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), 1)

    @attr('slow')
    def test_subsample(self):
        """
        Tests correlation where subsampling != (1,1)
        """
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'valid', subsample=(3, 3))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'full', subsample=(3, 3))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'half', subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'half', subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'half', subsample=(3, 3))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 1), subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1), subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), (1, 2), subsample=(3, 3))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), 1, subsample=(3, 3))

    @attr('slow')
    def test_shape_Constant_tensor(self):
        """
        Tests correlation where the {image,filter}_shape is a Constant tensor.
        """
        as_t = T.as_tensor_variable
        border_modes = ['valid', 'full', 'half', (1, 1), (2, 1), (1, 2), (3, 3), 1]
        for border_mode in border_modes:
            self.validate((as_t(3), as_t(2), as_t(7), as_t(5)),
                          (5, 2, 2, 3), border_mode)
            self.validate(as_t([3, 2, 7, 5]), (5, 2, 2, 3), border_mode)
            self.validate(as_t((3, 2, 7, 5)), (5, 2, 2, 3), border_mode)
            self.validate((3, 2, 7, 5), (as_t(5), as_t(2), as_t(2),
                          as_t(3)), 'valid')
            self.validate((3, 2, 7, 5), as_t([5, 2, 2, 3]), border_mode)
            self.validate(as_t([3, 2, 7, 5]), as_t([5, 2, 2, 3]), border_mode)

    def test_invalid_filter_shape(self):
        """
        Tests scenario where filter_shape[1] != input_shape[1]
        """
        self.assertRaises(ValueError, self.validate,
                          (3, 2, 8, 8), (4, 3, 5, 5),
                          'valid')

    def test_full_mode(self):
        """
        Tests basic correlation in full mode and case where filter
        is larger than the input image.
        """
        self.validate((3, 2, 5, 5), (4, 2, 8, 8), 'full')

        def f():
            # a filter larger than the image is invalid in 'valid' mode
            self.validate((3, 2, 5, 5), (4, 2, 8, 8), 'valid')
        self.assertRaises(Exception, f)

    def test_wrong_input(self):
        """
        Make sure errors are raised when image and kernel are not 4D tensors
        """
        self.assertRaises(Exception, self.validate, (3, 2, 8, 8), (4, 2, 5, 5),
                          'valid', input=T.dmatrix())
        self.assertRaises(Exception, self.validate, (3, 2, 8, 8), (4, 2, 5, 5),
                          'valid', filters=T.dvector())
        self.assertRaises(Exception, self.validate, (3, 2, 8, 8), (4, 2, 5, 5),
                          'valid', input=T.dtensor3())

    @attr('slow')
    def test_infer_shape_forward(self):
        """Check shape inference of the forward CorrMM op."""
        def rand(*shape):
            # uniform values in [-1, 1)
            r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
            return r * 2 - 1
        corrMM = corr.CorrMM
        adtens = T.dtensor4()
        bdtens = T.dtensor4()
        aivec_vals = [[4, 5, 6, 3], [6, 2, 8, 3], [3, 6, 7, 5],
                      [3, 6, 7, 5], [5, 2, 4, 3]]
        bivec_vals = [[7, 5, 3, 2], [4, 2, 5, 3], [5, 6, 3, 2],
                      [5, 6, 2, 3], [6, 2, 4, 3]]
        modes = ['valid', 'full', 'half', (1, 1), (2, 1), (1, 2), 1]
        subsamples = [(1, 1), (2, 1), (1, 2)]
        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # CorrMM
                    cdtens = corrMM(border_mode=mode, subsample=subsample)(adtens, bdtens)
                    self._compile_and_check([adtens, bdtens],
                                            [cdtens],
                                            [adtens_val, bdtens_val], corrMM,
                                            warn=False)

    @attr('slow')
    def test_infer_shape_gradW(self):
        """Check shape inference of CorrMM_gradWeights."""
        def rand(*shape):
            # uniform values in [-1, 1)
            r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
            return r * 2 - 1
        corrMM = corr.CorrMM
        gradW = corr.CorrMM_gradWeights
        adtens = T.dtensor4()
        bdtens = T.dtensor4()
        aivec_vals = [[1, 5, 6, 3], [8, 2, 7, 3], [1, 6, 9, 4],
                      [9, 6, 8, 5], [9, 1, 6, 8]]
        bivec_vals = [[7, 5, 3, 1], [4, 2, 5, 3], [12, 6, 3, 2],
                      [5, 6, 1, 3], [11, 1, 3, 3]]
        modes = ['valid', 'full', 'half', (1, 1), (2, 1), (1, 2), 1]
        subsamples = [(1, 1), (2, 1), (1, 2)]
        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # CorrMM: run the forward op to get a consistent top value
                    cdtens = corrMM(border_mode=mode, subsample=subsample)(adtens, bdtens)
                    f = theano.function([adtens, bdtens], cdtens)
                    cdtens_val = f(adtens_val, bdtens_val)
                    # CorrMM_gradWeights needs the kernel height/width as
                    # an explicit shape argument
                    shape = (theano.shared(bivec_val[2]), theano.shared(bivec_val[3]))
                    bdtens_g = gradW(border_mode=mode,
                                     subsample=subsample)(adtens, cdtens, shape=shape)
                    self._compile_and_check([adtens, cdtens],
                                            [bdtens_g],
                                            [adtens_val, cdtens_val], gradW,
                                            warn=False)

    @attr('slow')
    def test_infer_shape_gradI(self):
        """Check shape inference of CorrMM_gradInputs."""
        def rand(*shape):
            # uniform values in [-1, 1)
            r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
            return r * 2 - 1
        corrMM = corr.CorrMM
        gradI = corr.CorrMM_gradInputs
        adtens = T.dtensor4()
        bdtens = T.dtensor4()
        aivec_vals = [[1, 5, 6, 3], [8, 2, 7, 3], [1, 6, 9, 4],
                      [9, 6, 8, 5], [9, 1, 6, 8]]
        bivec_vals = [[7, 5, 3, 1], [4, 2, 5, 3], [12, 6, 3, 2],
                      [5, 6, 1, 3], [7, 1, 3, 4]]
        modes = ['valid', 'full', 'half', (1, 1), (2, 1), (1, 2), 1]
        subsamples = [(1, 1), (2, 1), (1, 2)]
        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # CorrMM: run the forward op to get a consistent top value
                    cdtens = corrMM(border_mode=mode, subsample=subsample)(adtens, bdtens)
                    f = theano.function([adtens, bdtens], cdtens)
                    cdtens_val = f(adtens_val, bdtens_val)
                    # CorrMM_gradInputs needs the input height/width as
                    # an explicit shape argument
                    shape = (theano.shared(aivec_val[2]), theano.shared(aivec_val[3]))
                    adtens_g = gradI(border_mode=mode,
                                     subsample=subsample)(bdtens, cdtens, shape=shape)
                    self._compile_and_check([bdtens, cdtens],
                                            [adtens_g],
                                            [bdtens_val, cdtens_val], gradI,
                                            warn=False)

    def test_non_contiguous(self):
        """Exercise the op's handling of non-C-contiguous inputs."""
        self.validate((2, 2, 3, 3), (2, 2, 2, 2), 'valid', non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), 'valid', non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid', non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 3, 2), 'valid', non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), 'full', non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), 'half', non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'half', non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), (1, 1), non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 2), non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1), non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 2, non_contiguous=True)
if __name__ == '__main__':
    # Run the shape-inference tests when executed as a script.
    # Bug fix: the original called t.test_infer_shape(), a method that does
    # not exist on TestCorr2D (it defines test_infer_shape_forward,
    # test_infer_shape_gradW and test_infer_shape_gradI), so running this
    # file directly raised AttributeError.
    t = TestCorr2D('setUp')
    t.setUp()
    t.test_infer_shape_forward()
    t.test_infer_shape_gradW()
    t.test_infer_shape_gradI()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论