提交 d844e6c1 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6252 from Faruk-Ahmed/conv3d

phase out outdated conv3d
...@@ -125,9 +125,6 @@ TODO: Give examples on how to use these things! They are pretty complicated. ...@@ -125,9 +125,6 @@ TODO: Give examples on how to use these things! They are pretty complicated.
``THEANO_FLAGS=optimizer_excluding=conv_dnn`` in your environment. ``THEANO_FLAGS=optimizer_excluding=conv_dnn`` in your environment.
As dnn_conv has a gradient defined, you can also use it manually. As dnn_conv has a gradient defined, you can also use it manually.
- Implemented operators for neural network 3D / video convolution: - Implemented operators for neural network 3D / video convolution:
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`
3D Convolution applying multi-channel 3D filters to batches of
multi-channel 3D images. It does not flip the kernel.
- :func:`GpuCorr3dMM <theano.gpuarray.blas.GpuCorr3dMM>` - :func:`GpuCorr3dMM <theano.gpuarray.blas.GpuCorr3dMM>`
This is a GPU-only 3d correlation relying on a Toeplitz matrix This is a GPU-only 3d correlation relying on a Toeplitz matrix
and gemm implementation (see :func:`GpuCorrMM <theano.sandbox.cuda.blas.GpuCorrMM>`) and gemm implementation (see :func:`GpuCorrMM <theano.sandbox.cuda.blas.GpuCorrMM>`)
...@@ -168,7 +165,6 @@ TODO: Give examples on how to use these things! They are pretty complicated. ...@@ -168,7 +165,6 @@ TODO: Give examples on how to use these things! They are pretty complicated.
.. autofunction:: theano.tensor.nnet.conv2d .. autofunction:: theano.tensor.nnet.conv2d
.. autofunction:: theano.tensor.nnet.conv2d_transpose .. autofunction:: theano.tensor.nnet.conv2d_transpose
.. autofunction:: theano.tensor.nnet.conv3d .. autofunction:: theano.tensor.nnet.conv3d
.. autofunction:: theano.tensor.nnet.Conv3D.conv3D
.. autofunction:: theano.tensor.nnet.conv3d2d.conv3d .. autofunction:: theano.tensor.nnet.conv3d2d.conv3d
.. autofunction:: theano.tensor.nnet.conv.conv2d .. autofunction:: theano.tensor.nnet.conv.conv2d
......
from __future__ import absolute_import, print_function, division
import numpy as np
from six.moves import xrange
import theano
from theano.tensor import basic as T
# from util import strutil
from theano.tensor.blas_headers import blas_header_text, blas_header_version
from theano.tensor.blas import ldflags
from theano.misc import strutil
from theano.gradient import grad_undefined
# Note: not a true convolution because we don't bother with flipping the kernel
# An op that takes a weight tensor W. a bias vector b, and a visible tensor V, produces a hidden unit tensor H
# Also parameterized by integer strides dr,dc,dt
# H[i,r,c,t,j] = video i within the minibatch, feature map j, location and time within feature map (r,c,t)
# W[j,k,l,m,z] = weights connecting H[i,r,c,t,j] to V[i,dr*r+k,dc*c+l,dt*t+m,z]
# b[j] = bias of feature map j
# V[i,r,c,t,j] = pixel at (r,c,t) within video featuremap j of video i within the minibatch
# i.e., H[i,j,r,c,t] = b_j + sum_k sum_l sum_m sum_z W[j,k,l,m,z] V[i,z, dr*r+k,dc*c+l,dt*t+m]
# The layouts of these variables are chosen to improve locality of reference.
# numpy seems to put the largest stride on axis 0 and decrease the stride from there. If we do convolution
# one filter at a time, one example at a time, then we want the largest strides to
# be over the examples. We want the smallest stride to be over the input channel because as we change
# the channel we re-visit the same location in the input.
# The smallest stride being over the input channel means that the weights need to be formatted with the input
# channel as the last index
# partial C / partial b_j = sum_i sum_k sum_r sum_c sum_t (partial C / partial H[i,r,c,t,k] ) * ( partial H[i,r,c,t,k] / partial b_j )
# = sum_i sum_k sum_r sum_c sum_t (partial C / partial H[i,r,c,t,k] ) * delta(k = j)
# = sum_i sum_r sum_c sum_t (partial C / partial H[i,r,c,t,j] )
# partial C / partial W[j,k,l,m,z] = sum_i sum_n sum_p sum_q sum_r (partial C /partial H[i,p,q,r,n] ) * (partial H[i,p,q,r,n] / partial W[j,k,l,m,z])
# = partial C / partial W[j,k,l,m,z] = sum_i sum_n sum_p sum_q sum_r (partial C /partial H[i,p,q,r,n] ) *
# (partial sum_s sum_u sum_v sum_a W[n,a, s,u,v] V[i, dr*p+s,dc*q+u,dt*r+v, a] ) / partial W[j,k,l,m,z])
# = partial C / partial W[j,k,l,m,z] = sum_i sum_p sum_q sum_r (partial C /partial H[i,p,q,r,j] ) *
# (partial sum_s sum_u sum_v sum_a W[j,a, s,u,v] V[i,dr*p+s,dc*q+u,dt*r+v,a] ) / partial W[j,k,l,m,z])
# = partial C / partial W[j,k,l,m,z] = sum_i sum_p sum_q sum_r (partial C /partial H[i,p,q,r,j] ) * V[i,dr*p+k,dc*q+l,dt*r+m,z]
# derivatives wrt V unimplemented for now. derivatives wrt dr, dc, dt are undefined since
# the output function is only defined when dr, dc, dt are natural numbers.
class Conv3D(theano.Op):
    """
    3D `convolution` of multiple filters on a minibatch.

    Layout is channels-last:
    V (batch, row, column, time, in channel),
    W (out channel, row, column, time, in channel),
    output H (batch, row, column, time, out channel).

    Notes
    -----
    Does not flip the kernel, moves kernel with a user specified stride.
    """

    # Stateless Op: no parameters participate in equality/hashing.
    __props__ = ()

    def c_code_cache_version(self):
        # Include the BLAS header version so cached compiled code is
        # invalidated whenever the BLAS headers change.
        return (3, blas_header_version())

    def make_node(self, V, W, b, d):
        """
        Parameters
        ----------
        V
            Visible unit, input(batch,row,column,time,in channel)
        W
            Weights, filter(out channel,row,column,time,in channel)
        b
            bias, shape == (W.shape[0],)
        d
            strides when moving the filter over the input(dx,dy,dt)
        """
        V_ = T.as_tensor_variable(V)
        W_ = T.as_tensor_variable(W)
        b_ = T.as_tensor_variable(b)
        d_ = T.as_tensor_variable(d)
        # Output is broadcastable only on the batch axis (from V) and the
        # out-channel axis (from W); spatial/time axes never are.
        bcast = (V_.broadcastable[0], False, False, False, W_.broadcastable[0])
        node = theano.Apply(self, inputs=[V_, W_, b_, d_],
                            outputs=[T.TensorType(V_.dtype, bcast)()])
        return node

    def grad(self, inputs, output_gradients):
        # Given dCdH = dC/d(output), build symbolic gradients w.r.t.
        # V, W and b; the stride d has an undefined gradient.
        V, W, b, d = inputs
        dCdH, = output_gradients
        # TODO: make all of these ops support broadcasting of scalar b to
        # vector b and replace the zeros_like in all their grads
        # Make sure the broadcasting pattern of the gradient is the same
        # as the initial variable
        dCdV = theano.tensor.nnet.convTransp3D(
            W, T.zeros_like(V[0, 0, 0, 0, :]), d, dCdH, V.shape[1:4])
        dCdV = T.patternbroadcast(dCdV, V.broadcastable)
        WShape = W.shape
        dCdW = theano.tensor.nnet.convGrad3D(V, d, WShape, dCdH)
        dCdW = T.patternbroadcast(dCdW, W.broadcastable)
        # Bias gradient: sum dCdH over every axis except the out-channel one.
        dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
        dCdb = T.patternbroadcast(dCdb, b.broadcastable)
        dCdd = grad_undefined(
            self, 3, inputs[3],
            "The gradient of Conv3D with respect to the convolution"
            " stride is undefined because Conv3D is only defined for"
            " integer strides.")
        # Name the gradient variables after their sources to ease debugging
        # of the resulting graph.
        if 'name' in dir(dCdH) and dCdH.name is not None:
            dCdH_name = dCdH.name
        else:
            dCdH_name = 'anon_dCdH'
        if 'name' in dir(V) and V.name is not None:
            V_name = V.name
        else:
            V_name = 'anon_V'
        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon_W'
        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon_b'
        dCdV.name = 'Conv3D_dCdV(dCdH=' + dCdH_name + ',V=' + V_name + ')'
        dCdW.name = ('Conv3D_dCdW(dCdH=' + dCdH_name + ',V=' + V_name +
                     ',W=' + W_name + ')')
        dCdb.name = ('Conv3D_dCdb(dCdH=' + dCdH_name + ',V=' + V_name +
                     ',W=' + W_name + ',b=' + b_name + ')')
        return [dCdV, dCdW, dCdb, dCdd]

    def perform(self, node, inputs, output_storage):
        # Pure-Python fallback: delegate to the NumPy reference
        # implementation computeH() defined below.
        V, W, b, d = inputs
        output_storage[0][0] = computeH(V, W, b, d)

    def infer_shape(self, node, input_shapes):
        # NOTE: strides are read from the symbolic d input itself
        # (node.inputs[3]), not from its shape.
        V, W, b, d = node.inputs
        V_shape, W_shape, b_shape, d_shape = input_shapes
        dr = d[0]
        dc = d[1]
        dt = d[2]
        batch_size = V_shape[0]
        output_channels = W_shape[0]
        vidHeight = V_shape[1]
        filterHeight = W_shape[1]
        vidWidth = V_shape[2]
        filterWidth = W_shape[2]
        vidDur = V_shape[3]
        filterDur = W_shape[3]
        # Standard "valid" convolution output size for each spatial axis.
        output_height = ((vidHeight - filterHeight) // dr) + 1
        output_width = ((vidWidth - filterWidth) // dc) + 1
        output_dur = ((vidDur - filterDur) // dt) + 1
        rval = (batch_size, output_height, output_width, output_dur, output_channels)
        return [rval]

    def c_support_code(self):
        # BLAS prototypes needed by the gemv fast path below.
        return blas_header_text()

    def c_libraries(self):
        return ldflags()

    def c_compile_args(self):
        flags = ldflags(libs=False, flags=True)
        return flags

    def c_lib_dirs(self):
        return ldflags(libs=False, libs_dir=True)

    def c_header_dirs(self):
        return ldflags(libs=False, include_dir=True)

    def c_code(self, node, nodename, inputs, outputs, sub):
        # Generates the C implementation of the forward pass.  Two paths:
        # a BLAS gemv fast path (appended only when BLAS is available and
        # all dtypes match) used when there are many channels, and a fully
        # general nested-loop path.  The template is expanded by
        # strutil.render_string with this function's locals().
        V, W, b, d = inputs
        fail = sub['fail']
        H = outputs[0]
        codeSource = """
///////////// < code generated by Conv3D >
//printf("\t\t\t\tConv3D c code\\n");
//Check dimensionality of inputs
if (PyArray_NDIM(%(W)s) != 5)
{
PyErr_Format(PyExc_ValueError, "Conv3D: W must be a 5 dimensional tensor");
%(fail)s
}
if (PyArray_NDIM(%(V)s) != 5)
{
PyErr_Format(PyExc_ValueError, "Conv3D: V must be a 5 dimensional tensor");
%(fail)s
}
if (PyArray_NDIM(%(b)s) != 1)
{
PyErr_Format(PyExc_ValueError,"Conv3D: b must be a vector.");
%(fail)s
}
if (PyArray_NDIM(%(d)s) != 1)
{
PyErr_Format(PyExc_ValueError,"Conv3D: d must be a vector.");
%(fail)s
}
if (PyArray_DIMS(%(d)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError,"Conv3D: 3 stride length arguments expected (row, col, time) but %%li were given", (long)PyArray_DIMS(%(d)s)[0]);
%(fail)s
}
//Read and check sizes of inputs
{ // exta scope so error handler jumps don't cause errors
const int batchSize = PyArray_DIMS(%(V)s)[0];
const int outputChannels = PyArray_DIMS(%(W)s)[0];
const int inputChannels = PyArray_DIMS(%(V)s)[4];
if (PyArray_DIMS(%(W)s)[4] != inputChannels)
{
PyErr_Format(PyExc_ValueError, "Conv3D: W operates on a %%ld channel image but the image has %%d channels. Overall shape of input: (%%ld,%%ld,%%ld,%%ld,%%ld)", (long)PyArray_DIMS(%(W)s)[4], inputChannels, (long)PyArray_DIMS(%(V)s)[0], (long)PyArray_DIMS(%(V)s)[1], (long)PyArray_DIMS(%(V)s)[2], (long)PyArray_DIMS(%(V)s)[3], (long)PyArray_DIMS(%(V)s)[4]);
%(fail)s
}
if (PyArray_DIMS(%(b)s)[0] != outputChannels)
{
PyErr_Format(PyExc_ValueError, "Conv3D: b adds to a(n) %%ld channel output image but the output has %%d channels", (long)PyArray_DIMS(%(b)s)[0], outputChannels);
%(fail)s
}
{ //extra scope so error handler jumps don't cause errors
const int filterHeight = PyArray_DIMS(%(W)s)[1];
const int filterWidth = PyArray_DIMS(%(W)s)[2];
const int filterDur = PyArray_DIMS(%(W)s)[3];
const int vidHeight = PyArray_DIMS(%(V)s)[1];
const int vidWidth = PyArray_DIMS(%(V)s)[2];
const int vidDur = PyArray_DIMS(%(V)s)[3];\
if (vidHeight < filterHeight)
{
PyErr_Format(PyExc_ValueError, "W has a height of %%i but V is only %%i pixels tall",filterHeight,vidHeight);
%(fail)s
}
{ // extra scope so fail works
if (vidWidth < filterWidth)
{
PyErr_Format(PyExc_ValueError, "W has a width of %%i but V is only %%i pixels wide",filterWidth,vidWidth);
%(fail)s
}
{ // extra scope so fail works
if (vidDur < filterDur)
{
PyErr_Format(PyExc_ValueError, "W has a duration of %%i but V is only %%i pixels long",filterDur,vidDur);
%(fail)s
}
{ // extra scope so fail works
//Read and check stride arguments
const int dr = *(dtype_%(d)s*) PyArray_GETPTR1(%(d)s,0);
const int dc = *(dtype_%(d)s*) PyArray_GETPTR1(%(d)s,1);
const int dt = *(dtype_%(d)s*) PyArray_GETPTR1(%(d)s,2);
if (dr <= 0 || dc <= 0 || dt <= 0)
{
PyErr_Format(PyExc_ValueError,"Conv3D: Strides must all be positive but are %%i, %%i, %%i",dr,dc,dt);
%(fail)s
}
{ // extra scope so fail works
//Make correctly sized output
const long long outputHeight = int( (vidHeight - filterHeight) / dr )+1;
const long long outputWidth = int( (vidWidth - filterWidth) / dc )+1;
const long long outputDur = int( (vidDur - filterDur) / dt ) +1;
npy_intp dims[5];
dims[0] = batchSize;
dims[4] = outputChannels;
dims[1] = outputHeight;
dims[2] = outputWidth;
dims[3] = outputDur;
if(!(%(H)s) || PyArray_DIMS(%(H)s)[0]!=dims[0] ||
PyArray_DIMS(%(H)s)[1]!=dims[1] ||
PyArray_DIMS(%(H)s)[2]!=dims[2] ||
PyArray_DIMS(%(H)s)[3]!=dims[3] ||
PyArray_DIMS(%(H)s)[4]!=dims[4]){
Py_XDECREF(%(H)s);
%(H)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(V)s)->type_num);
if (!(%(H)s)) {
PyErr_Format(PyExc_MemoryError,"Conv3D: Could not allocate output.");
%(fail)s
}
}
{ // extra scope so fail works
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )
const int ws0 = PyArray_STRIDES(%(W)s)[0];
const int ws1 = PyArray_STRIDES(%(W)s)[1];
const int ws2 = PyArray_STRIDES(%(W)s)[2];
const int vs1 = PyArray_STRIDES(%(V)s)[1];
const int ws4 = PyArray_STRIDES(%(W)s)[4];
const int vs4 = PyArray_STRIDES(%(V)s)[4];
const int ws3 = PyArray_STRIDES(%(W)s)[3];
const int vs3 = PyArray_STRIDES(%(V)s)[3];
const int vs2 = PyArray_STRIDES(%(V)s)[2];
const int bs = PyArray_STRIDES(%(b)s)[0];
const int hs4 = PyArray_STRIDES(%(H)s)[4];
// Compute H
//H[i,j,x,y,t] = b_j + sum_k sum_l sum_m sum_z W[j,z,k,l,m] V[i,z, dr*r+k,dc*c+l,dt*t+m]
//TODO: add special cases
// ex: filterDur == 1 && batchSize == 1 && dt = 1 (for SFA)
// ex: inputChannels == 1 """
        # If the data types are not mixed, we can insert special case
        # optimizations based on BLAS.  The gemv fast path is only
        # appended here, so %(gemv)s is defined exactly when it is used.
        VV, WV, bv, dv = node.inputs
        HV = node.outputs[0]
        if (theano.config.blas.ldflags and
                VV.dtype == WV.dtype and HV.dtype == VV.dtype):
            if VV.dtype == 'float64':
                gemv = 'dgemv_'
            elif VV.dtype == 'float32':
                gemv = 'sgemv_'
            else:
                raise Exception('Unrecognized dtype for convolution ' + V.value.dtype)
            codeSource += """
if (inputChannels > 20 && outputChannels > 20 && ws4 == sizeof(ELEM_AT(%(W)s,0)))
{
//std::cout << "lots of channels special case code" << std::endl;
#define blas_type dtype_ ## %(V)s
const blas_type constant_one = 1.0;
char N = 'T';
int ws0e = ws0 / sizeof(ELEM_AT(%(W)s,0));
int vs4e = vs4 / sizeof(ELEM_AT(%(V)s,4));
int hs4e = hs4 / sizeof(ELEM_AT(%(H)s,4));
//special case code for the "lots of channels" case
//uses a BLAS matrix vector multiply to compute the contribute for
//all channels of an input pixel to all channels of an output pixel
//simultaneously
long long Hpos = 0;
long long Vpos = 0;
for (int i = 0; i < batchSize; i++) {
long long Hposi = Hpos;
long long Vposi = Vpos;
for (int r = 0; r < outputHeight; r++) {
long long Hposr = Hpos;
long long Vposr = Vpos;
for (int c = 0; c < outputWidth; c++) {
long long Hposc = Hpos;
long long Vposc = Vpos;
for (int t = 0; t < outputDur; t++) {
long long Hpost = Hpos;
long long Vpost = Vpos;
//of the loops so far, j should be the innermost, because
//each loop through j visits the same elements of V
//this implies that the last index of H should be the j index
//since V and H should have the same format, this means
//z should be the last index in v, and therefore the innermost
//of the next set of for loops
int Wpos = 0;
int bPos = 0;
long long Hposj = Hpos;
for (int j = 0; j < outputChannels; j++) {
// H[i,r,c,t,j] = b[j]
ELEM_AT(%(H)s,Hposj) = ELEM_AT(%(b)s,bPos);
Hposj += hs4;
bPos += bs;
}
dtype_%(H)s * writePos = & ELEM_AT(%(H)s,Hpos);
for (int k =0; k < filterHeight; k++) {
int Wposk = Wpos;
long long Vposk = Vpos;
for (int l = 0; l < filterWidth; l++) {
int Wposl = Wpos;
long long Vposl = Vpos;
for (int m = 0; m < filterDur; m++) {
//H[i,r,c,t,:] += np.dot(W[:,k,l,m,:],V[i,dr*r+k,dc*c+l,dt*t+m,:])
//note: changing the weights so that outputChannels and inputChannels were the last two rather than
//the first and last elements did not speed this up, even for extremely large input sizes
%(gemv)s(&N, & inputChannels, & outputChannels,
&constant_one, & ELEM_AT( %(W)s , Wpos),& ws0e,
& ELEM_AT(%(V)s, Vpos),& vs4e, &constant_one,
writePos,& hs4e);
Wpos += ws3;
Vpos += vs3;
} // close m
Wpos = Wposl + ws2;
Vpos = Vposl + vs2;
} //close l
Wpos = Wposk + PyArray_STRIDES(%(W)s)[1];
Vpos = Vposk + PyArray_STRIDES(%(V)s)[1];
} //close k
Hpos = Hpost + PyArray_STRIDES(%(H)s)[3];
Vpos = Vpost + vs3 * dt;
} //close t
Hpos = Hposc + PyArray_STRIDES(%(H)s)[2];
Vpos = Vposc + vs2 * dc;
} //close c
Hpos = Hposr + PyArray_STRIDES(%(H)s)[1];
Vpos = Vposr + PyArray_STRIDES(%(V)s)[1] * dr;
} //closes r
Hpos = Hposi + PyArray_STRIDES(%(H)s)[0];
Vpos = Vposi + PyArray_STRIDES(%(V)s)[0];
} //closes i
} //closes "lots of channels" special case code
else
"""
        # The general case is always appended; when the BLAS fast path was
        # emitted above it becomes the else-branch of that if.
        codeSource += """
{
//General case code
//std::cout << "general case code" << std::endl;
long long Hpos = 0;
long long Vpos = 0;
for (int i = 0; i < batchSize; i++) {
long long Hposi = Hpos;
long long Vposi = Vpos;
for (int r = 0; r < outputHeight; r++) {
long long Hposr = Hpos;
long long Vposr = Vpos;
for (int c = 0; c < outputWidth; c++) {
long long Hposc = Hpos;
long long Vposc = Vpos;
for (int t = 0; t < outputDur; t++) {
long long Hpost = Hpos;
long long Vpost = Vpos;
//of the loops so far, j should be the innermost, because
//each loop through j visits the same elements of V
//this implies that the last index of H should be the j index
//since V and H should have the same format, this means
//z should be the last index in v, and therefore the innermost
//of the next set of for loops
int Wpos = 0;
int bPos = 0;
for (int j = 0; j < outputChannels; j++) {
long long Hposj = Hpos;
long long Vposj = Vpos;
int Wposj = Wpos;
// H[i,r,c,t,j] = b[j]
dtype_%(H)s & writePos = ELEM_AT(%(H)s,Hpos);
writePos = ELEM_AT(%(b)s,bPos);
for (int k =0; k < filterHeight; k++) {
int Wposk = Wpos;
long long Vposk = Vpos;
for (int l = 0; l < filterWidth; l++) {
int Wposl = Wpos;
long long Vposl = Vpos;
for (int m = 0; m < filterDur; m++) {
int Wposm = Wpos;
long long Vposm = Vpos;
for (int z = 0; z < inputChannels; z++) {
//H[i,r,c,t,j] += W[j,z,k,l,m] * V[i,dr*r+k, dc*c+l, dt*t+m,z]
writePos += ELEM_AT(%(W)s,Wpos) * ELEM_AT(%(V)s,Vpos);
Wpos += ws4;
Vpos += vs4;
} // close z
Wpos = Wposm + ws3;
Vpos = Vposm + vs3;
} // close m
Wpos = Wposl + ws2;
Vpos = Vposl + vs2;
} //close l
Wpos = Wposk + PyArray_STRIDES(%(W)s)[1];
Vpos = Vposk + PyArray_STRIDES(%(V)s)[1];
} //close k
bPos += bs;
Wpos = Wposj + ws0;
Hpos = Hposj + hs4;
Vpos = Vposj;
//std::cout << "incremented Wpos by " << ws0 << std::endl;
//std::cout << "incremented Hpos by " << hs4 << std::endl;
} //close j
Hpos = Hpost + PyArray_STRIDES(%(H)s)[3];
Vpos = Vpost + vs3 * dt;
} //close t
Hpos = Hposc + PyArray_STRIDES(%(H)s)[2];
Vpos = Vposc + vs2 * dc;
} //close c
Hpos = Hposr + PyArray_STRIDES(%(H)s)[1];
Vpos = Vposr + PyArray_STRIDES(%(V)s)[1] * dr;
} //closes r
Hpos = Hposi + PyArray_STRIDES(%(H)s)[0];
Vpos = Vposi + PyArray_STRIDES(%(V)s)[0];
} //closes i
} //closes general case code
}}}}}}} //extra scope so error handler jumps don't cross declarations
///////////// < /code generated by Conv3D >
"""
        return strutil.render_string(codeSource, locals())
# Module-level singleton; the Op is stateless (__props__ == ()), so one
# shared instance suffices for every call site.
_conv3D = Conv3D()
def conv3D(V, W, b, d):
    """
    Apply a 3D "convolution" of multiple filters to a minibatch.

    The kernel is not flipped, and it is moved over the input with a
    user-specified stride.

    Parameters
    ----------
    V
        Input (visible) tensor with dimensions
        (batch, row, column, time, in channel).
    W
        Filter (weight) tensor with dimensions
        (out channel, row, column, time, in channel).
    b
        Bias vector; shape == (W.shape[0],).
    d
        Strides (dx, dy, dt) used when moving the filter over the input.

    Notes
    -----
    The dimension ordering differs from the one used by `conv2d`; this
    layout was chosen for optimization reasons.
    Please use nnet.conv3d instead of this for a faster GPU implementation.

    See Also
    --------
    Someone made a script that shows how to swap the axes
    between both 3d convolution implementations in Theano. See
    the last `attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_

    """
    # Delegate to the shared Conv3D Op instance.
    return _conv3D(V, W, b, d)
def computeH(V, W, b, d):
    """
    Pure-NumPy reference implementation of the Conv3D forward pass.

    Parameters
    ----------
    V : ndarray
        Input videos, shape (batch, row, column, time, in channel).
    W : ndarray
        Filters, shape (out channel, row, column, time, in channel).
    b : ndarray
        Bias vector, shape (W.shape[0],).
    d : sequence of 3 ints
        Positive strides (dx, dy, dt) of the filter over the input.

    Returns
    -------
    ndarray
        H with shape (batch, out row, out col, out time, out channel),
        where H[i, x, y, t, j] = b[j] +
        sum_{k,l,m,z} W[j, k, l, m, z] * V[i, dx*x+k, dy*y+l, dt*t+m, z]
        (the kernel is not flipped).
    """
    assert W.ndim == 5
    assert V.ndim == 5
    if b.ndim != 1:
        print(b.shape)
        assert False
    assert len(d) == 3

    n_batch = V.shape[0]
    n_out_chan = W.shape[0]
    inputChannels = V.shape[4]
    if W.shape[4] != inputChannels:
        raise Exception("W.shape[4] = " + str(W.shape[4]) + " but inputChannels = " + str(inputChannels))

    filt_h, filt_w, filt_d = W.shape[1], W.shape[2], W.shape[3]
    vid_h, vid_w, vid_d = V.shape[1], V.shape[2], V.shape[3]
    assert vid_h >= filt_h
    assert vid_w >= filt_w
    assert vid_d >= filt_d

    dx, dy, dt = d
    assert dx > 0
    assert dy > 0
    assert dt > 0

    # "valid" output extents for each spatial/time axis
    out_h = (vid_h - filt_h) // dx + 1
    out_w = (vid_w - filt_w) // dy + 1
    out_d = (vid_d - filt_d) // dt + 1

    H = np.zeros((n_batch, out_h, out_w, out_d, n_out_chan), dtype=V.dtype)
    # Accumulate directly into H so rounding matches H's dtype.
    for i in range(n_batch):
        for j in range(n_out_chan):
            for x in range(out_h):
                for y in range(out_w):
                    for t in range(out_d):
                        H[i, x, y, t, j] = b[j]
                        for k in range(filt_h):
                            for l in range(filt_w):
                                for m in range(filt_d):
                                    for z in range(inputChannels):
                                        w = W[j, k, l, m, z]
                                        v = V[i, d[0] * x + k, d[1] * y + l, d[2] * t + m, z]
                                        H[i, x, y, t, j] += w * v
    return H
from __future__ import absolute_import, print_function, division
from six.moves import xrange
import numpy as np
import theano
from theano.tensor import basic as T
from theano.misc import strutil
from theano.gradient import grad_undefined
from theano.gradient import DisconnectedType
# TODO: speed up by reordering loops. Should pass through the videos once, incrementing all weight gradients, rather
# than visiting each weight gradient element once and passing through whole video
class ConvGrad3D(theano.Op):
    """
    Gradient of Conv3D with respect to W.
    """

    # Stateless Op.
    __props__ = ()

    def c_code_cache_version(self):
        return (1,)

    def make_node(self, V, d, WShape, dCdH):
        # V: input videos; d: strides; WShape: symbolic shape of the
        # weight gradient to produce; dCdH: gradient of the cost w.r.t.
        # Conv3D's output.
        V_ = T.as_tensor_variable(V)
        d_ = T.as_tensor_variable(d)
        WShape_ = T.as_tensor_variable(WShape)
        dCdH_ = T.as_tensor_variable(dCdH)
        return theano.Apply(self,
                            inputs=[V_, d_, WShape_, dCdH_],
                            outputs=[T.TensorType(
                                V_.dtype,
                                (False, False, False, False, False))()])

    def infer_shape(self, node, input_shapes):
        # The output shape is exactly the shape requested via the
        # symbolic WShape input.
        V, d, W_shape, dCdH = node.inputs
        return [(W_shape[0], W_shape[1], W_shape[2], W_shape[3], W_shape[4])]

    def connection_pattern(self, node):
        # WShape (input 2) only determines the output's shape, so it is
        # disconnected from the output's values.
        return [[True], [True], [False], [True]]

    def grad(self, inputs, output_gradients):
        C, d, WShape, B = inputs
        dLdA, = output_gradients
        z = T.zeros_like(C[0, 0, 0, 0, :])
        dLdC = theano.tensor.nnet.convTransp3D(dLdA, z, d, B, C.shape[1:4])
        # d actually does affect the outputs, so it's not disconnected
        dLdd = grad_undefined(self, 1, d)
        # The shape of the weights doesn't affect the output elements
        dLdWShape = DisconnectedType()()
        dLdB = theano.tensor.nnet.conv3D(C, dLdA, T.zeros_like(B[0, 0, 0, 0, :]), d)
        return [dLdC, dLdd, dLdWShape, dLdB]

    def perform(self, node, inputs, output_storage):
        # Pure-Python fallback computing
        # dCdW[j,k,l,m,z] =
        #     sum_i sum_p sum_q sum_r dCdH[i,p,q,r,j] * V[i,dr*p+k,dc*q+l,dt*r+m,z]
        V, d, WShape, dCdH = inputs
        batchSize = dCdH.shape[0]
        outputHeight = dCdH.shape[1]
        outputWidth = dCdH.shape[2]
        outputDur = dCdH.shape[3]
        assert V.shape[0] == batchSize
        dr, dc, dt = d
        dCdW = np.zeros(WShape, dtype=V.dtype)
        for k in xrange(0, WShape[1]):
            for l in xrange(0, WShape[2]):
                for m in xrange(0, WShape[3]):
                    for i in xrange(0, batchSize):
                        for p in xrange(0, outputHeight):
                            for q in xrange(0, outputWidth):
                                for r in xrange(0, outputDur):
                                    for j in xrange(0, WShape[0]):
                                        for z in xrange(0, WShape[4]):
                                            dCdW[j, k, l, m, z] += (
                                                dCdH[i, p, q, r, j] *
                                                V[i, dr * p + k, dc * q + l,
                                                  dt * r + m, z])
        output_storage[0][0] = dCdW

    def c_code(self, node, nodename, inputs, outputs, sub):
        # Generates the C implementation of the weight-gradient
        # computation; the template is expanded by strutil.render_string
        # with this function's locals().
        V, d, WShape, dCdH = inputs
        fail = sub['fail']
        dCdW = outputs[0]
        codeSource = """
///////////// < code generated by ConvGradW3D >
//printf("\t\t\t\tConvGradW3D c code\\n");
//Check dimensionality of inputs
if (PyArray_NDIM(%(dCdH)s) != 5)
{
PyErr_Format(PyExc_ValueError, "ConvGrad3D: dCdH must be a 5 dimensional tensor");
%(fail)s
}
if (PyArray_NDIM(%(V)s) != 5)
{
PyErr_Format(PyExc_ValueError, "ConvGrad3D: V must be a 5 dimensional tensor");
%(fail)s
}
if (PyArray_NDIM(%(WShape)s) != 1)
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: WShape must be a vector.");
%(fail)s
}
if (PyArray_NDIM(%(d)s) != 1)
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: d must be a vector.");
%(fail)s
}
if (PyArray_DIMS(%(d)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: 3 stride length arguments expected (row, col, time) but %%li were given", (long)PyArray_DIMS(%(d)s)[0]);
%(fail)s
}
{ //extra scope so that fail will not jump over declarations
//Read and check sizes of inputs
const int batchSize = PyArray_DIMS(%(V)s)[0];
if (PyArray_DIMS(%(WShape)s)[0] != 5)
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: WShape must specify a 5D shape");
%(fail)s
}
if (!PyArray_ISCONTIGUOUS(%(WShape)s))
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: WShape must be contiguous");
%(fail)s
}
{ //extra scope so that fail will not jump over declarations
dtype_%(WShape)s * WShape = (dtype_%(WShape)s *) PyArray_DATA(%(WShape)s);
const int outputChannels = WShape[0];
const int inputChannels = PyArray_DIMS(%(V)s)[4];
if (WShape[4] != inputChannels)
{
PyErr_Format(PyExc_ValueError, "ConvGrad3D: W operates on a %%i channel image but the image has %%i channels",(int) WShape[1],inputChannels);
%(fail)s
}
{ //extra scope so fail works
const int filterHeight = WShape[1];
const int filterWidth = WShape[2];
const int filterDur = WShape[3];
const int vidHeight = PyArray_DIMS(%(V)s)[1];
const int vidWidth = PyArray_DIMS(%(V)s)[2];
const int vidDur = PyArray_DIMS(%(V)s)[3];
if (vidHeight < filterHeight)
{
PyErr_Format(PyExc_ValueError, "ConvGrad3D: W has a height of %%i but V is only %%i pixels tall", filterHeight, vidHeight);
%(fail)s
}
if (vidWidth < filterWidth)
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: W has a width of %%i but V is only %%i pixels tall",filterWidth,vidWidth);
%(fail)s
}
if (vidDur < filterDur)
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: W has a duration of %%i but V is only %%i pixels long",filterDur,vidDur);
%(fail)s
}
{ // extra scope so fail works
//Read and check stride arguments
const int dr = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,0);
const int dc = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,1);
const int dt = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,2);
if (dr <= 0 || dc <= 0 || dt <= 0)
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: Strides should all be positive but they are %%i, %%i, %%i",dr,dc,dt);
%(fail)s
}
{ // extra scope so fail works
//Compute correct sized of output
const int outputHeight = int( (vidHeight - filterHeight) / dr )+1;
const int outputWidth = int( (vidWidth - filterWidth) / dc )+1;
const int outputDur = int( (vidDur - filterDur) / dt ) +1;
if (PyArray_DIMS(%(dCdH)s)[0] != batchSize ||
PyArray_DIMS(%(dCdH)s)[4] != outputChannels ||
PyArray_DIMS(%(dCdH)s)[1] != outputHeight ||
PyArray_DIMS(%(dCdH)s)[2] != outputWidth ||
PyArray_DIMS(%(dCdH)s)[3] != outputDur)
{
PyErr_Format(PyExc_ValueError, "dCdH is the wrong size, expected (%%i,%%i,%%i,%%i,%%i), got (%%li,%%li,%%li,%%li,%%li)", batchSize, outputHeight, outputWidth, outputDur, outputChannels, (long)PyArray_DIMS(%(dCdH)s)[0], (long)PyArray_DIMS(%(dCdH)s)[1], (long)PyArray_DIMS(%(dCdH)s)[2], (long)PyArray_DIMS(%(dCdH)s)[3], (long)PyArray_DIMS(%(dCdH)s)[4]);
%(fail)s
}
{ // extra scope for fail
npy_intp dims[5];
dims[0] = outputChannels;
dims[4] = inputChannels;
dims[1] = filterHeight;
dims[2] = filterWidth;
dims[3] = filterDur;
if(!(%(dCdW)s) || PyArray_DIMS(%(dCdW)s)[0]!=dims[0] ||
PyArray_DIMS(%(dCdW)s)[1]!=dims[1] ||
PyArray_DIMS(%(dCdW)s)[2]!=dims[2] ||
PyArray_DIMS(%(dCdW)s)[3]!=dims[3] ||
PyArray_DIMS(%(dCdW)s)[4]!=dims[4] ){
Py_XDECREF(%(dCdW)s);
%(dCdW)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(V)s)->type_num);
if (!(%(dCdW)s)) {
PyErr_Format(PyExc_MemoryError,"ConvGrad3D: Could not allocate dCdW");
%(fail)s
}
}
{ //extra scope so fail works
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )
const int dhs3 = PyArray_STRIDES(%(dCdH)s)[3];
const int dtvs3 = dt * PyArray_STRIDES(%(V)s)[3];
// Compute dCdW
//TODO-- see if this can be made faster by using ELEM_AT instead of ELEM5
// dCdW[j,k,l,m,z] = sum_i sum_p sum_q sum_r dCdH[i,p,q,r,j] * V[i,dr*p+k,dc*q+l,dt*r+m,z]
for (int j = 0; j < outputChannels; j++) {
for (int z = 0; z < inputChannels; z++) {
for (int k = 0; k < filterHeight; k++) {
for (int l = 0; l < filterWidth; l++) {
for (int m = 0; m < filterDur; m++) {
//printf("writePos %%i %%i %%i %%i %%i \\n",j,k,l,m,z);
dtype_%(dCdW)s & writePos = ELEM5(%(dCdW)s, j,k,l,m,z);
writePos = 0;
for (int i = 0; i < batchSize; i++) {
for (int p = 0; p < outputHeight; p++) {
for (int q = 0; q < outputWidth; q++) {
int Hpos = i * PyArray_STRIDES(%(dCdH)s)[0] + j * PyArray_STRIDES(%(dCdH)s)[4] + p * PyArray_STRIDES(%(dCdH)s)[1] + q * PyArray_STRIDES(%(dCdH)s)[2] ;
int Vpos = i * PyArray_STRIDES(%(V)s)[0] + z * PyArray_STRIDES(%(V)s)[4] + (dr * p+k) * PyArray_STRIDES(%(V)s)[1] + (dc*q+l) * PyArray_STRIDES(%(V)s)[2] + m * PyArray_STRIDES(%(V)s)[3];
for (int r = 0; r < outputDur; r++) {
writePos += ELEM5(%(dCdH)s,i,p,q,r,j) * ELEM5(%(V)s,i,dr*p+k,dc*q+l,dt*r+m,z);
//writePos += ELEM_AT(%(dCdH)s,Hpos) * ELEM_AT(%(V)s,Vpos);
Hpos += dhs3;
Vpos += dtvs3;
}
}
}
}
}
}
}
}
}
}}}}}}} // extra scope for fail
///////////// < /code generated by ConvGradW3D >
"""
        return strutil.render_string(codeSource, locals())
# Module-level singleton; the Op is stateless (__props__ == ()), so one
# shared instance suffices for every call site.
convGrad3D = ConvGrad3D()
from __future__ import absolute_import, print_function, division
import numpy as np
from six.moves import xrange
import theano
from theano.tensor import basic as T
from theano.misc import strutil
from theano.gradient import grad_undefined
from theano.gradient import DisconnectedType
class ConvTransp3D(theano.Op):
"""
"Transpose" of Conv3D (Conv3D implements multiplication by an implicitly
defined matrix W. This implements multiplication by its transpose).
"""
__props__ = ()
def c_code_cache_version(self):
return (3,)
def make_node(self, W, b, d, H, RShape=None):
"""
Parameters
----------
W
Weights, filter
b
Bias, shape == (W.shape[0],).
d
Strides when moving the filter over the input.
H
The output of Conv3D.
"""
W_ = T.as_tensor_variable(W)
b_ = T.as_tensor_variable(b)
d_ = T.as_tensor_variable(d)
H_ = T.as_tensor_variable(H)
if RShape:
RShape_ = T.as_tensor_variable(RShape)
else:
RShape_ = T.as_tensor_variable([-1, -1, -1])
return theano.Apply(self,
inputs=[W_, b_, d_, H_, RShape_],
outputs=[T.TensorType(H_.dtype,
(False, False, False, False, False))()])
def infer_shape(self, node, input_shapes):
W, b, d, H, RShape = node.inputs
W_shape, b_shape, d_shape, H_shape, RShape_shape = input_shapes
return [(H_shape[0], RShape[0], RShape[1], RShape[2], W_shape[4])]
def connection_pattern(self, node):
return [[True], [True], [True], [True], [False]]
def grad(self, inputs, output_gradients):
W, b, d, H, RShape = inputs
dCdR, = output_gradients
dCdH = theano.tensor.nnet.conv3D(dCdR, W, T.zeros_like(H[0, 0, 0, 0, :]), d)
WShape = W.shape
dCdW = theano.tensor.nnet.convGrad3D(dCdR, d, WShape, H)
dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
# not differentiable, since d affects the output elements
dCdd = grad_undefined(self, 2, d)
# disconnected, since RShape just determines the output shape
dCdRShape = DisconnectedType()()
if 'name' in dir(dCdR) and dCdR.name is not None:
dCdR_name = dCdR.name
else:
dCdR_name = 'anon_dCdR'
if 'name' in dir(H) and H.name is not None:
H_name = H.name
else:
H_name = 'anon_H'
if 'name' in dir(W) and W.name is not None:
W_name = W.name
else:
W_name = 'anon_W'
if 'name' in dir(b) and b.name is not None:
b_name = b.name
else:
b_name = 'anon_b'
dCdW.name = ('ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name +
',W=' + W_name)
dCdb.name = ('ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name +
',W=' + W_name + ',b=' + b_name)
dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name
return [dCdW, dCdb, dCdd, dCdH, dCdRShape]
def perform(self, node, inputs, output_storage):
    """Python fallback: compute the reconstruction with computeR."""
    W, b, d, H, RShape = inputs
    result = computeR(W, b, d, H, RShape)
    output_storage[0][0] = result
def c_code(self, node, nodename, inputs, outputs, sub):
    """Return C source implementing the transposed 3D convolution.

    Parameters
    ----------
    node
        The Apply node being compiled (unused directly; inputs/outputs
        carry the C variable names).
    nodename
        Unique name of the node (unused here).
    inputs
        C variable names for (W, b, d, H, RShape).
    outputs
        One-element list with the C variable name of the output R.
    sub
        Substitution dict; only sub['fail'] (error-return snippet) is used.

    Returns
    -------
    str
        The C code, with ``%(...)s`` placeholders filled in from the
        local variables via strutil.render_string.
    """
    W, b, d, H, RShape = inputs
    fail = sub['fail']
    R = outputs[0]
    # The "// for fail N" braces open nested C scopes so that variables
    # declared after each validation step are only in scope once the
    # corresponding check has passed.
    codeSource = """
///////////// < code generated by ConvTransp3D >
//printf("\t\t\t\tConvTransp3D c code\\n");
//Check dimensionality of inputs
if (PyArray_NDIM(%(H)s) != 5)
{
PyErr_Format(PyExc_ValueError,
"H must be a 5-D tensor but it is %%i-D",
PyArray_NDIM(%(H)s));
%(fail)s
}
if (PyArray_NDIM(%(W)s) != 5)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: W must be a 5-D tensor");
%(fail)s
}
if (PyArray_NDIM(%(b)s) != 1)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: b must be a vector");
%(fail)s
}
if (PyArray_NDIM(%(d)s) != 1)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: d must be a vector");
%(fail)s
}
//Read and check stride arguments
if (PyArray_DIMS(%(d)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", (long)PyArray_DIMS(%(d)s)[0] );
%(fail)s
}
{ // for fail 1
int dr = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,0);
int dc = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,1);
int dt = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,2);
if (dr <= 0 || dc <= 0 || dt <= 0)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: Strides must all be positive but are %%i, %%i, %%i",dr,dc,dt);
%(fail)s
}
//Read and check sizes of inputs
{ // for fail 2
const int batchSize = PyArray_DIMS(%(H)s)[0];
const int outputChannels = PyArray_DIMS(%(W)s)[0];
if (PyArray_DIMS(%(H)s)[4] != outputChannels)
{
PyErr_Format(PyExc_ValueError, "W produces a %%i channel image but the image has %%li channels. W.shape: (%%li, %%li, %%li, %%li, %%li) H.shape: (%%li, %%li, %%li, %%li, %%li)", outputChannels, (long)PyArray_DIMS(%(H)s)[4], (long)PyArray_DIMS(%(W)s)[0], (long)PyArray_DIMS(%(W)s)[1], (long)PyArray_DIMS(%(W)s)[2], (long)PyArray_DIMS(%(W)s)[3], (long)PyArray_DIMS(%(W)s)[4], (long)PyArray_DIMS(%(H)s)[0], (long)PyArray_DIMS(%(H)s)[1], (long)PyArray_DIMS(%(H)s)[2], (long)PyArray_DIMS(%(H)s)[3], (long)PyArray_DIMS(%(H)s)[4]);
%(fail)s
}
{ // for fail 3
const int inputChannels = PyArray_DIMS(%(W)s)[4];
if (PyArray_DIMS(%(b)s)[0] != inputChannels)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: b operates on a %%li channel image but the image has %%i channels", (long)PyArray_DIMS(%(b)s)[0], inputChannels );
%(fail)s
}
{ // for fail 4
const int filterHeight = PyArray_DIMS(%(W)s)[1];
const int filterWidth = PyArray_DIMS(%(W)s)[2];
const int filterDur = PyArray_DIMS(%(W)s)[3];
const int outputHeight = PyArray_DIMS(%(H)s)[1];
const int outputWidth = PyArray_DIMS(%(H)s)[2];
const int outputDur = PyArray_DIMS(%(H)s)[3];
int videoHeight = (outputHeight-1) * dr + filterHeight;
int videoWidth = (outputWidth-1) * dc + filterWidth;
int videoDur = (outputDur-1) * dt + filterDur;
if (%(RShape)s)
{
if (PyArray_NDIM(%(RShape)s) != 1)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: RShape must be a vector");
%(fail)s
}
if (PyArray_DIMS(%(RShape)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError, "RShape must specify a 3D shape ( [height,width,duration] )");
%(fail)s
}
dtype_%(RShape)s RShape0 = *(dtype_%(RShape)s*)PyArray_GETPTR1(%(RShape)s,0);
dtype_%(RShape)s RShape1 = *(dtype_%(RShape)s*)PyArray_GETPTR1(%(RShape)s,1);
dtype_%(RShape)s RShape2 = *(dtype_%(RShape)s*)PyArray_GETPTR1(%(RShape)s,2);
if (RShape0 != -1)
{
if (RShape0 < videoHeight || RShape1 < videoWidth || RShape2 < videoDur)
{
PyErr_Format(PyExc_ValueError, "Reconstruction must have physical shape of at least [%%i,%%i,%%i] but RShape argument requests that it be [%%i,%%i,%%i]\\n",videoHeight,videoWidth,videoDur,(int) RShape0,(int) RShape1,(int) RShape2);
%(fail)s
}
videoHeight = RShape0;
videoWidth = RShape1;
videoDur = RShape2;
}
} //closes if RShape
{ // for fail 5
//Allocate the reconstruction
npy_intp dims[5];
dims[0] = batchSize;
dims[4] = inputChannels;
dims[1] = videoHeight;
dims[2] = videoWidth;
dims[3] = videoDur;
if(!(%(R)s) || PyArray_DIMS(%(R)s)[0]!=dims[0] ||
PyArray_DIMS(%(R)s)[1]!=dims[1] ||
PyArray_DIMS(%(R)s)[2]!=dims[2] ||
PyArray_DIMS(%(R)s)[3]!=dims[3] ||
PyArray_DIMS(%(R)s)[4]!=dims[4])
{
Py_XDECREF(%(R)s);
%(R)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(H)s)->type_num);
if (!(%(R)s)) {
PyErr_Format(PyExc_MemoryError, "ConvTransp3D: could not allocate R");
%(fail)s
}
}
{ // for fail 6
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )
dtype_%(b)s * b = (dtype_%(b)s *) PyArray_DATA(%(b)s);
int rs4 = PyArray_STRIDES(%(R)s)[4];
int ws0 = PyArray_STRIDES(%(W)s)[0];
int ws4 = PyArray_STRIDES(%(W)s)[4];
int hs4 = PyArray_STRIDES(%(H)s)[4];
// Compute R
// R[i,r,c,t,j] = b_j + sum_{rc,rk | d \circ rc + rk = r} sum_{cc,ck | ...} sum_{tc,tk | ...} sum_k W[k, rk, ck, tk,j] * H[i,rc,cc,tc,k]
for (int i = 0; i < batchSize; i++) {
for (int r = 0; r < videoHeight; r++) {
const int frc = (int)std::max(0.0f, ceilf(float(r-filterHeight+1)/float(dr)));
for (int c = 0; c < videoWidth; c++) {
const int fcc = (int)std::max(0.0f, ceilf(float(c-filterWidth +1)/float(dc)));
for (int t = 0; t < videoDur; t++) {
const int ftc = (int)std::max(0.0f, ceilf(float(t-filterDur +1) /float(dt)));
long long Rpost = i * PyArray_STRIDES(%(R)s)[0] + r * PyArray_STRIDES(%(R)s)[1] + c * PyArray_STRIDES(%(R)s)[2] + t * PyArray_STRIDES(%(R)s)[3];
long long Rpos = Rpost;
for (int j = 0; j < inputChannels; j++)
{
//ELEM5(%(R)s, i,r,c,t,j) = b[j];
ELEM_AT(%(R)s,Rpos) = b[j];
Rpos += rs4;
}
for (int rc = frc; rc < outputHeight; rc++) {
const int rk = r - rc * dr;
if (rk < 0) break;
for (int cc = fcc; cc < outputWidth; cc++) {
const int ck = c - cc * dc;
if (ck < 0) break;
for (int tc = ftc; tc < outputDur; tc++)
{
const int tk = t - tc * dt;
if (tk < 0) break;
int Wpos = rk * PyArray_STRIDES(%(W)s)[1] + ck * PyArray_STRIDES(%(W)s)[2] + tk * PyArray_STRIDES(%(W)s)[3];
int Hpostc = i * PyArray_STRIDES(%(H)s)[0] + rc * PyArray_STRIDES(%(H)s)[1] + cc * PyArray_STRIDES(%(H)s)[2] + tc * PyArray_STRIDES(%(H)s)[3];
Rpos = Rpost;
for (int j = 0; j < inputChannels; j++)
{
int Wposj = Wpos;
dtype_%(R)s & writePos = ELEM_AT(%(R)s,Rpos);
int Hpos = Hpostc;
for (int k = 0; k < outputChannels; k++) {
//TODO-- it's probably bad in terms of cache that our inner loop is over the largest stride of W.... maybe OK since it's the smallest stride of H
//writePos += ELEM5(%(W)s,k,rk,ck,tk,j) * ELEM5(%(H)s,i,rc,cc,tc,k);
//writePos += ELEM_AT(%(W)s,Wpos) * ELEM_AT(%(H)s,Hpos);
writePos += ELEM_AT(%(W)s,Wpos) * ELEM_AT(%(H)s,Hpos);
Wpos += ws0;
Hpos += hs4;
} //close the k loop
Rpos += rs4;
Wpos = Wposj + ws4;
} //close the j loop
} // close the tc loop
} //cc
} //rc
} //t
} //c
} //r
} //i
} //for fail 6
} //for fail 5
} //for fail 4
} //for fail 3
} //for fail 2
} // for fail 1
///////////// < /code generated by ConvTransp3D >
"""
    # Substitute %(W)s, %(fail)s, etc. from the locals defined above.
    return strutil.render_string(codeSource, locals())
# Module-level singleton instance, following Theano's convention of
# exposing a lower-case callable for each Op class.
convTransp3D = ConvTransp3D()
# If the input size wasn't a multiple of D we may need to cause some automatic padding to get the right size of reconstruction
def computeR(W, b, d, H, Rshape=None):
    """Pure-numpy reference implementation of the transposed 3D convolution.

    Parameters
    ----------
    W : ndarray, shape (outputChannels, fH, fW, fD, inputChannels)
        Filters.
    b : ndarray, shape (inputChannels,)
        Bias added to every output position.
    d : sequence of 3 ints
        Strides (dr, dc, dt); all must be positive.
    H : ndarray, shape (batch, oH, oW, oD, outputChannels)
        The output of the forward convolution.
    Rshape : sequence of 3 ints, optional
        Requested reconstruction shape (height, width, duration).  A
        leading -1 (or None) means "infer from H, W and d"; otherwise
        each entry must be at least the inferred size (extra positions
        receive only the bias).

    Returns
    -------
    ndarray, shape (batch, videoHeight, videoWidth, videoDur, inputChannels)
        R[i, r, c, t, j] = b[j] + sum over all (rc, rk) with
        d*rc + rk == r (and likewise for columns/time) of
        W[:, rk, ck, tk, j] . H[i, rc, cc, tc, :].
    """
    assert len(W.shape) == 5
    assert len(H.shape) == 5
    assert len(b.shape) == 1
    assert len(d) == 3

    outputChannels, filterHeight, filterWidth, filterDur, \
        inputChannels = W.shape
    batchSize, outputHeight, outputWidth, outputDur, \
        outputChannelsAgain = H.shape
    assert outputChannelsAgain == outputChannels
    assert b.shape[0] == inputChannels

    dr, dc, dt = d
    assert dr > 0
    assert dc > 0
    assert dt > 0

    # Minimal reconstruction size implied by the forward convolution.
    videoHeight = (outputHeight - 1) * dr + filterHeight
    videoWidth = (outputWidth - 1) * dc + filterWidth
    videoDur = (outputDur - 1) * dt + filterDur

    if Rshape is not None and Rshape[0] != -1:
        if Rshape[0] < videoHeight:
            print((Rshape[0], videoHeight))
            assert False
        assert Rshape[1] >= videoWidth
        assert Rshape[2] >= videoDur
        videoHeight, videoWidth, videoDur = Rshape

    R = np.zeros((batchSize, videoHeight,
                  videoWidth, videoDur, inputChannels), dtype=H.dtype)

    # For each output position, only filter offsets rk = r - rc*dr >= 0
    # with rk < filterHeight contribute; the smallest contributing rc is
    # ceil((r - filterHeight + 1) / dr) clipped to 0 (same for cols/time).
    for i in range(batchSize):
        for j in range(inputChannels):
            for r in range(videoHeight):
                frc = max(0, int(np.ceil(
                    float(r - filterHeight + 1) / float(dr))))
                for c in range(videoWidth):
                    fcc = max(0, int(np.ceil(
                        float(c - filterWidth + 1) / float(dc))))
                    for t in range(videoDur):
                        R[i, r, c, t, j] = b[j]
                        ftc = max(0, int(np.ceil(
                            float(t - filterDur + 1) / float(dt))))
                        for rc in range(frc, outputHeight):
                            rk = r - rc * dr
                            if rk < 0:
                                break
                            for cc in range(fcc, outputWidth):
                                ck = c - cc * dc
                                if ck < 0:
                                    break
                                for tc in range(ftc, outputDur):
                                    tk = t - tc * dt
                                    if tk < 0:
                                        break
                                    R[i, r, c, t, j] += np.dot(
                                        W[:, rk, ck, tk, j],
                                        H[i, rc, cc, tc, :])
    return R
...@@ -22,9 +22,6 @@ from .nnet import ( ...@@ -22,9 +22,6 @@ from .nnet import (
confusion_matrix, softsign) confusion_matrix, softsign)
from . import opt from . import opt
from .conv import ConvOp from .conv import ConvOp
from .Conv3D import *
from .ConvGrad3D import *
from .ConvTransp3D import *
from .sigm import (softplus, sigmoid, sigmoid_inplace, from .sigm import (softplus, sigmoid, sigmoid_inplace,
scalar_sigmoid, ultra_fast_sigmoid, scalar_sigmoid, ultra_fast_sigmoid,
hard_sigmoid) hard_sigmoid)
......
...@@ -854,35 +854,9 @@ class ConvOp(OpenMPOp): ...@@ -854,35 +854,9 @@ class ConvOp(OpenMPOp):
raise NotImplementedError('todo') raise NotImplementedError('todo')
if self.out_mode == 'valid' and (self.dx, self.dy) != (1, 1): if self.out_mode == 'valid' and (self.dx, self.dy) != (1, 1):
# Use the gradient as defined in conv3D, because the implementation raise NotImplementedError(
# by Conv is slow (about 3x slower than conv3D, and probably 10x "ERROR: ConvOp.grad is now disabled for 'valid' convolutions with"
# slower than it could be), and incorrect when dx or dy > 2. " stride != (1, 1); call theano.tensor.nnet.conv2d() instead.")
# build a "node", that should be equivalent to the one given by
# self.make_node, but using conv3D instead of self.
shuffled_inputs = inputs.dimshuffle(0, 2, 3, 'x', 1)
if inputs.name is not None:
shuffled_inputs.name = 'shuffle_for_conv3D(%s)' % inputs.name
flipped_kerns = kerns[:, :, ::-1, ::-1]
if kerns.name is not None:
flipped_kerns.name = 'flipped(%s)' % kerns.name
shuffled_kerns = flipped_kerns.dimshuffle(0, 2, 3, 'x', 1)
if flipped_kerns.name is not None:
shuffled_kerns.name = 'shuffled_for_conv3D(%s)' % flipped_kerns.name
tmp_node = theano.tensor.nnet.conv3D(
V=shuffled_inputs,
W=shuffled_kerns,
b=theano.tensor.alloc(np.asarray(0, dtype=kerns.dtype),
kerns.shape[0]),
d=(self.dx, self.dy, 1))
node = theano.tensor.addbroadcast(
tmp_node, 3).dimshuffle(0, 4, 1, 2)
# mimic what happens inside theano.grad: get the input gradient
# of the final cost wrt all variables involved.
return theano.gradient.grad(cost=None, known_grads={node: gz},
wrt=[inputs, kerns])
if self.dx not in (1, 2) or self.dy not in (1, 2): if self.dx not in (1, 2) or self.dy not in (1, 2):
raise NotImplementedError( raise NotImplementedError(
......
...@@ -30,9 +30,6 @@ from theano.tensor import opt ...@@ -30,9 +30,6 @@ from theano.tensor import opt
# Cpu implementation # Cpu implementation
from theano.tensor.nnet.conv import conv2d, ConvOp from theano.tensor.nnet.conv import conv2d, ConvOp
from theano.tensor.nnet.Conv3D import conv3D
from theano.tensor.nnet.ConvGrad3D import convGrad3D
from theano.tensor.nnet.ConvTransp3D import convTransp3D
@gof.local_optimizer([SparseBlockGemv], inplace=True) @gof.local_optimizer([SparseBlockGemv], inplace=True)
...@@ -257,39 +254,6 @@ def local_conv2d_cpu(node): ...@@ -257,39 +254,6 @@ def local_conv2d_cpu(node):
return [rval] return [rval]
@local_optimizer([AbstractConv3d])
def local_conv3d_cpu(node):
if not isinstance(node.op, AbstractConv3d):
return None
img, kern = node.inputs
if ((not isinstance(img.type, TensorType) or
not isinstance(kern.type, TensorType))):
return None
if node.op.border_mode not in ['valid', (0, 0, 0)]:
return None
if node.op.filter_dilation != (1, 1, 1):
return None
if node.op.num_groups > 1:
return None
bias = theano.tensor.zeros_like(kern[:, 0, 0, 0, 0])
# need to flip the kernel if necessary (conv3D does not flip)
if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1, ::-1]
# conv3D expects shape (batch, row, column, time, channel)
img = img.dimshuffle(0, 2, 3, 4, 1)
kern = kern.dimshuffle(0, 2, 3, 4, 1)
rval = conv3D(img, kern, bias, node.op.subsample)
copy_stack_trace(node.outputs[0], rval)
rval = rval.dimshuffle(0, 4, 1, 2, 3)
return [rval]
@local_optimizer([AbstractConv2d_gradWeights]) @local_optimizer([AbstractConv2d_gradWeights])
def local_conv2d_gradweight_cpu(node): def local_conv2d_gradweight_cpu(node):
if (not isinstance(node.op, AbstractConv2d_gradWeights) or if (not isinstance(node.op, AbstractConv2d_gradWeights) or
...@@ -311,28 +275,7 @@ def local_conv2d_gradweight_cpu(node): ...@@ -311,28 +275,7 @@ def local_conv2d_gradweight_cpu(node):
if node.op.border_mode == 'valid' and \ if node.op.border_mode == 'valid' and \
(node.op.subsample != (1, 1)): (node.op.subsample != (1, 1)):
# Use the gradient as defined in conv3D, because the implementation return None
# by Conv is slow (about 3x slower than conv3D, and probably 10x
# slower than it could be), and incorrect when subsample > 2.
# build a "node", that should be equivalent to the one given by
# self.make_node, but using convGrad3D instead.
shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1)
shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
rval = convGrad3D(V=shuffled_img,
d=(node.op.subsample[0], node.op.subsample[1], 1),
WShape=(shuffled_topgrad.shape[4],
shape[0], shape[1], 1,
shuffled_img.shape[4]),
dCdH=shuffled_topgrad)
copy_stack_trace(node.outputs[0], rval)
rval = theano.tensor.addbroadcast(rval, 3)
rval = rval.dimshuffle(0, 4, 1, 2)
rval = rval[:, :, ::-1, ::-1]
rval = theano.tensor.patternbroadcast(rval,
node.outputs[0].broadcastable)
copy_stack_trace(node.outputs[0], rval)
return [rval]
dx, dy = node.op.subsample dx, dy = node.op.subsample
if dx not in (1, 2) or dy not in (1, 2): if dx not in (1, 2) or dy not in (1, 2):
...@@ -411,41 +354,6 @@ def local_conv2d_gradweight_cpu(node): ...@@ -411,41 +354,6 @@ def local_conv2d_gradweight_cpu(node):
return [res] return [res]
@local_optimizer([AbstractConv3d_gradWeights])
def local_conv3d_gradweight_cpu(node):
if not isinstance(node.op, AbstractConv3d_gradWeights):
return None
img, topgrad, shape = node.inputs
if ((not isinstance(img.type, TensorType) or
not isinstance(topgrad.type, TensorType))):
return None
if node.op.border_mode not in ['valid', (0, 0, 0)]:
return None
if node.op.filter_dilation != (1, 1, 1):
return None
if node.op.num_groups > 1:
return None
# conv3D expects shape (batch, row, column, time, channel)
img = img.dimshuffle(0, 2, 3, 4, 1)
topgrad = topgrad.dimshuffle(0, 2, 3, 4, 1)
W_shape = (topgrad.shape[4], shape[0], shape[1], shape[2], img.shape[4])
rval = convGrad3D(img, node.op.subsample, W_shape, topgrad)
copy_stack_trace(node.outputs[0], rval)
rval = rval.dimshuffle(0, 4, 1, 2, 3)
# need to flip the kernel if necessary (conv3D does not flip)
if node.op.filter_flip:
rval = rval[:, :, ::-1, ::-1, ::-1]
rval = theano.tensor.patternbroadcast(rval,
node.outputs[0].broadcastable)
return [rval]
@local_optimizer([AbstractConv2d_gradInputs]) @local_optimizer([AbstractConv2d_gradInputs])
def local_conv2d_gradinputs_cpu(node): def local_conv2d_gradinputs_cpu(node):
if (not isinstance(node.op, AbstractConv2d_gradInputs) or if (not isinstance(node.op, AbstractConv2d_gradInputs) or
...@@ -467,22 +375,8 @@ def local_conv2d_gradinputs_cpu(node): ...@@ -467,22 +375,8 @@ def local_conv2d_gradinputs_cpu(node):
# Conv 3d implementation, needed when subsample > 2 # Conv 3d implementation, needed when subsample > 2
if node.op.border_mode == 'valid' and node.op.subsample != (1, 1): if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
kern = kern[:, :, ::-1, ::-1] # The op don't support that anymore.
shuffled_kern = kern.dimshuffle(0, 2, 3, 'x', 1) return False
shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
b = theano.tensor.zeros_like(shuffled_kern[0, 0, 0, 0, :])
rval = convTransp3D(W=shuffled_kern, b=b,
d=(node.op.subsample[0], node.op.subsample[1], 1),
H=shuffled_topgrad,
RShape=(shape[0], shape[1], 1))
copy_stack_trace(node.outputs[0], rval)
rval = theano.tensor.addbroadcast(rval, 3)
rval = rval.dimshuffle(0, 4, 1, 2)
rval = theano.tensor.patternbroadcast(rval,
node.outputs[0].broadcastable)
copy_stack_trace(node.outputs[0], rval)
return [rval]
# Conv2d Implementation # Conv2d Implementation
dx, dy = node.op.subsample dx, dy = node.op.subsample
...@@ -538,40 +432,6 @@ def local_conv2d_gradinputs_cpu(node): ...@@ -538,40 +432,6 @@ def local_conv2d_gradinputs_cpu(node):
return [din] return [din]
@local_optimizer([AbstractConv3d_gradInputs])
def local_conv3d_gradinputs_cpu(node):
if not isinstance(node.op, AbstractConv3d_gradInputs):
return None
kern, topgrad, shape = node.inputs
if ((not isinstance(kern.type, TensorType) or
not isinstance(topgrad.type, TensorType))):
return None
if node.op.border_mode not in ['valid', (0, 0, 0)]:
return None
if node.op.filter_dilation != (1, 1, 1):
return None
if node.op.num_groups > 1:
return None
# need to flip the kernel if necessary (conv3D does not flip)
if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1, ::-1]
# conv3D expects shape (batch, row, column, time, channel)
kern = kern.dimshuffle(0, 2, 3, 4, 1)
topgrad = topgrad.dimshuffle(0, 2, 3, 4, 1)
bias = theano.tensor.zeros_like(kern[0, 0, 0, 0, :])
rval = convTransp3D(kern, bias, node.op.subsample, topgrad, shape)
copy_stack_trace(node.outputs[0], rval)
rval = rval.dimshuffle(0, 4, 1, 2, 3)
rval = theano.tensor.patternbroadcast(rval,
node.outputs[0].broadcastable)
return [rval]
# Register Cpu Optmization # Register Cpu Optmization
conv_groupopt = theano.gof.optdb.LocalGroupDB() conv_groupopt = theano.gof.optdb.LocalGroupDB()
conv_groupopt.__name__ = "conv_opts" conv_groupopt.__name__ = "conv_opts"
...@@ -595,6 +455,7 @@ conv_groupopt.register('local_abstractconv3d_gradweight_gemm', ...@@ -595,6 +455,7 @@ conv_groupopt.register('local_abstractconv3d_gradweight_gemm',
conv_groupopt.register('local_abstractconv3d_gradinputs_gemm', conv_groupopt.register('local_abstractconv3d_gradinputs_gemm',
local_abstractconv3d_gradinputs_gemm, 30, local_abstractconv3d_gradinputs_gemm, 30,
'conv_gemm', 'fast_compile', 'fast_run') 'conv_gemm', 'fast_compile', 'fast_run')
# Legacy convolution # Legacy convolution
conv_groupopt.register('local_conv2d_cpu', local_conv2d_cpu, 40, conv_groupopt.register('local_conv2d_cpu', local_conv2d_cpu, 40,
'fast_compile', 'fast_run') 'fast_compile', 'fast_run')
...@@ -604,14 +465,6 @@ conv_groupopt.register('local_conv2d_gradweight_cpu', ...@@ -604,14 +465,6 @@ conv_groupopt.register('local_conv2d_gradweight_cpu',
conv_groupopt.register('local_conv2d_gradinputs_cpu', conv_groupopt.register('local_conv2d_gradinputs_cpu',
local_conv2d_gradinputs_cpu, 40, local_conv2d_gradinputs_cpu, 40,
'fast_compile', 'fast_run') 'fast_compile', 'fast_run')
conv_groupopt.register('local_conv3d_cpu', local_conv3d_cpu, 40,
'fast_compile', 'fast_run')
conv_groupopt.register('local_conv3d_gradweight_cpu',
local_conv3d_gradweight_cpu, 40,
'fast_compile', 'fast_run')
conv_groupopt.register('local_conv3d_gradinputs_cpu',
local_conv3d_gradinputs_cpu, 40,
'fast_compile', 'fast_run')
# Verify that no AbstractConv are present in the graph # Verify that no AbstractConv are present in the graph
......
...@@ -24,14 +24,10 @@ from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D ...@@ -24,14 +24,10 @@ from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D
from theano.tensor.nnet.abstract_conv import bilinear_kernel_2D from theano.tensor.nnet.abstract_conv import bilinear_kernel_2D
from theano.tensor.nnet.abstract_conv import bilinear_upsampling from theano.tensor.nnet.abstract_conv import bilinear_upsampling
from theano.tensor.nnet.abstract_conv import separable_conv2d from theano.tensor.nnet.abstract_conv import separable_conv2d
from theano.tensor.nnet.conv import ConvOp
from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights, from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights,
CorrMM_gradInputs) CorrMM_gradInputs)
from theano.tensor.nnet.corr3d import (Corr3dMM, Corr3dMM_gradWeights, from theano.tensor.nnet.corr3d import (Corr3dMM, Corr3dMM_gradWeights,
Corr3dMM_gradInputs) Corr3dMM_gradInputs)
from theano.tensor.nnet.Conv3D import Conv3D
from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
from theano.tensor.nnet.ConvTransp3D import ConvTransp3D
def conv2d_corr(inputs, filters, border_mode="valid", def conv2d_corr(inputs, filters, border_mode="valid",
...@@ -794,152 +790,6 @@ class TestAbstractConvNoOptim(BaseTestConv2d): ...@@ -794,152 +790,6 @@ class TestAbstractConvNoOptim(BaseTestConv2d):
ref=None, mode=mode) ref=None, mode=mode)
class TestCpuConv2d(BaseTestConv2d):
@classmethod
def setup(cls):
BaseTestConv2d.setup_class()
cls.mode = theano.compile.mode.get_default_mode().excluding('conv_gemm')
cls.opt_err = theano.config.on_opt_error
theano.config.on_opt_error = 'ignore'
@classmethod
def tearDown(cls):
theano.config.on_opt_error = cls.opt_err
def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
if fd != (1, 1):
raise SkipTest("No dilation implementation for basic cpu ConvOp.")
if not theano.config.cxx:
raise SkipTest("Need cxx to test conv2d")
mode = self.mode
o = self.get_output_shape(i, f, s, b, fd)
fwd_OK = True
gradweight_OK = True
gradinput_OK = True
if not flip:
fwd_OK = False
gradweight_OK = False
gradinput_OK = False
if b not in ((0, 0), 'valid', 'full'):
fwd_OK = False
gradweight_OK = False
gradinput_OK = False
if (not provide_shape) and (s != (1, 1)) and (b == 'full'):
gradweight_OK = False
gradinput_OK = False
if ((s[0] not in (1, 2)) or (s[1] not in (1, 2))) and (b == 'full'):
gradweight_OK = False
gradinput_OK = False
if fwd_OK:
# This test can run even when theano.config.blas.ldflags is empty.
self.run_fwd(inputs_shape=i, filters_shape=f,
subsample=s, verify_grad=(gradweight_OK and gradinput_OK),
mode=mode, provide_shape=provide_shape,
border_mode=b, filter_flip=flip, target_op=ConvOp,
check_trace=True, filter_dilation=fd)
else:
assert_raises(AssertionError,
self.run_fwd,
inputs_shape=i,
filters_shape=f,
subsample=s,
verify_grad=False,
mode=mode,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
check_trace=True,
filter_dilation=fd)
if gradweight_OK:
# This test can run even when theano.config.blas.ldflags is empty.
self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=(ConvOp, ConvGrad3D),
check_trace=True,
filter_dilation=fd)
else:
assert_raises(AssertionError,
self.run_gradweight,
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=False,
mode=mode,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
check_trace=True,
filter_dilation=fd)
if gradinput_OK:
# This test can run even when theano.config.blas.ldflags is empty.
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=(ConvOp, ConvTransp3D),
check_trace=True,
filter_dilation=fd)
else:
assert_raises(AssertionError,
self.run_gradinput,
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=False,
mode=mode,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
check_trace=True,
filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
if not theano.config.cxx:
raise SkipTest("Need cxx to test conv2d")
if fd != (1, 1):
raise SkipTest("No dilation implementation for basic cpu ConvOp.")
mode = self.mode
if not flip:
return
if b not in ((0, 0), 'valid', 'full'):
return
if (not provide_shape) and (s != (1, 1)) and (b == 'full'):
return
if ((s[0] not in (1, 2)) or (s[1] not in (1, 2))) and (b == 'full'):
return
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=(ConvOp, ConvTransp3D),
check_trace=True,
filter_dilation=fd)
else:
# we do not check for inconsistent shapes,
# because this older implementation does not check that
raise SkipTest('Inconsistent shapes are not tested for old cpu ConvOp.')
class BaseTestConv3d(BaseTestConv): class BaseTestConv3d(BaseTestConv):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
...@@ -1098,134 +948,6 @@ class TestCorrConv3d(BaseTestConv3d): ...@@ -1098,134 +948,6 @@ class TestCorrConv3d(BaseTestConv3d):
ref=None, check_trace=True, filter_dilation=fd) ref=None, check_trace=True, filter_dilation=fd)
class TestCpuConv3d(BaseTestConv3d):
@classmethod
def setup(cls):
BaseTestConv3d.setup_class()
cls.mode = theano.compile.mode.get_default_mode().excluding('conv_gemm')
cls.opt_err = theano.config.on_opt_error
theano.config.on_opt_error = 'ignore'
@classmethod
def tearDown(cls):
theano.config.on_opt_error = cls.opt_err
def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1, 1)):
if fd != (1, 1, 1):
raise SkipTest("No dilation implementation for basic cpu Conv3D.")
if not theano.config.cxx:
raise SkipTest("Need cxx to test conv2d")
if min(i) == 0 or min(f) == 0:
raise SkipTest('Not tested for old cpu Conv3D.')
mode = self.mode
o = self.get_output_shape(i, f, s, b, fd)
fwd_OK = True
gradweight_OK = True
gradinput_OK = True
if b not in ((0, 0, 0), 'valid'):
fwd_OK = False
gradweight_OK = False
gradinput_OK = False
if fwd_OK:
# This test can run even when theano.config.blas.ldflags is empty.
self.run_fwd(inputs_shape=i, filters_shape=f,
subsample=s, verify_grad=(gradweight_OK and gradinput_OK),
mode=mode, provide_shape=provide_shape,
border_mode=b, filter_flip=flip, target_op=Conv3D,
check_trace=True, filter_dilation=fd)
else:
assert_raises(AssertionError,
self.run_fwd,
inputs_shape=i,
filters_shape=f,
subsample=s,
verify_grad=False,
mode=mode,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
check_trace=True,
filter_dilation=fd)
if gradweight_OK:
# This test can run even when theano.config.blas.ldflags is empty.
self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=ConvGrad3D,
check_trace=True,
filter_dilation=fd)
else:
assert_raises(AssertionError,
self.run_gradweight,
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=False,
mode=mode,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
check_trace=True,
filter_dilation=fd)
if gradinput_OK:
# This test can run even when theano.config.blas.ldflags is empty.
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=ConvTransp3D,
check_trace=True,
filter_dilation=fd)
else:
assert_raises(AssertionError,
self.run_gradinput,
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=False,
mode=mode,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
check_trace=True,
filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False):
if fd != (1, 1, 1):
raise SkipTest("No dilation implementation for basic cpu Conv3D.")
mode = self.mode
if min(i) == 0 or min(f) == 0 or min(o) == 0:
raise SkipTest('Not tested for old cpu Conv3D.')
if b not in ((0, 0, 0), 'valid'):
return
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=ConvTransp3D,
check_trace=True,
filter_dilation=fd)
else:
# we do not check for inconsistent shapes,
# because this older implementation does not check that
raise SkipTest('Inconsistent shapes are not tested for old cpu Conv3D.')
def test_constant_shapes(): def test_constant_shapes():
# Check that the `imshp` and `kshp` parameters of the AbstractConv Ops # Check that the `imshp` and `kshp` parameters of the AbstractConv Ops
# are rejected if not constant or None # are rejected if not constant or None
......
...@@ -294,15 +294,17 @@ class TestConv2D(utt.InferShapeTester): ...@@ -294,15 +294,17 @@ class TestConv2D(utt.InferShapeTester):
""" """
Tests convolution where subsampling != (1,1) Tests convolution where subsampling != (1,1)
""" """
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 2))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', subsample=(2, 2)) self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', subsample=(2, 2))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 1))
self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'valid', subsample=(3, 3))
# Fails as of 2012-07-11 # Fails as of 2012-07-11
self.assertRaises(NotImplementedError, self.validate, (1, 1, 6, 6), self.assertRaises(NotImplementedError, self.validate, (1, 1, 6, 6),
(1, 1, 3, 3), 'full', subsample=(3, 3)) (1, 1, 3, 3), 'full', subsample=(3, 3))
# Fails as of 2017-08-10
self.assertRaises(NotImplementedError, self.validate, (3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 2))
self.assertRaises(NotImplementedError, self.validate, (3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 1))
self.assertRaises(NotImplementedError, self.validate, (1, 1, 6, 6), (1, 1, 3, 3), 'valid', subsample=(3, 3))
def test_shape_Constant_tensor(self): def test_shape_Constant_tensor(self):
""" """
Tests convolution where the {image,filter}_shape is a Constant tensor. Tests convolution where the {image,filter}_shape is a Constant tensor.
...@@ -604,9 +606,6 @@ class TestConv2D(utt.InferShapeTester): ...@@ -604,9 +606,6 @@ class TestConv2D(utt.InferShapeTester):
excluding=['conv_gemm']) excluding=['conv_gemm'])
class TestDefaultConv2D(TestConv2D):
conv2d = staticmethod(theano.tensor.nnet.conv2d)
# Test that broadcasting of gradients works correctly when using the # Test that broadcasting of gradients works correctly when using the
# nnet.conv2d() interface. This was reported in #3763, and uses the example # nnet.conv2d() interface. This was reported in #3763, and uses the example
# code from that ticket. # code from that ticket.
......
from __future__ import absolute_import, print_function, division
import theano
import theano.tensor as T
from theano import function, shared
from theano.tests import unittest_tools as utt
from theano.tensor.nnet.ConvTransp3D import convTransp3D, ConvTransp3D
from theano.tensor.nnet.ConvGrad3D import convGrad3D, ConvGrad3D
from theano.tensor.nnet.Conv3D import conv3D, Conv3D
from theano.tests.unittest_tools import attr
from nose.plugins.skip import SkipTest
import numpy as N
from six.moves import xrange
import copy
import theano.sparse
if theano.sparse.enable_sparse:
from scipy import sparse
floatX = theano.config.floatX
# TODO: each individual test method should seed rng with utt.fetch_seed()
# as it is right now, setUp does the seeding, so if you run just
# a subset of the tests they will do different things than if you
# run all of them
class DummyConv3D:
    """Expose conv3D to verify_grad as a function of a single scalar.

    The stride is held fixed because it is not differentiable.  At
    construction a random perturbation direction through (V, W, b) space
    is drawn once; the scalar argument of __call__ selects a point on the
    line through the supplied values along that direction (t == 0 yields
    the values themselves).  Restricting the inputs to this line means
    verify_grad probes one variable instead of hundreds: we lose certainty
    that every individual variable is correct, but the random projection
    lets us test many variables very quickly.
    """

    def __init__(self, rng, VWbVals, d):
        """
        param: rng Random number generator used to pick direction of the
               line
        param: VWbVals tuple containing values to test V,W,b around
        param: d shared variable for d, the stride
        """
        self.V, self.W, self.b = VWbVals
        # One random direction per input, matching its shape.  Draw order
        # (V, W, b) matters for rng reproducibility.
        for attr, var in (('dV', self.V), ('dW', self.W), ('db', self.b)):
            setattr(self, attr, shared(
                rng.uniform(-1, 1, var.get_value(borrow=True).shape)))
        self.d = d

    def __call__(self, t):
        # Evaluate the convolution at position t along the stored line.
        return conv3D(self.V + t * self.dV,
                      self.W + t * self.dW,
                      self.b + t * self.db,
                      self.d)
class DummyConvGrad3D:
    """Expose convGrad3D to verify_grad as a function of a single scalar.

    Analogous to DummyConv3D: a random direction through (V, dCdH) space
    is fixed at construction and the scalar argument walks along the line
    through the supplied values in that direction.
    """

    def __init__(self, rng, VdHvals, d, WShape):
        """
        param: rng Random number generator used to pick direction of the
               line
        param: VdHvals tuple containing values to test V and dCdH around
        param: d shared variable for d, the stride
        param: WShape shape of the filter tensor W
        """
        self.V, self.dCdH = VdHvals

        def direction(var):
            # Random perturbation with the same shape as ``var``.
            return shared(rng.uniform(
                -1, 1, var.get_value(borrow=True).shape))

        # Draw order (V first, then dCdH) preserved for rng reproducibility.
        self.dV = direction(self.V)
        self.ddCdH = direction(self.dCdH)
        self.d = d
        self.WShape = WShape

    def __call__(self, t):
        # Evaluate the gradient op at position t along the stored line.
        return convGrad3D(self.V + t * self.dV, self.d, self.WShape,
                          self.dCdH + t * self.ddCdH)
class DummyConvTransp3D:
    """Expose convTransp3D to verify_grad as a function of a single scalar.

    Analogous to DummyConv3D: a random direction through (W, b, H) space
    is fixed at construction and the scalar argument walks along the line
    through the supplied values in that direction.
    """

    def __init__(self, rng, WbHvals, d, RShape):
        """
        param: rng Random number generator used to pick direction of the
               line
        param: WbHvals tuple containing values to test W,b,H around
        param: d shared variable for d, the stride
        param: RShape requested output shape for the transposed convolution
        """
        self.W, self.b, self.H = WbHvals
        # Draw one direction per input (order W, b, H preserved for rng
        # reproducibility) and store each as a shared variable so that
        # __call__ builds a purely symbolic expression.
        self.dW = shared(
            rng.uniform(-1, 1, self.W.get_value(borrow=True).shape))
        self.db = shared(
            rng.uniform(-1, 1, self.b.get_value(borrow=True).shape))
        self.dH = shared(
            rng.uniform(-1, 1, self.H.get_value(borrow=True).shape))
        self.d = d
        self.RShape = RShape

    def __call__(self, t):
        # Evaluate the transposed convolution at position t along the line.
        return convTransp3D(self.W + t * self.dW, self.b + t * self.db,
                            self.d, self.H + t * self.dH, self.RShape)
class TestConv3D(utt.InferShapeTester):
    """Tests for the Conv3D op and its gradient/transpose companion ops.

    setUp builds a symbolic reconstruction problem around
    ``H = conv3D(V, W, b, d)`` and
    ``R = convTransp3D(W, rb, d, H, RShape)`` with objective
    ``reconsObj = sum((V - R) ** 2)``, then compiles the functions the
    individual test methods exercise.
    """

    def setUp(self):
        super(TestConv3D, self).setUp()
        utt.seed_rng()
        self.rng = N.random.RandomState(utt.fetch_seed())
        # Copy the default mode and disable python-code checking; the
        # explicit C-vs-python comparison happens in DEBUG_MODE below.
        mode = copy.copy(theano.compile.mode.get_default_mode())
        mode.check_py_code = False
        # Shared inputs.  Shapes here are placeholders; each test calls
        # randomize()/set_value before evaluating.  Per randomize(), the
        # layout is W: (filters, row, col, time, channel) and
        # V: (batch, row, col, time, channel).
        self.W = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
        self.W.name = 'W'
        self.b = shared(N.zeros(1, dtype=floatX))
        self.b.name = 'b'
        # rb: bias used by the transposed (reconstruction) direction.
        self.rb = shared(N.zeros(1, dtype=floatX))
        self.rb.name = 'rb'
        self.V = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
        self.V.name = 'V'
        # d holds the three strides.
        self.d = shared(N.ones(shape=(3, ), dtype=int))
        self.d.name = 'd'
        # Forward convolution and compiled evaluators for it.
        self.H = conv3D(self.V, self.W, self.b, self.d)
        self.H.name = 'H'
        self.H_func = function([], self.H, mode=mode)
        self.H_shape_func = function([], self.H.shape, mode=mode)
        # RShape: requested output shape for the transposed convolution.
        self.RShape = T.vector(dtype='int64')
        self.RShape.name = 'RShape'
        # Transposed convolution applied to an arbitrary 5-tensor input.
        self.otherH = T.TensorType(
            floatX, (False, False, False, False, False))(name='otherH')
        self.transp = convTransp3D(self.W, self.rb, self.d,
                                   self.otherH, self.RShape)
        self.transp.name = 'transp'
        self.transp_func = function([self.otherH, self.RShape],
                                    self.transp, mode=mode)
        # Reconstruction R of V from H, and the squared-error objective.
        self.R = convTransp3D(self.W, self.rb, self.d, self.H, self.RShape)
        self.R.name = 'R'
        self.R_func = function([self.RShape], self.R, mode=mode)
        self.R_shape_func = function([self.RShape], self.R.shape)
        diff = self.V - self.R
        diff.name = 'diff'
        sqr = T.sqr(diff)
        sqr.name = 'sqr'
        self.reconsObj = T.sum(sqr)
        self.reconsObj.name = 'reconsObj'
        self.reconsObjFunc = function([self.RShape], self.reconsObj, mode=mode)
        W_grad = T.grad(self.reconsObj, self.W)
        # Gradients of the objective w.r.t. every input, compiled twice:
        # once under the normal mode, and once under DEBUG_MODE, which
        # cross-checks the C implementation against the python one.
        self.gradientsFunc = function(
            [self.RShape],
            [W_grad, T.grad(self.reconsObj, self.H), T.grad(self.reconsObj, self.V),
             T.grad(self.reconsObj, self.b)], mode=mode)
        self.check_c_against_python = function(
            [self.RShape],
            [T.grad(self.reconsObj, self.W), T.grad(self.reconsObj, self.H), T.grad(self.reconsObj, self.V),
             T.grad(self.reconsObj, self.b)], mode='DEBUG_MODE')
        self.dCdW_shape_func = function([self.RShape], T.grad(self.reconsObj, self.W).shape, mode=mode)

    def random_tensor(self, *dims):
        """Return a floatX array of shape ``dims``, uniform in [-.05, .05)."""
        return N.asarray(self.rng.uniform(-.05, .05, dims), dtype=floatX)

    def randomize(self):
        """Assign random but mutually consistent shapes and values to
        V, W, b, rb and the strides d."""
        batchSize = self.rng.randint(1, 4)
        videoDur = self.rng.randint(8, 30)
        filterWidth = self.rng.randint(1, 8)
        filterHeight = self.rng.randint(1, 8)
        filterDur = self.rng.randint(1, 8)
        tsteps = self.rng.randint(1, 4)
        rsteps = self.rng.randint(1, 4)
        csteps = self.rng.randint(1, 4)
        # Video dimensions are derived from the filter size so the filter
        # fits a whole number of steps plus a little slack; note this
        # overwrites the videoDur drawn above.
        videoDur = tsteps * filterDur + self.rng.randint(0, 3)
        videoWidth = csteps * filterWidth + self.rng.randint(0, 3)
        videoHeight = rsteps * filterHeight + self.rng.randint(0, 3)
        numFilters = self.rng.randint(1, 3)
        inputChannels = self.rng.randint(1, 3)
        # Write the three strides in place into the shared variable d.
        self.d.get_value(borrow=True, return_internal_type=True)[0] = \
            self.rng.randint(1, 15)
        self.d.get_value(borrow=True, return_internal_type=True)[1] = \
            self.rng.randint(1, 15)
        self.d.get_value(borrow=True, return_internal_type=True)[2] = \
            self.rng.randint(1, 15)
        # NOTE(review): the next three expressions compute output sizes but
        # discard the results -- dead code left over from an earlier version.
        int((videoHeight - filterHeight) /
            self.d.get_value(borrow=True)[0]) + 1
        int((videoWidth - filterWidth) /
            self.d.get_value(borrow=True)[1]) + 1
        int((videoDur - filterDur) /
            self.d.get_value(borrow=True)[2]) + 1
        self.W.set_value(self.random_tensor(
            numFilters, filterHeight,
            filterWidth, filterDur, inputChannels), borrow=True)
        self.b.set_value(self.random_tensor(numFilters), borrow=True)
        self.rb.set_value(self.random_tensor(inputChannels), borrow=True)
        self.V.set_value(self.random_tensor(
            batchSize, videoHeight,
            videoWidth, videoDur, inputChannels), borrow=True)
        # NOTE(review): rb is assigned a second time here, redundant with
        # the assignment above; harmless but consumes extra rng draws.
        self.rb.set_value(self.random_tensor(inputChannels), borrow=True)

    def test_c_against_python(self):
        """Run the gradients under DEBUG_MODE, which compares the C
        implementation against the python one."""
        self.randomize()
        self.check_c_against_python(self.V.get_value(borrow=True).shape[1:4])

    @attr('slow')
    def test_c_against_mat_mul(self):
        # Use a filter of the same size as the image, so the convolution is
        # just a dense matrix multiply.
        # Check that dense matrix multiplication gives the same result as
        # convolution.
        batchSize = self.rng.randint(1, 10)
        videoDur = self.rng.randint(3, 10)
        videoWidth = self.rng.randint(1, 5)
        videoHeight = self.rng.randint(1, 5)
        filterWidth = videoWidth
        filterHeight = videoHeight
        filterDur = videoDur
        numFilters = self.rng.randint(1, 3)
        inputChannels = self.rng.randint(1, 4)
        # Strides are still randomized even though the filter covers the
        # whole video, so only a single output position exists.
        self.d.get_value(borrow=True, return_internal_type=True)[0] = \
            self.rng.randint(1, 15)
        self.d.get_value(borrow=True, return_internal_type=True)[1] = \
            self.rng.randint(1, 15)
        self.d.get_value(borrow=True, return_internal_type=True)[2] = \
            self.rng.randint(1, 15)
        self.W.set_value(self.random_tensor(
            numFilters, filterHeight,
            filterWidth, filterDur, inputChannels), borrow=True)
        # Sparsify the kernel: keep only entries below 1e-5 (including all
        # negative draws), zero out the rest.
        self.W.set_value(
            self.W.get_value(borrow=True) *
            (self.W.get_value(borrow=True) < 1e-5), borrow=True)
        self.b.set_value(self.random_tensor(numFilters), borrow=True)
        self.V.set_value(self.random_tensor(
            batchSize, videoHeight, videoWidth, videoDur, inputChannels), borrow=True)
        Hv = self.H_func()
        # Filter covers the whole video: one output position per batch item.
        assert Hv.shape[1] == 1
        assert Hv.shape[2] == 1
        assert Hv.shape[3] == 1
        # Flatten kernel and video into matrices and compare against dot().
        n = inputChannels * videoHeight * videoWidth * videoDur
        W_mat = N.zeros((n, numFilters))
        V_mat = N.zeros((batchSize, n))
        Hv_mat = N.zeros((batchSize, numFilters))
        for qi in xrange(0, numFilters):
            W_mat[:, qi] = \
                self.W.get_value(borrow=True)[qi, :, :, :, :].reshape((n))
            Hv_mat[:, qi] = Hv[:, 0, 0, 0, qi]
        for qi in xrange(0, batchSize):
            V_mat[qi, :] = \
                self.V.get_value(borrow=True)[qi, :, :, :, :].reshape((n))
        H_mat = N.dot(V_mat, W_mat) + self.b.get_value(borrow=True)
        # Looser tolerance for single precision.
        tol = 1e-5
        if floatX == 'float32':
            tol = 1e-4
        if N.abs(H_mat - Hv_mat).max() > tol and not N.allclose(H_mat, Hv_mat):
            # Diagnostic dump before failing.
            print(H_mat)
            print(Hv_mat)
            print('max error: ' + str(N.abs(H_mat - Hv_mat).max()))
            self.W.get_value(borrow=True)[self.W.get_value(borrow=True) != 0] += 1.0
            print('min non-zero kernel mag: ' + str(
                N.abs(self.W.get_value(borrow=True)).min()))
            assert False

    def test_c_against_mat_transp_mul(self):
        # Use a filter of the same size as the image, so the convolution is just a
        # dense matrix multiply.
        # Check that dense matrix multiplication by the transpose of the matrix
        # gives the same result as ConvTransp.
        batchSize = self.rng.randint(1, 10)
        videoDur = self.rng.randint(3, 15)
        videoWidth = self.rng.randint(3, 15)
        videoHeight = self.rng.randint(3, 15)
        filterWidth = videoWidth
        filterHeight = videoHeight
        filterDur = videoDur
        numFilters = self.rng.randint(1, 15)
        inputChannels = self.rng.randint(1, 15)
        self.d.get_value(borrow=True, return_internal_type=True)[0] = \
            self.rng.randint(1, 15)
        self.d.get_value(borrow=True, return_internal_type=True)[1] = \
            self.rng.randint(1, 15)
        self.d.get_value(borrow=True, return_internal_type=True)[2] = \
            self.rng.randint(1, 15)
        self.W.set_value(self.random_tensor(
            numFilters, filterHeight,
            filterWidth, filterDur, inputChannels), borrow=True)
        self.b.set_value(self.random_tensor(numFilters), borrow=True)
        self.V.set_value(self.random_tensor(
            batchSize, videoHeight,
            videoWidth, videoDur, inputChannels), borrow=True)
        self.rb.set_value(self.random_tensor(inputChannels), borrow=True)
        H_shape = self.H_shape_func()
        # Filter covers the whole video: a single output position.
        assert H_shape[1] == 1
        assert H_shape[2] == 1
        assert H_shape[3] == 1
        # Random H, reconstructed V via the transposed convolution.
        Hv = self.random_tensor(* H_shape)
        Vv = self.transp_func(Hv, [videoHeight, videoWidth, videoDur])
        n = inputChannels * videoHeight * videoWidth * videoDur
        # Broadcast the reconstruction bias rb over the video volume.
        rbim = N.zeros((videoHeight, videoWidth, videoDur, inputChannels))
        for qi in xrange(0, inputChannels):
            rbim[:, :, :, qi] = self.rb.get_value(borrow=True)[qi]
        rbv = rbim.reshape((n))
        # Flatten and compare W^T . H + rb against the op's output.
        W_mat = N.zeros((numFilters, n))
        Vv_mat = N.zeros((n, batchSize))
        Hv_mat = N.zeros((numFilters, batchSize))
        for qi in xrange(0, numFilters):
            W_mat[qi, :] = \
                self.W.get_value(borrow=True)[qi, :, :, :, :].reshape((n))
            Hv_mat[qi, :] = Hv[:, 0, 0, 0, qi]
        for qi in xrange(0, batchSize):
            Vv_mat[:, qi] = Vv[qi, :, :, :, :].reshape((n))
        V_mat = (N.dot(W_mat.transpose(), Hv_mat).transpose() +
                 rbv).transpose()
        if N.abs(V_mat - Vv_mat).max() > 1e-5:
            # Diagnostic dump before failing.
            print(V_mat)
            print(Vv_mat)
            for qq in xrange(V_mat.shape[0]):
                for qqq in xrange(Vv_mat.shape[1]):
                    if abs(V_mat[qq, qqq] - Vv_mat[qq, qqq]) > 1e-5:
                        # NOTE(review): str() is called with two arguments
                        # below, which would itself raise TypeError if this
                        # diagnostic branch ever ran.
                        print(
                            ('wrong at ' + str((qq, qqq)) + ': ' +
                             str(V_mat[qq, qqq], Vv_mat[qq, qqq])))
            assert False

    def test_c_against_sparse_mat_transp_mul(self):
        # like test_c_against_mat_transp_mul but using a sparse matrix and a kernel
        # that is smaller than the image
        if not theano.sparse.enable_sparse:
            raise SkipTest('Optional package sparse disabled')
        batchSize = self.rng.randint(1, 3)
        filterWidth = self.rng.randint(1, 8)
        filterHeight = self.rng.randint(1, 8)
        filterDur = self.rng.randint(1, 8)
        self.d.get_value(borrow=True, return_internal_type=True)[0] = \
            self.rng.randint(1, 15)
        self.d.get_value(borrow=True, return_internal_type=True)[1] = \
            self.rng.randint(1, 15)
        self.d.get_value(borrow=True, return_internal_type=True)[2] = \
            self.rng.randint(1, 15)
        # Strides along rows, columns and time respectively.
        dr = self.d.get_value(borrow=True)[0]
        dc = self.d.get_value(borrow=True)[1]
        dt = self.d.get_value(borrow=True)[2]
        numFilters = self.rng.randint(1, 3)
        row_steps = self.rng.randint(1, 4)
        col_steps = self.rng.randint(1, 4)
        time_steps = self.rng.randint(1, 4)
        # print (row_steps,col_steps,time_steps)
        # Video just large enough for the chosen number of strided steps,
        # plus a little slack.
        videoDur = (time_steps - 1) * dt + filterDur + self.rng.randint(0, 3)
        videoWidth = (col_steps - 1) * dc + filterWidth + self.rng.randint(0, 3)
        videoHeight = (row_steps - 1) * dr + filterHeight + self.rng.randint(0, 3)
        inputChannels = self.rng.randint(1, 15)
        self.W.set_value(self.random_tensor(
            numFilters, filterHeight,
            filterWidth, filterDur, inputChannels), borrow=True)
        self.b.set_value(self.random_tensor(numFilters), borrow=True)
        # just needed so H_shape works
        self.V.set_value(self.random_tensor(
            batchSize, videoHeight, videoWidth,
            videoDur, inputChannels), borrow=True)
        self.rb.set_value(self.random_tensor(inputChannels), borrow=True)
        H_shape = self.H_shape_func()
        # make index maps
        # h/r/c/t map each flat output index back to its filter, row,
        # column and time coordinates.
        h = N.zeros(H_shape[1:], dtype='int32')
        r = N.zeros(H_shape[1:], dtype='int32')
        c = N.zeros(H_shape[1:], dtype='int32')
        t = N.zeros(H_shape[1:], dtype='int32')
        for qi in xrange(0, H_shape[4]):
            h[:, :, :, qi] = qi
        for qi in xrange(0, H_shape[1]):
            r[qi, :, :, :] = qi
        for qi in xrange(0, H_shape[2]):
            c[:, qi, :, :] = qi
        for qi in xrange(0, H_shape[3]):
            t[:, :, qi, :] = qi
        hn = H_shape[1] * H_shape[2] * H_shape[3] * H_shape[4]
        h = h.reshape((hn))
        r = r.reshape((hn))
        c = c.reshape((hn))
        t = t.reshape((hn))
        Hv = self.random_tensor(*H_shape)
        Vv = self.transp_func(Hv, [videoHeight, videoWidth, videoDur])
        n = inputChannels * videoHeight * videoWidth * videoDur
        # Broadcast the reconstruction bias rb over the video volume.
        rbim = N.zeros((videoHeight, videoWidth, videoDur, inputChannels))
        for qi in xrange(0, inputChannels):
            rbim[:, :, :, qi] = self.rb.get_value(borrow=True)[qi]
        rbv = rbim.reshape((n))
        # Build the (mostly zero) Toeplitz-like matrix: one row per output
        # position, holding the filter placed at that position's offset.
        W_mat = N.zeros((hn, n))
        Vv_mat = N.zeros((n, batchSize))
        Hv_mat = N.zeros((hn, batchSize))
        for qi in xrange(0, hn):
            hi = h[qi]
            ri = r[qi]
            ci = c[qi]
            ti = t[qi]
            placed_filter = N.zeros(self.V.get_value(borrow=True).shape[1:])
            placed_filter[
                ri * dr:ri * dr + self.W.get_value(borrow=True).shape[1],
                ci * dc:ci * dc + self.W.get_value(borrow=True).shape[2],
                ti * dt:ti * dt + self.W.get_value(borrow=True).shape[3],
                :] = self.W.get_value(borrow=True)[hi, :, :, :, :]
            W_mat[qi, :] = placed_filter.reshape((n))
            Hv_mat[qi, :] = Hv[:, ri, ci, ti, hi]
        for qi in xrange(0, batchSize):
            Vv_mat[:, qi] = Vv[qi, :, :, :, :].reshape((n))
        # Sparse transposed multiply: V = W^T . H + rb.
        W_mat_T = sparse.csr_matrix(W_mat.transpose())
        temp = W_mat_T * Hv_mat
        V_mat = (temp.transpose() + rbv).transpose()
        if N.abs(V_mat - Vv_mat).max() > 1e-5:
            # Diagnostic dump before failing.
            print('mul')
            print(V_mat)
            print('conv')
            print(Vv_mat)
            for i in xrange(0, n):
                for j in xrange(0, batchSize):
                    if abs(V_mat[i, j] - Vv_mat[i, j]) > 1e-5:
                        print(('wrong at %d,%d: %f mul versus %f conv'
                               % (i, j, V_mat[i, j], Vv_mat[i, j])))
            assert False

    def test_infer_shape(self):
        """Check Op.infer_shape for Conv3D, ConvTransp3D and ConvGrad3D."""
        self.randomize()
        # Conv3D
        self._compile_and_check([], [self.H], [], Conv3D)
        # ConvTransp3D
        self._compile_and_check(
            [self.RShape], [self.R],
            [self.V.get_value(borrow=True).shape[1:4]], ConvTransp3D)
        # ConvGrad3D
        self._compile_and_check(
            [self.RShape],
            [T.grad(self.reconsObj, self.W), T.grad(self.reconsObj, self.H),
             T.grad(self.reconsObj, self.V), T.grad(self.reconsObj, self.b)],
            [self.V.get_value(borrow=True).shape[1:4]], ConvGrad3D)

    def test_gradient(self):
        """Numerically verify the gradients of all three ops via
        verify_grad and the Dummy* single-scalar wrappers."""
        self.randomize()
        rng, V, W, b, d, rb = self.rng, self.V, self.W, self.b, self.d, self.rb
        dCdH = shared(self.random_tensor(*self.H_shape_func()))
        testsPerDir = 2
        theano.tests.unittest_tools.verify_grad(DummyConv3D(
            rng, (V, W, b), d), [0.0], n_tests=testsPerDir)
        theano.tests.unittest_tools.verify_grad(
            DummyConvTransp3D(
                rng, (W, rb, dCdH), d, V.get_value(borrow=True).shape[1:4]),
            [0.0], n_tests=testsPerDir)
        theano.tests.unittest_tools.verify_grad(
            DummyConvGrad3D(
                rng, (V, dCdH), d, W.get_value(borrow=True).shape),
            [0.0], n_tests=testsPerDir)
if __name__ == '__main__':
    # Manual entry point: run just the shape-inference check directly,
    # bypassing the test runner.
    case = TestConv3D('setUp')
    case.setUp()
    case.test_infer_shape()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论