提交 c072d669 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5267 from gvtulder/f-abstractconv-differences

Minor inconsistency in AbstractConv_gradInput implementations
差异被折叠。
......@@ -425,9 +425,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
const size_t dil_kW = (kW - 1) * dilW + 1;
const size_t dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const size_t topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const size_t topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const size_t topDepth = (bottomDepth + 2*padD - dil_kD) / dD + 1;
const size_t topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const size_t topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const size_t topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const size_t topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const size_t topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const size_t topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
#undef _CONV_FLOORDIV
if (batchSize != PyGpuArray_DIMS(top)[0] ||
nFilters != PyGpuArray_DIMS(top)[1] ||
topHeight != PyGpuArray_DIMS(top)[2] ||
......@@ -479,6 +487,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
PyGpuArrayObject *output;
if (direction == 0) { // forward pass
output = top;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid correlation: im3d2col, then gemm
// Iterate over batch
for (size_t n = 0; n < batchSize; n++) {
......@@ -530,6 +549,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
}
else if (direction == 1) { // backprop wrt. weights
output = weight;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad wrt. weights could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid convolution: im3col, then gemm
// Iterate over batch
for (size_t n = 0; n < batchSize; n++) {
......@@ -581,9 +611,29 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
return NULL;
}
}
if (batchSize == 0) {
err = GpuArray_memset(&weight->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad weights could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
}
}
else if (direction == 2) { // backprop wrt. inputs
output = bottom;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad wrt. inputs could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// full convolution: gemm, then col2im3d
// Iterate over batch
for (size_t n = 0; n < batchSize; n++) {
......
......@@ -360,8 +360,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const size_t dil_kH = (kH - 1) * dilH + 1;
const size_t dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const size_t topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const size_t topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const size_t topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const size_t topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const size_t topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const size_t topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
#undef _CONV_FLOORDIV
if (batchSize != PyGpuArray_DIMS(top)[0] ||
nFilters != PyGpuArray_DIMS(top)[1] ||
topHeight != PyGpuArray_DIMS(top)[2] ||
......@@ -411,6 +418,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
PyGpuArrayObject *output;
if (direction == 0) { // forward pass
output = top;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid correlation: im2col, then gemm
// Iterate over batch
for (size_t n = 0; n < batchSize; n++) {
......@@ -462,6 +480,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
}
else if (direction == 1) { // backprop wrt. weights
output = weight;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM grad wrt. weights could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid convolution: im2col, then gemm
// Iterate over batch
for (size_t n = 0; n < batchSize; n++) {
......@@ -516,6 +545,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
}
else if (direction == 2) { // backprop wrt. inputs
output = bottom;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM grad wrt. inputs could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// full convolution: gemm, then col2im
// Iterate over batch
for (size_t n = 0; n < batchSize; n++) {
......
......@@ -24,7 +24,8 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
AbstractConv3d,
AbstractConv3d_gradWeights,
AbstractConv3d_gradInputs,
get_conv_output_shape)
get_conv_output_shape,
assert_conv_shape)
from theano.tensor.signal.pool import (
Pool, MaxPoolGrad, AveragePoolGrad)
from . import pygpu
......@@ -979,11 +980,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns = kerns[:, :, ::-1, ::-1]
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
shape2 = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1
shape3 = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(
shape_i(kerns, 1, fgraph),
shape_i(img, 1, fgraph), shape2, shape3)
out_shp = (shape_i(kerns, 1, fgraph),
shape_i(img, 1, fgraph),
shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1,
shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='cross', precision=precision)(out.shape)
conv = gpu_dnn_conv_gradW()(img, kerns, out, desc)
......@@ -997,11 +999,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img = gpu_contiguous(img) # cudnn v2 rc3 need contiguous data
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(shape_i(img, 0, fgraph),
out_shp = (shape_i(img, 0, fgraph),
shape_i(kerns, 1, fgraph),
shape2, shape3)
shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1,
shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape)
return gpu_dnn_conv_gradI()(kerns, img, out, desc)
......@@ -1021,6 +1024,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode,
desc_op.subsample)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
return gpu_dnn_conv(algo=algo)(img, kerns, out, desc)
......@@ -1094,12 +1098,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns = kerns[:, :, ::-1, ::-1]
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4))
shape2 = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1
shape3 = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1
shape4 = shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(
shape_i(kerns, 1, fgraph),
shape_i(img, 1, fgraph), shape2, shape3, shape4)
out_shp = (shape_i(kerns, 1, fgraph),
shape_i(img, 1, fgraph),
shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1,
shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1,
shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
conv_mode='cross', precision=precision)(out.shape)
conv = gpu_dnn_conv_gradW()(img, kerns, out, desc)
......@@ -1113,12 +1118,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
img = gpu_contiguous(img) # cudnn v2 rc3 need contiguous data
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
shape4 = shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(shape_i(img, 0, fgraph),
out_shp = (shape_i(img, 0, fgraph),
shape_i(kerns, 1, fgraph),
shape2, shape3, shape4)
shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1,
shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1,
shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape)
return gpu_dnn_conv_gradI()(kerns, img, out, desc)
......@@ -1138,6 +1144,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode,
desc_op.subsample)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
return gpu_dnn_conv(algo=algo)(img, kerns, out, desc)
......
......@@ -39,11 +39,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return 1;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
switch (input->ga.typecode) {
case GA_DOUBLE:
alpha_p = (void *)&alpha;
......@@ -71,6 +66,20 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return 1;
#endif
if (PyGpuArray_DIMS(input)[0] == 0 || PyGpuArray_DIMS(kerns)[0] == 0 || PyGpuArray_DIMS(kerns)[1] == 0) {
int err2 = GpuArray_memset(&(*output)->ga, 0);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv could not fill the output with zeros: %d", err2);
return 1;
}
return 0;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
if (c_set_tensorNd(*output, APPLY_SPECIFIC(output)) == -1)
return 1;
......
......@@ -38,11 +38,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return 1;
}
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
switch (im->ga.typecode) {
case GA_DOUBLE:
alpha_p = (void *)&alpha;
......@@ -70,6 +65,20 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return 1;
#endif
if (PyGpuArray_DIMS(im)[0] == 0 || PyGpuArray_DIMS(kerns)[0] == 0 || PyGpuArray_DIMS(kerns)[1] == 0) {
int err2 = GpuArray_memset(&(*input)->ga, 0);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv grad wrt. inputs could not fill the output with zeros: %d", err2);
return 1;
}
return 0;
}
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
if (c_set_tensorNd(*input, APPLY_SPECIFIC(input)) == -1)
return 1;
......@@ -77,6 +86,48 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_enter(c->ctx);
// Sanity-check the received output gradient against the output shape
// cuDNN derives from the input/kernel descriptors, so an impossible
// convolution is reported instead of silently computing garbage.
int expected_output_dims[5] = {0};
err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
                                            PyGpuArray_NDIM(im), expected_output_dims);
if (err != CUDNN_STATUS_SUCCESS) {
  PyErr_Format(PyExc_RuntimeError, "error computing convolution output dim: %s",
               cudnnGetErrorString(err));
  cuda_exit(c->ctx);
  return 1;
}
if (PyGpuArray_NDIM(im) == 4) {
  if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
      (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
      (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
      (PyGpuArray_DIMS(output)[3] != expected_output_dims[3])) {
    // expected_output_dims is int and PyGpuArray_DIMS() yields size_t:
    // use %d / %zu so every specifier matches its argument type
    // (%ld on int is undefined behavior and breaks on LLP64 platforms).
    PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %dx%dx%dx%d"
                 " but received gradient with shape %zux%zux%zux%zu",
                 expected_output_dims[0], expected_output_dims[1],
                 expected_output_dims[2], expected_output_dims[3],
                 PyGpuArray_DIMS(output)[0], PyGpuArray_DIMS(output)[1],
                 PyGpuArray_DIMS(output)[2], PyGpuArray_DIMS(output)[3]);
    cuda_exit(c->ctx);
    return 1;
  }
} else if (PyGpuArray_NDIM(im) == 5) {
  if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
      (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
      (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
      (PyGpuArray_DIMS(output)[3] != expected_output_dims[3]) ||
      (PyGpuArray_DIMS(output)[4] != expected_output_dims[4])) {
    PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %dx%dx%dx%dx%d"
                 " but received gradient with shape %zux%zux%zux%zux%zu",
                 expected_output_dims[0], expected_output_dims[1],
                 expected_output_dims[2], expected_output_dims[3],
                 expected_output_dims[4],
                 PyGpuArray_DIMS(output)[0], PyGpuArray_DIMS(output)[1],
                 PyGpuArray_DIMS(output)[2], PyGpuArray_DIMS(output)[3],
                 PyGpuArray_DIMS(output)[4]);
    cuda_exit(c->ctx);
    return 1;
  }
}
#ifdef CHOOSE_ALGO
#ifndef CHOOSE_ONCE
reuse_algo = 1;
......
......@@ -38,11 +38,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return 1;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
switch (input->ga.typecode) {
case GA_DOUBLE:
alpha_p = (void *)&alpha;
......@@ -70,6 +65,20 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return 1;
#endif
if (PyGpuArray_DIMS(input)[0] == 0 || PyGpuArray_DIMS(km)[0] == 0 || PyGpuArray_DIMS(km)[1] == 0) {
int err2 = GpuArray_memset(&(*kerns)->ga, 0);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv grad wrt. weights could not fill the output with zeros: %d", err2);
return 1;
}
return 0;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
......@@ -77,6 +86,48 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_enter(c->ctx);
// Sanity-check the received output gradient against the output shape
// cuDNN derives from the input/kernel descriptors, so an impossible
// convolution is reported instead of silently computing garbage.
int expected_output_dims[5] = {0};
err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
                                            PyGpuArray_NDIM(input), expected_output_dims);
if (err != CUDNN_STATUS_SUCCESS) {
  PyErr_Format(PyExc_RuntimeError, "error computing convolution output dim: %s",
               cudnnGetErrorString(err));
  cuda_exit(c->ctx);
  return 1;
}
if (PyGpuArray_NDIM(input) == 4) {
  if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
      (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
      (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
      (PyGpuArray_DIMS(output)[3] != expected_output_dims[3])) {
    // expected_output_dims is int and PyGpuArray_DIMS() yields size_t:
    // use %d / %zu so every specifier matches its argument type (the
    // previous mixed %ld/%d string was both inconsistent and UB).
    PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %dx%dx%dx%d"
                 " but received gradient with shape %zux%zux%zux%zu",
                 expected_output_dims[0], expected_output_dims[1],
                 expected_output_dims[2], expected_output_dims[3],
                 PyGpuArray_DIMS(output)[0], PyGpuArray_DIMS(output)[1],
                 PyGpuArray_DIMS(output)[2], PyGpuArray_DIMS(output)[3]);
    cuda_exit(c->ctx);
    return 1;
  }
} else if (PyGpuArray_NDIM(input) == 5) {
  if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
      (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
      (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
      (PyGpuArray_DIMS(output)[3] != expected_output_dims[3]) ||
      (PyGpuArray_DIMS(output)[4] != expected_output_dims[4])) {
    PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %dx%dx%dx%dx%d"
                 " but received gradient with shape %zux%zux%zux%zux%zu",
                 expected_output_dims[0], expected_output_dims[1],
                 expected_output_dims[2], expected_output_dims[3],
                 expected_output_dims[4],
                 PyGpuArray_DIMS(output)[0], PyGpuArray_DIMS(output)[1],
                 PyGpuArray_DIMS(output)[2], PyGpuArray_DIMS(output)[3],
                 PyGpuArray_DIMS(output)[4]);
    cuda_exit(c->ctx);
    return 1;
  }
}
#ifdef CHOOSE_ALGO
#ifndef CHOOSE_ONCE
reuse_algo = 1;
......
from __future__ import absolute_import, print_function, division
from nose.plugins.skip import SkipTest
from nose.tools import assert_raises
import numpy
......@@ -49,6 +50,31 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
    """Run one gradient-wrt-inputs case against GpuDnnConvGradI.

    If ``expect_error`` is set, the case is expected to raise a
    RuntimeError or ValueError instead of succeeding.
    """
    if not dnn_available(test_ctx_name):
        raise SkipTest(dnn_available.msg)
    if fd != (1, 1):
        raise SkipTest("Doesn't have CUDNN implementation")
    # Keyword arguments shared by the success and the failure path.
    common = dict(inputs_shape=i, filters_shape=f,
                  output_shape=o, subsample=s,
                  mode=mode_with_gpu,
                  provide_shape=provide_shape, border_mode=b,
                  filter_flip=flip, target_op=GpuDnnConvGradI,
                  filter_dilation=fd)
    if expect_error:
        assert_raises((RuntimeError, ValueError),
                      self.run_gradinput,
                      verify_grad=False, ref=None, **common)
    else:
        self.run_gradinput(verify_grad=True, **common)
class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
@classmethod
......@@ -82,6 +108,31 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False):
    """Run one 3d gradient-wrt-inputs case against GpuDnnConvGradI.

    If ``expect_error`` is set, the case is expected to raise a
    RuntimeError or ValueError instead of succeeding.
    """
    if not dnn_available(test_ctx_name):
        raise SkipTest(dnn_available.msg)
    if fd != (1, 1, 1):
        raise SkipTest("Doesn't have CUDNN implementation")
    # Keyword arguments shared by the success and the failure path.
    common = dict(inputs_shape=i, filters_shape=f,
                  output_shape=o, subsample=s,
                  mode=mode_with_gpu,
                  provide_shape=provide_shape, border_mode=b,
                  filter_flip=flip, target_op=GpuDnnConvGradI,
                  filter_dilation=fd)
    if expect_error:
        assert_raises((RuntimeError, ValueError),
                      self.run_gradinput,
                      verify_grad=False, ref=None, **common)
    else:
        self.run_gradinput(verify_grad=True, **common)
class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
@classmethod
......@@ -115,6 +166,28 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
target_op=GpuCorrMM_gradInputs,
filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
    """Run one gradient-wrt-inputs case against GpuCorrMM_gradInputs.

    If ``expect_error`` is set, the case is expected to raise a
    ValueError instead of succeeding.
    """
    # Keyword arguments shared by the success and the failure path.
    common = dict(inputs_shape=i, filters_shape=f,
                  output_shape=o, subsample=s,
                  mode=self.mode,
                  provide_shape=provide_shape, border_mode=b,
                  filter_flip=flip,
                  target_op=GpuCorrMM_gradInputs,
                  filter_dilation=fd)
    if expect_error:
        assert_raises(ValueError,
                      self.run_gradinput,
                      verify_grad=False, ref=None, **common)
    else:
        self.run_gradinput(verify_grad=True, **common)
class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
@classmethod
......@@ -148,6 +221,28 @@ class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
target_op=GpuCorr3dMM_gradInputs,
filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False):
    """Run one 3d gradient-wrt-inputs case against GpuCorr3dMM_gradInputs.

    If ``expect_error`` is set, the case is expected to raise a
    ValueError instead of succeeding.
    """
    # Keyword arguments shared by the success and the failure path.
    common = dict(inputs_shape=i, filters_shape=f,
                  output_shape=o, subsample=s,
                  mode=self.mode,
                  provide_shape=provide_shape, border_mode=b,
                  filter_flip=flip,
                  target_op=GpuCorr3dMM_gradInputs,
                  filter_dilation=fd)
    if expect_error:
        assert_raises(ValueError,
                      self.run_gradinput,
                      verify_grad=False, ref=None, **common)
    else:
        self.run_gradinput(verify_grad=True, **common)
class TestDnnConvTypes(test_abstract_conv.TestConvTypes):
def setUp(self):
......
......@@ -12,6 +12,7 @@ import theano.tensor as T
import theano.tests.unittest_tools as utt
from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
from theano.tensor.nnet.abstract_conv import get_conv_output_shape
from .. import dnn
from ..basic_ops import GpuAllocEmpty
......@@ -628,56 +629,50 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(1, 1, 1), (2, 2, 2)],
'none')
def _test_conv_gradw(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsample):
def _test_conv_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsample):
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
topgrad_shape = get_conv_output_shape(img_shape, kerns_shape,
border_mode, subsample)
img_val = numpy.asarray(
img_val,
numpy.random.rand(*img_shape),
dtype=theano.config.floatX
)
kern_vals = numpy.asarray(
kern_vals,
topgrad_vals = numpy.asarray(
numpy.random.rand(*topgrad_shape),
dtype=theano.config.floatX
)
temp_img = img.dimshuffle(1, 0, 2, 3)
temp_kerns = kerns
if conv_mode == 'conv':
temp_kerns = temp_kerns[:, :, ::-1, ::-1]
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
shape = (
kern_vals.shape[1], img_val.shape[1],
img_val.shape[2] - kern_vals.shape[2] + 1,
img_val.shape[3] - kern_vals.shape[3] + 1
)
out_vals = numpy.zeros(shape, dtype=theano.config.floatX)
kerns_vals = numpy.zeros(kerns_shape, dtype=theano.config.floatX)
kerns_shape = theano.shared(numpy.asarray(kerns_shape))
desc = dnn.GpuDnnConvDesc(
border_mode=border_mode,
subsample=subsample,
conv_mode=conv_mode,
precision=set_precision(theano.config.floatX)
)(out.shape)
)(kerns_shape)
conv_grad_w = dnn.GpuDnnConvGradW()(
temp_img,
temp_kerns,
out,
img,
topgrad,
kerns,
desc,
)
self._compile_and_check(
[temp_img, temp_kerns, out],
[img, topgrad, kerns],
[conv_grad_w],
[img_val, kern_vals, out_vals],
[img_val, topgrad_vals, kerns_vals],
dnn.GpuDnnConvGradW
)
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv_gradw(self, border_mode, conv_mode):
self._test_conv_gradw(T.tensor4('img'),
T.tensor4('topgrad'),
T.tensor4('kerns'),
T.tensor4('out'),
numpy.random.rand(2, 5, 6, 8),
numpy.random.rand(2, 1, 5, 6),
(5, 2, 6, 13),
(1, 2, 3, 7),
border_mode,
conv_mode,
(1, 1))
......
......@@ -429,9 +429,17 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const int topHeight = int((bottomHeight + 2*padH - dil_kH) / dH) + 1;
const int topWidth = int((bottomWidth + 2*padW - dil_kW) / dW) + 1;
const int topDepth = int((bottomDepth + 2*padD - dil_kD) / dD) + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const int topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const int topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
#undef _CONV_FLOORDIV
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
......@@ -478,6 +486,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
if (direction == 0)
{ // forward pass
output = top;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid correlation: im2col, then gemm
// Iterate over batch
for (int n = 0; n < batchSize; n++)
......@@ -527,6 +548,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
{
// backprop wrt. weights
output = weight;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad wrt. weights could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid convolution: im2col, then gemm
// Iterate over batch
for (int n = 0; n < batchSize; n++)
......@@ -578,6 +612,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
{
// backprop wrt. inputs
output = bottom;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad wrt. inputs could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// full convolution: gemm, then col2im3d
// Iterate over batch
for (int n = 0; n < batchSize; n++)
......
......@@ -333,8 +333,15 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
#undef _CONV_FLOORDIV
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
......@@ -377,6 +384,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
CudaNdarray *output;
if (direction == 0) { // forward pass
output = top;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid correlation: im2col, then gemm
// Iterate over batch
for (int n = 0; n < batchSize; n++) {
......@@ -445,6 +465,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
}
else if (direction == 1) { // backprop wrt. weights
output = weight;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM grad wrt. weights could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid convolution: im2col, then gemm
// Iterate over batch
for (int n = 0; n < batchSize; n++) {
......@@ -513,6 +546,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
}
else if (direction == 2) { // backprop wrt. inputs
output = bottom;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM grad wrt. inputs could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// full convolution: gemm, then col2im
// Iterate over batch
for (int n = 0; n < batchSize; n++) {
......
......@@ -14,7 +14,8 @@ from theano.gof.type import CDataType
from theano.compile import optdb
from theano.compile.ops import shape_i
from theano.tensor.nnet import LogSoftmax, SoftmaxGrad
from theano.tensor.nnet.abstract_conv import get_conv_output_shape
from theano.tensor.nnet.abstract_conv import (get_conv_output_shape,
assert_conv_shape)
from theano.tensor.signal.pool import (
Pool, MaxPoolGrad, AveragePoolGrad)
from theano.sandbox.cuda.type import CudaNdarrayType
......@@ -1132,10 +1133,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns = kerns[:, :, ::-1, ::-1]
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
shape2 = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1
shape3 = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1
out = gpu_alloc_empty(shape_i(kerns, 1, fgraph),
shape_i(img, 1, fgraph), shape2, shape3)
out_shp = (shape_i(kerns, 1, fgraph),
shape_i(img, 1, fgraph),
shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1,
shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='cross', precision=precision)(img.shape,
out.shape)
......@@ -1149,10 +1152,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
out = gpu_alloc_empty(shape_i(img, 0, fgraph),
shape_i(kerns, 1, fgraph), shape2, shape3)
out_shp = (shape_i(img, 0, fgraph),
shape_i(kerns, 1, fgraph),
shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1,
shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode=conv_mode, precision=precision)(out.shape,
kerns.shape)
......@@ -1170,6 +1175,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
out_shp = GpuDnnConv.get_out_shape(img.shape, kerns.shape,
desc_op.border_mode,
desc_op.subsample)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc)
......@@ -1248,11 +1254,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns = kerns[:, :, ::-1, ::-1, ::-1]
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4))
shape2 = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1
shape3 = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1
shape4 = shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1
out = gpu_alloc_empty(shape_i(kerns, 1, fgraph),
shape_i(img, 1, fgraph), shape2, shape3, shape4)
out_shp = (shape_i(kerns, 1, fgraph),
shape_i(img, 1, fgraph),
shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1,
shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1,
shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
conv_mode='cross', precision=precision)(img.shape,
out.shape)
......@@ -1271,6 +1279,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
out_shp = GpuDnnConv3d.get_out_shape(img.shape, kerns.shape,
desc_op.border_mode,
desc_op.subsample)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(*out_shp)
return GpuDnnConv3d(algo=algo)(img, kerns, out, desc)
......
......@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return 1;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
int nb_dim = CudaNdarray_NDIM(input);
#ifdef CONV_INPLACE
......@@ -30,6 +25,22 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return 1;
#endif
if (CudaNdarray_DIMS(input)[0] == 0 || CudaNdarray_DIMS(kerns)[0] == 0 || CudaNdarray_DIMS(kerns)[1] == 0) {
cudaError_t err2 = cudaMemset((*output)->devdata, 0,
CudaNdarray_SIZE(*output) * sizeof(real));
if (err2 != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv could not fill the output with zeros: %s",
cudaGetErrorString(err2));
return 1;
}
return 0;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
if (c_set_tensorNd(*output, APPLY_SPECIFIC(output)) == -1)
return 1;
......
......@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return 1;
}
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
int nb_dim = CudaNdarray_NDIM(output);
#ifdef CONV_INPLACE
......@@ -30,9 +25,64 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return 1;
#endif
if (CudaNdarray_DIMS(im)[0] == 0 || CudaNdarray_DIMS(kerns)[0] == 0 || CudaNdarray_DIMS(kerns)[1] == 0) {
cudaError_t err2 = cudaMemset((*input)->devdata, 0,
CudaNdarray_SIZE(*input) * sizeof(real));
if (err2 != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv grad wrt. inputs could not fill the output with zeros: %s",
cudaGetErrorString(err2));
return 1;
}
return 0;
}
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
if (c_set_tensorNd(*input, APPLY_SPECIFIC(input)) == -1)
return 1;
int expected_output_dims[5] = {0};
err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
nb_dim, expected_output_dims);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error computing convolution output dim: %s",
cudnnGetErrorString(err));
return 1;
}
if (nb_dim == 4) {
if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
(CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
(CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
(CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ld",
(long int)expected_output_dims[0], (long int)expected_output_dims[1],
(long int)expected_output_dims[2], (long int)expected_output_dims[3],
(long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
(long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3]);
return 1;
}
} else if (nb_dim == 5) {
if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
(CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
(CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
(CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3]) ||
(CudaNdarray_HOST_DIMS(output)[4] != expected_output_dims[4])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld",
(long int)expected_output_dims[0], (long int)expected_output_dims[1],
(long int)expected_output_dims[2], (long int)expected_output_dims[3],
(long int)expected_output_dims[4],
(long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
(long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3],
(long int)CudaNdarray_HOST_DIMS(output)[4]);
return 1;
}
}
{
size_t worksize;
void *workspace;
......
......@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return 1;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
int nb_dim = CudaNdarray_NDIM(output);
#ifdef CONV_INPLACE
......@@ -30,9 +25,64 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return 1;
#endif
if (CudaNdarray_DIMS(input)[0] == 0 || CudaNdarray_DIMS(km)[0] == 0 || CudaNdarray_DIMS(km)[1] == 0) {
cudaError_t err2 = cudaMemset((*kerns)->devdata, 0,
CudaNdarray_SIZE(*kerns) * sizeof(real));
if (err2 != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv grad wrt. weights could not fill the output with zeros: %s",
cudaGetErrorString(err2));
return 1;
}
return 0;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filterNd(*kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
int expected_output_dims[5] = {0};
err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
nb_dim, expected_output_dims);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error computing convolution output dim: %s",
cudnnGetErrorString(err));
return 1;
}
if (nb_dim == 4) {
  if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
      (CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
      (CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
      (CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3])) {
    /* Every argument below is cast to long int, so every conversion
       specifier must be %ld.  The original string used %d for the third
       field of both shapes, which is undefined behavior with a long int
       argument on LP64 platforms, and was inconsistent with the identical
       check in the grad-wrt-inputs (conv_gi) implementation. */
    PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ld"
                 " but received gradient with shape %ldx%ldx%ldx%ld",
                 (long int)expected_output_dims[0], (long int)expected_output_dims[1],
                 (long int)expected_output_dims[2], (long int)expected_output_dims[3],
                 (long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
                 (long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3]);
    return 1;
  }
} else if (nb_dim == 5) {
if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
(CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
(CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
(CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3]) ||
(CudaNdarray_HOST_DIMS(output)[4] != expected_output_dims[4])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld",
(long int)expected_output_dims[0], (long int)expected_output_dims[1],
(long int)expected_output_dims[2], (long int)expected_output_dims[3],
(long int)expected_output_dims[4],
(long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
(long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3],
(long int)CudaNdarray_HOST_DIMS(output)[4]);
return 1;
}
}
{
size_t worksize;
void *workspace;
......
......@@ -13,6 +13,7 @@ from theano.sandbox.cuda.blas import (
GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs,
GpuCorr3dMM, GpuCorr3dMM_gradWeights, GpuCorr3dMM_gradInputs)
from nose.plugins.skip import SkipTest
from nose.tools import assert_raises
import theano.sandbox.cuda as cuda
if not cuda.cuda_available:
......@@ -57,6 +58,31 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
filter_flip=flip, target_op=GpuDnnConvGradI,
filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
    """Run one gradient-wrt-inputs test case against GpuDnnConvGradI.

    When `expect_error` is set, the case must raise RuntimeError or
    ValueError instead of producing a result.
    """
    if fd != (1, 1):
        raise SkipTest("No dilation implementation for cuDNN ConvOp.")
    if not dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    # Keyword arguments shared by both the passing and the failing path.
    common = dict(inputs_shape=i, filters_shape=f,
                  output_shape=o, subsample=s,
                  mode=mode_with_gpu,
                  provide_shape=provide_shape, border_mode=b,
                  filter_flip=flip, target_op=GpuDnnConvGradI,
                  filter_dilation=fd)
    if expect_error:
        # Invalid shape combinations must be rejected, not silently computed.
        assert_raises((RuntimeError, ValueError),
                      self.run_gradinput,
                      verify_grad=False, ref=None, **common)
    else:
        self.run_gradinput(verify_grad=True, **common)
class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
@classmethod
......@@ -91,6 +117,31 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
filter_flip=flip, target_op=GpuDnnConv3dGradI,
filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False):
    """Run one gradient-wrt-inputs test case for the 3d cuDNN convolution.

    Fix: the original passed the 2d op (GpuDnnConvGradI) as `target_op`
    inside this 3d test class; the other tcases of this class check for
    GpuDnnConv3dGradI, and so must this one.
    """
    if fd != (1, 1, 1):
        raise SkipTest("No dilation implementation for cuDNN ConvOp.")
    if not dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    mode = mode_with_gpu
    if not expect_error:
        self.run_gradinput(inputs_shape=i, filters_shape=f,
                           output_shape=o, subsample=s,
                           verify_grad=True, mode=mode,
                           provide_shape=provide_shape, border_mode=b,
                           filter_flip=flip, target_op=GpuDnnConv3dGradI,
                           filter_dilation=fd)
    else:
        # Invalid shape combinations must raise instead of computing garbage.
        assert_raises((RuntimeError, ValueError),
                      self.run_gradinput,
                      inputs_shape=i, filters_shape=f,
                      output_shape=o, subsample=s,
                      verify_grad=False, mode=mode,
                      provide_shape=provide_shape, border_mode=b,
                      filter_flip=flip, target_op=GpuDnnConv3dGradI,
                      ref=None,
                      filter_dilation=fd)
class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
@classmethod
......@@ -124,6 +175,28 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
target_op=GpuCorrMM_gradInputs,
filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
    """Run one gradient-wrt-inputs test case against GpuCorrMM_gradInputs.

    When `expect_error` is set, the case must raise ValueError.
    """
    # Keyword arguments shared by both the passing and the failing path.
    kwargs = dict(inputs_shape=i, filters_shape=f,
                  output_shape=o, subsample=s,
                  mode=self.mode,
                  provide_shape=provide_shape, border_mode=b,
                  filter_flip=flip,
                  target_op=GpuCorrMM_gradInputs,
                  filter_dilation=fd)
    if expect_error:
        # Impossible output shapes must be rejected with ValueError.
        assert_raises(ValueError, self.run_gradinput,
                      verify_grad=False, ref=None, **kwargs)
    else:
        self.run_gradinput(verify_grad=True, **kwargs)
class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
@classmethod
......@@ -157,6 +230,28 @@ class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
target_op=GpuCorr3dMM_gradInputs,
filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False):
    """Run one gradient-wrt-inputs test case against GpuCorr3dMM_gradInputs.

    When `expect_error` is set, the case must raise ValueError.
    """
    # Keyword arguments shared by both the passing and the failing path.
    kwargs = dict(inputs_shape=i, filters_shape=f,
                  output_shape=o, subsample=s,
                  mode=self.mode,
                  provide_shape=provide_shape, border_mode=b,
                  filter_flip=flip,
                  target_op=GpuCorr3dMM_gradInputs,
                  filter_dilation=fd)
    if expect_error:
        # Impossible output shapes must be rejected with ValueError.
        assert_raises(ValueError, self.run_gradinput,
                      verify_grad=False, ref=None, **kwargs)
    else:
        self.run_gradinput(verify_grad=True, **kwargs)
class TestDnnConvTypes(test_abstract_conv.TestConvTypes):
def setUp(self):
......
......@@ -4,6 +4,7 @@ import os
import sys
from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
from itertools import chain, product
import six.moves.cPickle as pickle
from six import StringIO
......@@ -16,6 +17,7 @@ import theano.tensor as T
import theano.tests.unittest_tools as utt
from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
from theano.tensor.nnet.abstract_conv import get_conv_output_shape
import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty
from theano.sandbox.cuda import float32_shared_constructor as shared
......@@ -979,99 +981,105 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnConv3d
)
def test_conv_gradw(self):
def _test_conv_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsample):
if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
topgrad_shape = get_conv_output_shape(img_shape, kerns_shape,
border_mode, subsample)
img_val = numpy.asarray(
numpy.random.rand(2, 5, 6, 8),
dtype='float32'
numpy.random.rand(*img_shape),
dtype=theano.config.floatX
)
kern_vals = numpy.asarray(
numpy.random.rand(2, 1, 5, 6),
dtype='float32'
topgrad_vals = numpy.asarray(
numpy.random.rand(*topgrad_shape),
dtype=theano.config.floatX
)
for params in product(
['valid', 'full', 'half'],
[(1, 1)], # strides besides (1, 1)
['conv', 'cross']
):
temp_img = img.dimshuffle(1, 0, 2, 3)
temp_kerns = kerns
if params[2] == 'conv':
temp_kerns = temp_kerns[:, :, ::-1, ::-1]
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
shape = (
kern_vals.shape[1], img_val.shape[1],
img_val.shape[2] - kern_vals.shape[2] + 1,
img_val.shape[3] - kern_vals.shape[3] + 1
)
out_vals = numpy.zeros(shape, dtype='float32')
kerns_vals = numpy.zeros(kerns_shape, dtype=theano.config.floatX)
kerns_shape = theano.shared(numpy.asarray(kerns_shape))
topgrad_shape = theano.shared(numpy.asarray(topgrad_shape))
desc = dnn.GpuDnnConvDesc(
border_mode=params[0],
subsample=params[1],
conv_mode=params[2]
)(temp_img.shape, out.shape)
border_mode=border_mode,
subsample=subsample,
conv_mode=conv_mode
)(topgrad_shape, kerns_shape)
conv_grad_w = dnn.GpuDnnConvGradW()(
temp_img,
temp_kerns,
out,
img,
topgrad,
kerns,
desc,
)
self._compile_and_check(
[temp_img, temp_kerns, out],
[img, topgrad, kerns],
[conv_grad_w],
[img_val, kern_vals, out_vals],
[img_val, topgrad_vals, kerns_vals],
dnn.GpuDnnConvGradW
)
def test_conv3d_gradw(self):
border_modes = ['valid', 'full', 'half']
conv_modes = ['conv', 'cross']
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv_gradw(self, border_mode, conv_mode):
self._test_conv_gradw(T.tensor4('img'),
T.tensor4('topgrad'),
T.tensor4('kerns'),
(5, 2, 6, 13),
(1, 2, 3, 7),
border_mode,
conv_mode,
(1, 1))
def _test_conv3d_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsample):
if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)):
raise SkipTest('"cuDNN 3D convolution requires cuDNN v2')
img = T.ftensor5('img')
kerns = T.ftensor5('kerns')
out = T.ftensor5('out')
topgrad_shape = get_conv_output_shape(img_shape, kerns_shape,
border_mode, subsample)
img_val = numpy.asarray(
numpy.random.rand(9, 2, 4, 8, 13),
dtype='float32'
numpy.random.rand(*img_shape),
dtype=theano.config.floatX
)
kern_vals = numpy.asarray(
numpy.random.rand(11, 2, 3, 1, 4),
dtype='float32'
topgrad_vals = numpy.asarray(
numpy.random.rand(*topgrad_shape),
dtype=theano.config.floatX
)
for params in product(
['valid', 'full', 'half'],
[(1, 1, 1), (2, 2, 2)],
['conv', 'cross']
):
out_vals = numpy.zeros(
dnn.GpuDnnConv3d.get_out_shape(img_val.shape, kern_vals.shape,
border_mode=params[0],
subsample=params[1]),
dtype='float32')
kerns_vals = numpy.zeros(kerns_shape, dtype=theano.config.floatX)
kerns_shape = theano.shared(numpy.asarray(kerns_shape))
topgrad_shape = theano.shared(numpy.asarray(topgrad_shape))
desc = dnn.GpuDnnConvDesc(
border_mode=params[0],
subsample=params[1],
conv_mode=params[2]
)(img.shape, out.shape)
border_mode=border_mode,
subsample=subsample,
conv_mode=conv_mode
)(topgrad_shape, kerns_shape)
conv_grad_w = dnn.GpuDnnConv3dGradW()(
img,
out,
topgrad,
kerns,
desc,
)
self._compile_and_check(
[img, out, kerns],
[img, topgrad, kerns],
[conv_grad_w],
[img_val, out_vals, kern_vals],
[img_val, topgrad_vals, kerns_vals],
dnn.GpuDnnConv3dGradW
)
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv3d_gradw(self, border_mode, conv_mode):
self._test_conv3d_gradw(T.tensor5('img'),
T.tensor5('topgrad'),
T.tensor5('kerns'),
(5, 2, 6, 13, 21),
(1, 2, 3, 7, 9),
border_mode,
conv_mode,
(1, 1, 1))
def test_conv_gradi(self):
if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg)
......
......@@ -123,7 +123,7 @@ class BaseCorrMM(gof.OpenMPOp):
def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files
return (1, self.openmp, blas_header_version())
return (5, self.openmp, blas_header_version())
def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of
......@@ -234,17 +234,17 @@ class BaseCorrMM(gof.OpenMPOp):
# When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given.
# Similarly, when border_mode="half", we cannot infer the weight size.
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
if not height:
raise ValueError("height must be given for backprop with vertical sampling or border_mode='half'")
if height:
height = '(*(npy_int64 *)(PyArray_DATA(%s)))' % height
else:
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
raise ValueError("height must be given for backprop with vertical sampling or border_mode='half'")
height = '-1'
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
if not width:
raise ValueError("width must be given for backprop with horizontal sampling or border_mode='half'")
if width:
width = '(*(npy_int64 *)(PyArray_DATA(%s)))' % width
else:
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
raise ValueError("width must be given for backprop with horizontal sampling or border_mode='half'")
width = '-1'
sub = sub.copy()
sub.update(locals())
......@@ -268,15 +268,15 @@ class BaseCorrMM(gof.OpenMPOp):
// Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding)
int kH, kW;
int kH, kW, dil_kH, dil_kW;
if (direction != 1) {
// weight is an input variable, we can just read its shape
kH = PyArray_DIMS(weights)[2];
kW = PyArray_DIMS(weights)[3];
}
else {
if ((dH != 1) || (padH == -1)) {
// vertical subsampling or half padding, kernel height is specified
if (%(height)s != -1) {
// kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s;
}
else if (padH == -2) {
......@@ -287,7 +287,8 @@ class BaseCorrMM(gof.OpenMPOp):
// explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
}
if ((dW != 1) || (padW == -1)) {
if (%(width)s != -1) {
// kernel width is specified (perhaps horizontal subsampling or half padding)
kW = %(width)s;
}
else if (padW == -2) {
......@@ -299,8 +300,8 @@ class BaseCorrMM(gof.OpenMPOp):
}
// Implicit dilated kernel size
int dil_kH = (kH - 1) * dilH + 1;
int dil_kW = (kW - 1) * dilW + 1;
dil_kH = (kH - 1) * dilH + 1;
dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
if (padH == -1) { // vertical half padding
......@@ -334,6 +335,21 @@ class BaseCorrMM(gof.OpenMPOp):
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3]);
%(fail)s
}
break;
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
......@@ -342,14 +358,44 @@ class BaseCorrMM(gof.OpenMPOp):
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[3] = (npy_intp)kW; // how convenient
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. weights: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)s
}
break;
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. inputs: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)s
}
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2\\n");
......@@ -491,13 +537,13 @@ class CorrMM_gradWeights(BaseCorrMM):
raise TypeError('img must be 4D tensor')
if topgrad.type.ndim != 4:
raise TypeError('topgrad must be 4D tensor')
if self.subsample != (1, 1) or self.border_mode == "half":
if shape is None:
if self.subsample != (1, 1) or self.border_mode == "half":
raise ValueError('shape must be given if subsample != (1, 1)'
' or border_mode == "half"')
height_width = [as_tensor_variable(shape[0]).astype('int64'), as_tensor_variable(shape[1]).astype('int64')]
else:
height_width = []
else:
height_width = [as_tensor_variable(shape[0]).astype('int64'), as_tensor_variable(shape[1]).astype('int64')]
broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1],
False, False]
......@@ -588,9 +634,13 @@ class CorrMM_gradInputs(BaseCorrMM):
raise TypeError('kern must be 4D tensor')
if topgrad.type.ndim != 4:
raise TypeError('topgrad must be 4D tensor')
if self.subsample != (1, 1) and shape is None:
if shape is None:
if self.subsample != (1, 1):
raise ValueError('shape must be given if subsample != (1, 1)')
height_width = [as_tensor_variable(shape[0]).astype('int64'), as_tensor_variable(shape[1]).astype('int64')] if self.subsample != (1, 1) else []
height_width = []
else:
height_width = [as_tensor_variable(shape[0]).astype('int64'),
as_tensor_variable(shape[1]).astype('int64')]
broadcastable = [topgrad.type.broadcastable[0], kern.type.broadcastable[1],
False, False]
......
......@@ -188,9 +188,17 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const int topDepth = (bottomDepth + 2*padD - dil_kD) / dD + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const int topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
// (%% is used because this code goes through Python %-substitution)
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) %% y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const int topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
// fix: the original `#undef _CONV_FLOORDIV` named a macro that was never
// defined, so _CONV_FLOORDIV_X leaked past its intended scope
#undef _CONV_FLOORDIV_X
if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] ||
......@@ -245,7 +253,23 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
char Trans = 'T';
PyArrayObject *output;
if (direction == 0) { // forward pass
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
switch(direction) {
case 0:
output = top;
break;
case 1:
output = weight;
break;
case 2:
output = bottom;
break;
default:
return NULL;
}
PyArray_FILLWBYTE(output, 0);
}
else if (direction == 0) { // forward pass
output = top;
// valid correlation: im3d2col, then gemm
// Iterate over batch
......
......@@ -164,8 +164,15 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
// (%% is used because this code goes through Python %-substitution)
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) %% y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
// fix: the original `#undef _CONV_FLOORDIV` named a macro that was never
// defined, so _CONV_FLOORDIV_X leaked past its intended scope
#undef _CONV_FLOORDIV_X
if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] ||
......@@ -219,7 +226,23 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
char Trans = 'T';
PyArrayObject *output;
if (direction == 0) { // forward pass
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
switch(direction) {
case 0:
output = top;
break;
case 1:
output = weight;
break;
case 2:
output = bottom;
break;
default:
return NULL;
}
PyArray_FILLWBYTE(output, 0);
}
else if (direction == 0) { // forward pass
output = top;
// valid correlation: im2col, then gemm
// Iterate over batch
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论