Commit 7f78fce1 authored by Nicolas Ballas, committed by --global

make cudnn conv3d gradI and gradW work

Parent 6117f98b
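For orientation, here is a minimal sketch of how the two ops this commit fixes are driven, mirroring test_conv3d_gradweight further down in the diff. All op names come from this commit; the shapes are illustrative, the theano.sandbox.cuda.dnn import path is assumed, and running it requires a cuDNN-enabled GPU.

    import numpy
    import theano
    from theano import shared
    from theano.sandbox.cuda import dnn  # assumed module path for this era of Theano

    # 5D tensors in (batch, channel, dim1, dim2, dim3) layout
    inputs = shared(numpy.random.random((16, 1, 10, 12, 16)).astype('float32'))
    dCdH = shared(numpy.random.random((16, 10, 5, 1, 13)).astype('float32'))
    kern = shared(numpy.random.random((10, 1, 6, 12, 4)).astype('float32'))

    # A convolution descriptor parameterizes the gradient op, as in the tests
    desc = dnn.GpuDnnConv3dDesc(border_mode='valid', subsample=(1, 1, 1),
                                conv_mode='cross')(inputs.shape, kern.shape)
    gradW = dnn.GpuDnnConv3dGradW()(inputs, dCdH, kern, desc)  # fixed by this commit
    f = theano.function([], gradW)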
@@ -908,7 +908,7 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
     def __init__(self, inplace=False, workmem=None):
         ### Only workmem = 'none' work with cudnn conv 3d
-        super(GpuDnnConv3dGradW, self).__init(inplace=inplace, workmem='none')
+        super(GpuDnnConv3dGradW, self).__init__(inplace=inplace, workmem='none')
 
     def grad(self, inp, grads):
         img, top, output, desc, alpha, beta, nb_dim = inp
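The one-character fix above matters more than it looks: because __init has two leading underscores, Python name-mangles the attribute lookup inside the class body, so the old line failed at instantiation time with an AttributeError rather than a clear typo message. A standalone illustration with hypothetical class names:

    class Base(object):
        def __init__(self, inplace=False, workmem=None):
            self.inplace = inplace

    class Broken(Base):
        def __init__(self):
            # Typo as in the old line: __init is mangled to _Broken__init,
            # an attribute the super proxy does not have.
            super(Broken, self).__init(inplace=True)

    try:
        Broken()
    except AttributeError as e:
        print(e)  # 'super' object has no attribute '_Broken__init'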
@@ -1051,7 +1051,7 @@ class GpuDnnConvGradI(DnnBase, COp):
-class GpuDnnConvGrad3dI(GpuDnnConvGradI):
+class GpuDnnConv3dGradI(GpuDnnConvGradI):
     """
     The convolution gradient with respect to the inputs.
@@ -1065,7 +1065,7 @@ class GpuDnnConvGrad3dI(GpuDnnConvGradI):
                'descriptor', 'alpha', 'beta')
 
     def __init__(self, inplace=False):
-        super(GpuDnnConvGradI, self).__init__(inplace)
+        super(GpuDnnConv3dGradI, self).__init__(inplace)
 
     def grad(self, inp, grads):
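The second argument change above is also a real fix, not part of the rename: passing the parent class to super() from inside the subclass resumes the MRO after the parent, skipping the parent's own __init__. A small sketch of the failure mode, with a hypothetical three-level hierarchy standing in for DnnBase -> GpuDnnConvGradI -> GpuDnnConv3dGradI:

    class A(object):
        def __init__(self):
            self.tag = 'A'

    class B(A):
        def __init__(self):
            super(B, self).__init__()
            self.tag = 'B'

    class C(B):
        def __init__(self):
            # Naming B here, as the old super(GpuDnnConvGradI, ...) call did,
            # resumes the MRO after B and never runs B.__init__.
            super(B, self).__init__()

    print(C().tag)  # prints 'A': B's initialization was skipped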
@@ -1091,7 +1091,7 @@ class GpuDnnConvGrad3dI(GpuDnnConvGradI):
             raise TypeError('kern must be 5D tensor')
         if topgrad.type.ndim != 5:
             raise TypeError('topgrad must be 5D tensor')
-        if output.type.ndim != 4:
+        if output.type.ndim != 5:
             raise TypeError('output must be 5D tensor')
         if not isinstance(desc.type, CDataType) \
@@ -1107,90 +1107,6 @@ class GpuDnnConvGrad3dI(GpuDnnConvGradI):
-def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
-             conv_mode='conv', direction_hint=None, workmem=None):
-    """
-    GPU convolution using cuDNN from NVIDIA.
-
-    The memory layout to use is 'bc01', that is 'batch', 'channel',
-    'first dim', 'second dim' in that order.
-
-    :param img: images to do the convolution over
-    :param kerns: convolution filters
-    :param border_mode: one of 'valid', 'full'; additionally, the padding size
-        could be directly specified by an integer or a pair of integers
-    :param subsample: perform subsampling of the output (default: (1, 1))
-    :param conv_mode: perform convolution (kernels flipped) or cross-correlation.
-        One of 'conv', 'cross'. (default: 'conv')
-    :param direction_hint: Used by graph optimizers to change algorithm choice.
-        By default, GpuDnnConv will be used to carry out the convolution.
-        If border_mode is 'valid', subsample is (1,1) and direction_hint is
-        'bprop weights', it will use GpuDnnConvGradW.
-        If border_mode is 'full', subsample is (1,1) and direction_hint is
-        *not* 'forward!', it will use GpuDnnConvGradI.
-        This parameter is used internally by graph optimizers and may be
-        removed at any time without a deprecation period. You have been warned.
-    :param workmem: Specify the amount of working memory allowed.
-        More memory is usually faster. One of 'none', 'small' or
-        'large'. (default is None which takes its value from
-        :attr:`config.dnn.conv.workmem`)
-
-    :warning: The cuDNN library only works with GPU that have a compute
-        capability of 3.0 or higer. This means that older GPU will not
-        work with this Op.
-    """
-    fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
-    if (border_mode == 'valid' and subsample == (1, 1) and
-            direction_hint == 'bprop weights'):
-        # Special case: We are asked to use GpuDnnConvGradW. We need to set
-        # up a suitable 'fake' convolution to compute the gradient for.
-        img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
-        if conv_mode == 'conv':
-            # We need to flip manually. These 'kerns' are not the kernels
-            # that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
-            kerns = kerns[:, :, ::-1, ::-1]
-        kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
-        shape2 = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1
-        shape3 = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1
-        out = gpu_alloc_empty(shape_i(kerns, 1, fgraph),
-                              shape_i(img, 1, fgraph), shape2, shape3)
-        desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
-                              conv_mode='cross')(img.shape, out.shape)
-        conv = GpuDnnConvGradW()(img, kerns, out, desc)
-        return as_cuda_ndarray_variable(conv.dimshuffle(1, 0, 2, 3))
-
-    elif (border_mode == 'full' and subsample == (1, 1) and
-            direction_hint != 'forward!'):
-        # Special case: We can be faster by using GpuDnnConvGradI to compute
-        # the full convolution as the backward pass of a valid convolution.
-        # We just need to set up a suitable 'fake' valid convolution.
-        img = gpu_contiguous(img)  # cudnn v1 and v2 rc3 need contiguous data
-        kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
-        conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
-        shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
-        shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
-        out = gpu_alloc_empty(shape_i(img, 0, fgraph),
-                              shape_i(kerns, 1, fgraph), shape2, shape3)
-        desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
-                              conv_mode=conv_mode)(out.shape, kerns.shape)
-        return GpuDnnConvGradI()(kerns, img, out, desc)
-
-    # Standard case: We use GpuDnnConv with suitable padding.
-    # contig_version will return a gpu_contiguous copy
-    # if the img contains negative strides
-    img = gpu_contiguous(img)
-    kerns = gpu_contiguous(kerns)
-    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
-                          conv_mode=conv_mode)(img.shape, kerns.shape)
-    desc_op = desc.owner.op
-    out_shp = GpuDnnConv.get_out_shape(img.shape, kerns.shape,
-                                       desc_op.border_mode,
-                                       desc_op.subsample)
-    out = gpu_alloc_empty(*out_shp)
-    return GpuDnnConv(workmem=workmem)(img, kerns, out, desc)
 def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
              conv_mode='conv', direction_hint=None, workmem=None):
     """
...
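The deleted duplicate of dnn_conv above leaned on a piece of shape algebra worth spelling out: a full convolution of an i-long signal with a k-long kernel yields i + k - 1 outputs, which is exactly the input length a valid convolution must have had to produce i outputs from the same kernel. That round trip is why GpuDnnConvGradI can compute the full convolution as the backward pass of a 'fake' valid convolution. A quick sketch checking the identity (helper names are mine):

    # Round trip behind the 'full' special case, per spatial dimension.
    def valid_out(i, k):
        return i - k + 1   # matches shape_i(img) - shape_i(kerns) + 1

    def full_out(i, k):
        return i + k - 1   # matches shape_i(img) + shape_i(kerns) - 1

    for i, k in [(20, 6), (12, 12), (15, 4)]:
        assert valid_out(full_out(i, k), k) == i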
 #section support_code
 
 static cudnnHandle_t _handle = NULL;
 
-static int
-c_set_tensor4d(CudaNdarray *var, cudnnTensorDescriptor_t desc) {
-  cudnnStatus_t err = cudnnSetTensor4dDescriptorEx(
-    desc, CUDNN_DATA_FLOAT,
-    CudaNdarray_HOST_DIMS(var)[0],
-    CudaNdarray_HOST_DIMS(var)[1],
-    CudaNdarray_HOST_DIMS(var)[2],
-    CudaNdarray_HOST_DIMS(var)[3],
-    CudaNdarray_HOST_STRIDES(var)[0]?CudaNdarray_HOST_STRIDES(var)[0]:CudaNdarray_HOST_DIMS(var)[2]*CudaNdarray_HOST_DIMS(var)[3]*CudaNdarray_HOST_DIMS(var)[1],
-    CudaNdarray_HOST_STRIDES(var)[1]?CudaNdarray_HOST_STRIDES(var)[1]:CudaNdarray_HOST_DIMS(var)[2]*CudaNdarray_HOST_DIMS(var)[3],
-    CudaNdarray_HOST_STRIDES(var)[2]?CudaNdarray_HOST_STRIDES(var)[2]:CudaNdarray_HOST_DIMS(var)[3],
-    CudaNdarray_HOST_STRIDES(var)[3]?CudaNdarray_HOST_STRIDES(var)[3]:1
-  );
-
-  if (err != CUDNN_STATUS_SUCCESS) {
-    PyErr_Format(PyExc_RuntimeError,
-                 "Could not set tensor4d descriptor: %s"
-                 "shapes=%d %d %d %d strides=%d %d %d %d",
-                 cudnnGetErrorString(err),
-                 CudaNdarray_HOST_DIMS(var)[0],
-                 CudaNdarray_HOST_DIMS(var)[1],
-                 CudaNdarray_HOST_DIMS(var)[2],
-                 CudaNdarray_HOST_DIMS(var)[3],
-                 CudaNdarray_HOST_STRIDES(var)[0]?CudaNdarray_HOST_STRIDES(var)[0]:CudaNdarray_HOST_DIMS(var)[2]*CudaNdarray_HOST_DIMS(var)[3]*CudaNdarray_HOST_DIMS(var)[1],
-                 CudaNdarray_HOST_STRIDES(var)[1]?CudaNdarray_HOST_STRIDES(var)[1]:CudaNdarray_HOST_DIMS(var)[2]*CudaNdarray_HOST_DIMS(var)[3],
-                 CudaNdarray_HOST_STRIDES(var)[2]?CudaNdarray_HOST_STRIDES(var)[2]:CudaNdarray_HOST_DIMS(var)[3],
-                 CudaNdarray_HOST_STRIDES(var)[3]?CudaNdarray_HOST_STRIDES(var)[3]:1
-    );
-    return -1;
-  }
-  return 0;
-}
 static int
 c_set_tensorNd(CudaNdarray *var, int dim, cudnnTensorDescriptor_t desc) {
@@ -85,36 +53,6 @@ c_set_filterNd(CudaNdarray *var, int dim, cudnnFilterDescriptor_t desc) {
   return 0;
 }
-static int
-c_set_filter(CudaNdarray *var, cudnnFilterDescriptor_t desc) {
-  if (!CudaNdarray_is_c_contiguous(var)) {
-    PyErr_SetString(PyExc_ValueError,
-                    "Only contiguous filters (kernels) are supported.");
-    return -1;
-  }
-  cudnnStatus_t err = cudnnSetFilter4dDescriptor(
-    desc, CUDNN_DATA_FLOAT,
-    CudaNdarray_HOST_DIMS(var)[0],
-    CudaNdarray_HOST_DIMS(var)[1],
-    CudaNdarray_HOST_DIMS(var)[2],
-    CudaNdarray_HOST_DIMS(var)[3]
-  );
-  if (err != CUDNN_STATUS_SUCCESS) {
-    PyErr_Format(PyExc_RuntimeError,
-                 "Could not set filter descriptor: %s."
-                 " dims= %d %d %d %d",
-                 cudnnGetErrorString(err),
-                 CudaNdarray_HOST_DIMS(var)[0],
-                 CudaNdarray_HOST_DIMS(var)[1],
-                 CudaNdarray_HOST_DIMS(var)[2],
-                 CudaNdarray_HOST_DIMS(var)[3]);
-    return -1;
-  }
-  return 0;
-}
 #section init_code
 {
...
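The removed c_set_tensor4d above spelled out its fallback-stride logic inline: whenever a stored stride is 0 (a broadcastable dimension), the descriptor gets the stride a C-contiguous array of those dims would have. A sketch of that rule in Python, generalized to the N dimensions that c_set_tensorNd must handle; the helper name default_c_strides is mine:

    def default_c_strides(dims):
        # Stride of dim i, in elements, for a C-contiguous layout:
        # the product of all dimensions to its right.
        strides = [0] * len(dims)
        acc = 1
        for i in reversed(range(len(dims))):
            strides[i] = acc
            acc *= dims[i]
        return strides

    # Matches the hard-coded 4d fallbacks above, e.g. dims[1]*dims[2]*dims[3]
    # for dimension 0:
    print(default_c_strides([2, 3, 4, 5]))  # [60, 20, 5, 1]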
@@ -16,10 +16,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
     return 1;
   if (c_set_filterNd(kerns, nb_dim, APPLY_SPECIFIC(kerns)) == -1)
     return 1;
-  /* if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) */
-  /*   return 1; */
-  /* if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) */
-  /*   return 1; */
 
 #ifdef CONV_INPLACE
   Py_XDECREF(*output);
@@ -35,19 +31,11 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
   if (c_set_tensorNd(*output, nb_dim, APPLY_SPECIFIC(output)) == -1)
     return 1;
-  /* if (c_set_tensor4d(*output, APPLY_SPECIFIC(output)) == -1) */
-  /*   return 1; */
 
   {
     size_t worksize;
     void *workspace;
     cudnnConvolutionFwdAlgo_t chosen_algo;
-    for (int i = 0; (i < nb_dim); i++)
-      std::cout << i << "/" << nb_dim << ", "
-                << CudaNdarray_HOST_DIMS(input)[i] << ", "
-                << CudaNdarray_HOST_DIMS(kerns)[i] << std::endl;
 
     if (CHOOSE_ALGO)
     {
@@ -222,7 +210,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
                            APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(*output));
   }
 
   if (err != CUDNN_STATUS_SUCCESS) {
-    std::cout << "here2" << std::endl;
     PyErr_Format(PyExc_RuntimeError, "GpuDnnConv: error doing operation: %s",
                  cudnnGetErrorString(err));
     return 1;
...
@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
     return 1;
   }
-  /* if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) */
-  /*   return 1; */
-  /* if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) */
-  /*   return 1; */
 
   if (c_set_tensorNd(output, nb_dim, APPLY_SPECIFIC(output)) == -1)
     return 1;
   if (c_set_filterNd(kerns, nb_dim, APPLY_SPECIFIC(kerns)) == -1)
@@ -33,9 +28,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
     return 1;
 #endif
-  /* if (c_set_tensor4d(*input, APPLY_SPECIFIC(input)) == -1) */
-  /*   return 1; */
 
   if (c_set_tensorNd(*input, nb_dim, APPLY_SPECIFIC(input)) == -1)
     return 1;
...
@@ -48,7 +48,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
   // Check if the input and the output have the same shape as they have
   // last time the apply node was executed
   bool same_shapes = true;
-  for (int i = 0; (i < 4) && same_shapes; i++)
+  for (int i = 0; (i < nb_dim) && same_shapes; i++)
   {
     same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] !=
                     APPLY_SPECIFIC(previous_input_shape)[i]);
@@ -93,7 +93,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
   // Store the shapes of the inputs and kernels as well as the chosen
   // algorithm for future use.
   APPLY_SPECIFIC(previous_bwd_f_algo) = chosen_algo;
-  for (int i = 0; i < 4; i++)
+  for (int i = 0; i < nb_dim; i++)
   {
     APPLY_SPECIFIC(previous_input_shape)[i] =
       CudaNdarray_HOST_DIMS(input)[i];
...
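The two 4 -> nb_dim changes above extend the shape memoization that decides whether to rerun cuDNN's algorithm selection, so that 5D (3d convolution) inputs compare all their dimensions. A Python sketch of the intent (not a literal transcription of the C; names are illustrative):

    # Re-select the convolution algorithm only when some input dimension
    # changed since the previous execution of the apply node.
    def shapes_changed(dims, previous_dims, nb_dim):
        return any(dims[i] != previous_dims[i] for i in range(nb_dim))

    prev = [16, 1, 10, 12, 16]
    assert not shapes_changed([16, 1, 10, 12, 16], prev, 5)
    assert shapes_changed([16, 1, 10, 12, 17], prev, 5)

With the old hard-coded 4, the last spatial dimension of a 5D input was never compared, so a cached algorithm could be reused for a different shape.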
@@ -764,13 +764,12 @@ def test_dnn_conv_grad():
     utt.verify_grad(dconvw, [img_val, kern_val, out_val])
 
-def test_conv3d_valid():
-    print dnn.version()
-    if not cuda.dnn.dnn_available():
+def test_conv3d_fwd():
+    if not cuda.dnn.dnn_available() and dnn.version()[0] >= 3000 :
         raise SkipTest('"3D conv not supported in cudnn v1')
 
-    def run_conv3d_valid(inputs_shape, filters_shape,
-                         subsample=(1, 1, 1)):
+    def run_conv3d_fwd(inputs_shape, filters_shape,
+                       subsample=(1, 1, 1)):
         inputs_val = numpy.random.random(inputs_shape).astype('float32')
@@ -791,31 +790,126 @@ def test_conv3d_valid():
         res_ref = f_ref()
         res = f()
-        print res_ref.shape, res.shape
         utt.assert_allclose(res_ref, res)
 
-    run_conv3d_valid(inputs_shape=(128, 3, 5, 5, 5),
-                     filters_shape=(64, 3, 1, 2, 4))
-    run_conv3d_valid(inputs_shape=(16, 4, 20, 12, 15),
-                     filters_shape=(10, 4, 6, 12, 4),
-                     subsample=(2, 2, 2))
-    run_conv3d_valid(inputs_shape=(16, 4, 20, 12, 15),
-                     filters_shape=(10, 4, 6, 12, 4),
-                     subsample=(2, 2, 2))
-    run_conv3d_valid(inputs_shape=(16, 1, 20, 12, 15),
-                     filters_shape=(10, 1, 6, 12, 4),
-                     subsample=(3, 3, 3))
-    run_conv3d_valid(inputs_shape=(16, 2, 20, 12, 15),
-                     filters_shape=(10, 2, 6, 12, 4),
-                     subsample=(3, 3, 3))
-    run_conv3d_valid(inputs_shape=(16, 1, 20, 12, 15),
-                     filters_shape=(10, 1, 6, 12, 4),
-                     subsample=(3, 2, 1))
-    run_conv3d_valid(inputs_shape=(16, 1, 20, 12, 15),
-                     filters_shape=(10, 1, 6, 12, 4),
-                     subsample=(1, 2, 3))
+    run_conv3d_fwd(inputs_shape=(128, 3, 5, 5, 5),
+                   filters_shape=(64, 3, 1, 2, 4))
+    run_conv3d_fwd(inputs_shape=(16, 4, 20, 12, 15),
+                   filters_shape=(10, 4, 6, 12, 4),
+                   subsample=(2, 2, 2))
+    run_conv3d_fwd(inputs_shape=(16, 4, 20, 12, 15),
+                   filters_shape=(10, 4, 6, 12, 4),
+                   subsample=(2, 2, 2))
+    run_conv3d_fwd(inputs_shape=(16, 1, 20, 12, 15),
+                   filters_shape=(10, 1, 6, 12, 4),
+                   subsample=(3, 3, 3))
+    run_conv3d_fwd(inputs_shape=(16, 2, 20, 12, 15),
+                   filters_shape=(10, 2, 6, 12, 4),
+                   subsample=(3, 3, 3))
+    run_conv3d_fwd(inputs_shape=(16, 1, 20, 12, 15),
+                   filters_shape=(10, 1, 6, 12, 4),
+                   subsample=(3, 2, 1))
+    run_conv3d_fwd(inputs_shape=(16, 1, 20, 12, 15),
+                   filters_shape=(10, 1, 6, 12, 4),
+                   subsample=(1, 2, 3))
+def test_conv3d_gradweight():
+    if not cuda.dnn.dnn_available() and dnn.version()[0] >= 3000 :
+        raise SkipTest('"3D conv not supported in cudnn v1')
+
+    def run_gradweight(inputs_shape, filters_shape, dCdH_shape,
+                       subsample=(1, 1, 1)):
+        inputs_val = numpy.random.random(inputs_shape).astype('float32')
+        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
+        kern_val = numpy.random.random(filters_shape).astype('float32')
+
+        inputs = shared(inputs_val)
+        dCdH = shared(dCdH_val)
+        kern = shared(kern_val)
+
+        filters_shape_s = (filters_shape[0], filters_shape[2],
+                           filters_shape[3], filters_shape[4],
+                           filters_shape[1])
+        conv = theano.tensor.nnet.convGrad3D(V=inputs.dimshuffle(0, 2, 3, 4, 1),
+                                             dCdH=dCdH.dimshuffle(0, 2, 3, 4, 1),
+                                             WShape=filters_shape_s,
+                                             d=subsample)
+        desc = dnn.GpuDnnConv3dDesc(border_mode='valid', subsample=subsample,
+                                    conv_mode='cross')(inputs.shape, kern.shape)
+        gradW = dnn.GpuDnnConv3dGradW()(inputs, dCdH, kern, desc)
+
+        f_ref = theano.function([], conv.dimshuffle(0, 4, 1, 2, 3))
+        f = theano.function([], gradW, mode=mode_with_gpu)
+
+        res_ref = f_ref()
+        res = f()
+        utt.assert_allclose(res_ref, res)
+
+    run_gradweight(inputs_shape=(16, 1, 10, 12, 16),
+                   filters_shape=(10, 1, 6, 12, 4),
+                   dCdH_shape=(16, 10, 5, 1, 13),
+                   subsample=(1, 1, 1))
+    run_gradweight(inputs_shape=(16, 1, 20, 10, 16),
+                   filters_shape=(10, 1, 6, 4, 4),
+                   dCdH_shape=(16, 10, 8, 4, 7),
+                   subsample=(2, 2, 2))
+    run_gradweight(inputs_shape=(16, 1, 20, 10, 16),
+                   filters_shape=(10, 1, 6, 3, 4),
+                   dCdH_shape=(16, 10, 5, 3, 5),
+                   subsample=(3, 3, 3))
+    run_gradweight(inputs_shape=(16, 1, 20, 12, 16),
+                   filters_shape=(10, 1, 6, 12, 4),
+                   dCdH_shape=(16, 10, 8, 1, 5),
+                   subsample=(2, 1, 3))
+
+
+def test_conv3d_gradinput():
+    if not cuda.dnn.dnn_available() and dnn.version()[0] >= 3000 :
+        raise SkipTest('"3D conv not supported in cudnn v1')
+
+    def run_gradinput(inputs_shape, filters_shape,
+                      subsample=(1, 1, 1)):
+        inputs_val = numpy.random.random(inputs_shape).astype('float32')
+        filters_val = numpy.random.random(filters_shape).astype('float32')
+
+        inputs = shared(inputs_val)
+        filters = shared(filters_val)
+        bias = shared(numpy.zeros(filters_shape[1]).astype('float32'))
+        conv = theano.tensor.nnet.convTransp3D(W=filters.dimshuffle(0, 2, 3, 4, 1),
+                                               b=bias, d=subsample,
+                                               H=inputs.dimshuffle(0, 2, 3, 4, 1))
+        f_ref = theano.function([], conv.dimshuffle(0, 4, 1, 2, 3))
+        res_ref = f_ref()
+
+        bottom_shape = res_ref.shape
+        bottom_val = numpy.random.random(bottom_shape).astype('float32')
+        bottom = shared(bottom_val)
+
+        desc = dnn.GpuDnnConv3dDesc(border_mode='valid', subsample=subsample,
+                                    conv_mode='cross')(bottom.shape, filters.shape)
+        gradI = dnn.GpuDnnConv3dGradI()(filters, inputs, bottom, desc)
+        f = theano.function([], gradI, mode=mode_with_gpu)
+
+        res = f()
+        utt.assert_allclose(res_ref, res)
+
+    run_gradinput(inputs_shape=(16, 10, 15, 12, 12),
+                  filters_shape=(10, 1, 6, 12, 4))
+    run_gradinput(inputs_shape=(16, 10, 15, 12, 12),
+                  filters_shape=(10, 1, 6, 12, 4),
+                  subsample=(2, 2, 2))
+    run_gradinput(inputs_shape=(16, 10, 15, 12, 12),
+                  filters_shape=(10, 1, 6, 12, 4),
+                  subsample=(3, 3, 3))
+    run_gradinput(inputs_shape=(16, 10, 15, 12, 12),
+                  filters_shape=(10, 1, 6, 12, 4),
+                  subsample=(3, 1, 2))
 def test_version():
     if not cuda.dnn.dnn_available():
         raise SkipTest(cuda.dnn.dnn_available.msg)
...
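A closing note on the hard-coded shapes in test_conv3d_gradweight above: each dCdH_shape follows the valid-convolution output formula out = (i - k) // s + 1 per spatial dimension. A quick check against the first two test cases (the helper name is mine):

    def conv3d_valid_out_shape(inputs_shape, filters_shape, subsample):
        # (batch, channels, i1, i2, i3) convolved with (n_filters, channels,
        # k1, k2, k3) at strides (s1, s2, s3), 'valid' border mode.
        b, c, i1, i2, i3 = inputs_shape
        n, _, k1, k2, k3 = filters_shape
        s1, s2, s3 = subsample
        return (b, n,
                (i1 - k1) // s1 + 1,
                (i2 - k2) // s2 + 1,
                (i3 - k3) // s3 + 1)

    assert conv3d_valid_out_shape((16, 1, 10, 12, 16),
                                  (10, 1, 6, 12, 4),
                                  (1, 1, 1)) == (16, 10, 5, 1, 13)
    assert conv3d_valid_out_shape((16, 1, 20, 10, 16),
                                  (10, 1, 6, 4, 4),
                                  (2, 2, 2)) == (16, 10, 8, 4, 7)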