提交 486b760d 作者: --global

Remove nb_dim param from dnn convolutions

上级 b9e29760
...@@ -534,7 +534,7 @@ class GpuDnnConv(DnnBase, COp): ...@@ -534,7 +534,7 @@ class GpuDnnConv(DnnBase, COp):
""" """
__props__ = ('workmem', 'inplace') __props__ = ('workmem', 'inplace')
__input_name__ = ('image', 'kernel', 'output', __input_name__ = ('image', 'kernel', 'output',
'descriptor', 'alpha', 'beta', 'nb_dim') 'descriptor', 'alpha', 'beta')
def __init__(self, workmem=None, inplace=False): def __init__(self, workmem=None, inplace=False):
""" """
...@@ -608,7 +608,7 @@ class GpuDnnConv(DnnBase, COp): ...@@ -608,7 +608,7 @@ class GpuDnnConv(DnnBase, COp):
return [alg_def, alg_choose_def, alg_choose_time_def] + inpl_def return [alg_def, alg_choose_def, alg_choose_time_def] + inpl_def
def make_node(self, img, kern, output, desc, alpha=None, beta=None, nb_dim=None): def make_node(self, img, kern, output, desc, alpha=None, beta=None):
img = as_cuda_ndarray_variable(img) img = as_cuda_ndarray_variable(img)
kern = as_cuda_ndarray_variable(kern) kern = as_cuda_ndarray_variable(kern)
output = as_cuda_ndarray_variable(output) output = as_cuda_ndarray_variable(output)
...@@ -625,13 +625,12 @@ class GpuDnnConv(DnnBase, COp): ...@@ -625,13 +625,12 @@ class GpuDnnConv(DnnBase, COp):
alpha = ensure_float(alpha, _one, 'alpha') alpha = ensure_float(alpha, _one, 'alpha')
beta = ensure_float(beta, _zero, 'beta') beta = ensure_float(beta, _zero, 'beta')
nb_dim = ensure_int(nb_dim, _ifour, 'nb_dim')
return Apply(self, [img, kern, output, desc, alpha, beta, nb_dim], return Apply(self, [img, kern, output, desc, alpha, beta],
[output.type()]) [output.type()])
def grad(self, inp, grads): def grad(self, inp, grads):
img, kerns, output, desc, alpha, beta, nb_dim = inp img, kerns, output, desc, alpha, beta = inp
top, = grads top, = grads
top = gpu_contiguous(top) top = gpu_contiguous(top)
...@@ -640,14 +639,13 @@ class GpuDnnConv(DnnBase, COp): ...@@ -640,14 +639,13 @@ class GpuDnnConv(DnnBase, COp):
d_kerns = GpuDnnConvGradW()(img, top, gpu_alloc_empty(*kerns.shape), desc) d_kerns = GpuDnnConvGradW()(img, top, gpu_alloc_empty(*kerns.shape), desc)
d_alpha = grad_not_implemented(self, 4, alpha) d_alpha = grad_not_implemented(self, 4, alpha)
d_beta = grad_not_implemented(self, 5, beta) d_beta = grad_not_implemented(self, 5, beta)
d_nb_dim = grad_not_implemented(self, 6, nb_dim)
return [d_img * alpha, d_kerns * alpha, top * beta, return [d_img * alpha, d_kerns * alpha, top * beta,
DisconnectedType()(), d_alpha, d_beta, d_nb_dim] DisconnectedType()(), d_alpha, d_beta]
def connection_pattern(self, node): def connection_pattern(self, node):
# not connected to desc # not connected to desc
return [[1], [1], [1], [0], [1], [1], [1]] return [[1], [1], [1], [0], [1], [1]]
@staticmethod @staticmethod
def get_out_shape(ishape, kshape, border_mode, subsample): def get_out_shape(ishape, kshape, border_mode, subsample):
...@@ -695,7 +693,7 @@ class GpuDnnConv3d(GpuDnnConv): ...@@ -695,7 +693,7 @@ class GpuDnnConv3d(GpuDnnConv):
""" """
__props__ = ('workmem', 'inplace') __props__ = ('workmem', 'inplace')
__input_name__ = ('image', 'kernel', 'output', __input_name__ = ('image', 'kernel', 'output',
'descriptor', 'alpha', 'beta', 'nb_dim') 'descriptor', 'alpha', 'beta')
def __init__(self, workmem=None, inplace=False): def __init__(self, workmem=None, inplace=False):
""" """
...@@ -705,7 +703,7 @@ class GpuDnnConv3d(GpuDnnConv): ...@@ -705,7 +703,7 @@ class GpuDnnConv3d(GpuDnnConv):
super(GpuDnnConv3d, self).__init__(workmem='guess', inplace=inplace) super(GpuDnnConv3d, self).__init__(workmem='guess', inplace=inplace)
assert self.workmem in ['none', 'time','guess'] assert self.workmem in ['none', 'time','guess']
def make_node(self, img, kern, output, desc, alpha=None, beta=None, nb_dim=None): def make_node(self, img, kern, output, desc, alpha=None, beta=None):
img = as_cuda_ndarray_variable(img) img = as_cuda_ndarray_variable(img)
kern = as_cuda_ndarray_variable(kern) kern = as_cuda_ndarray_variable(kern)
...@@ -721,13 +719,12 @@ class GpuDnnConv3d(GpuDnnConv): ...@@ -721,13 +719,12 @@ class GpuDnnConv3d(GpuDnnConv):
raise TypeError('desc must be cudnnConvolutionDescriptor_t') raise TypeError('desc must be cudnnConvolutionDescriptor_t')
alpha = ensure_float(alpha, _one, 'alpha') alpha = ensure_float(alpha, _one, 'alpha')
beta = ensure_float(beta, _zero, 'beta') beta = ensure_float(beta, _zero, 'beta')
nb_dim = ensure_int(nb_dim, _ifive, 'nb_dim')
return Apply(self, [img, kern, output, desc, alpha, beta, nb_dim], return Apply(self, [img, kern, output, desc, alpha, beta],
[output.type()]) [output.type()])
def grad(self, inp, grads): def grad(self, inp, grads):
img, kerns, output, desc, alpha, beta, nb_dim = inp img, kerns, output, desc, alpha, beta = inp
top, = grads top, = grads
top = gpu_contiguous(top) top = gpu_contiguous(top)
...@@ -736,10 +733,9 @@ class GpuDnnConv3d(GpuDnnConv): ...@@ -736,10 +733,9 @@ class GpuDnnConv3d(GpuDnnConv):
d_kerns = GpuDnnConvGrad3dW()(img, top, gpu_alloc_empty(*kerns.shape), desc) d_kerns = GpuDnnConvGrad3dW()(img, top, gpu_alloc_empty(*kerns.shape), desc)
d_alpha = grad_not_implemented(self, 4, alpha) d_alpha = grad_not_implemented(self, 4, alpha)
d_beta = grad_not_implemented(self, 5, beta) d_beta = grad_not_implemented(self, 5, beta)
d_nb_dim = grad_not_implemented(self, 6, nb_dim)
return [d_img * alpha, d_kerns * alpha, top * beta, return [d_img * alpha, d_kerns * alpha, top * beta,
DisconnectedType()(), d_alpha, d_beta, d_nb_dim] DisconnectedType()(), d_alpha, d_beta]
@staticmethod @staticmethod
def get_out_shape(ishape, kshape, border_mode, subsample): def get_out_shape(ishape, kshape, border_mode, subsample):
...@@ -788,7 +784,7 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -788,7 +784,7 @@ class GpuDnnConvGradW(DnnBase, COp):
""" """
__props__ = ('workmem', 'inplace',) __props__ = ('workmem', 'inplace',)
__input_name__ = ('image', 'grad', 'output', 'descriptor', 'alpha', 'beta', 'nb_dim') __input_name__ = ('image', 'grad', 'output', 'descriptor', 'alpha', 'beta')
def __init__(self, inplace=False, workmem=None): def __init__(self, inplace=False, workmem=None):
COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gw.c"], COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gw.c"],
...@@ -809,7 +805,7 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -809,7 +805,7 @@ class GpuDnnConvGradW(DnnBase, COp):
self.inplace = False self.inplace = False
def grad(self, inp, grads): def grad(self, inp, grads):
img, top, output, desc, alpha, beta, nb_dim = inp img, top, output, desc, alpha, beta = inp
kerns, = grads kerns, = grads
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
...@@ -818,14 +814,13 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -818,14 +814,13 @@ class GpuDnnConvGradW(DnnBase, COp):
d_top = GpuDnnConv()(img, kerns, gpu_alloc_empty(*top.shape), desc) d_top = GpuDnnConv()(img, kerns, gpu_alloc_empty(*top.shape), desc)
d_alpha = grad_not_implemented(self, 4, alpha) d_alpha = grad_not_implemented(self, 4, alpha)
d_beta = grad_not_implemented(self, 5, beta) d_beta = grad_not_implemented(self, 5, beta)
d_nb_dim = grad_not_implemented(self, 6, nb_dim)
return (d_img * alpha, d_top * alpha, kerns * beta, return (d_img * alpha, d_top * alpha, kerns * beta,
DisconnectedType()(), d_alpha, d_beta, d_nb_dim) DisconnectedType()(), d_alpha, d_beta)
def connection_pattern(self, node): def connection_pattern(self, node):
# not connected to desc # not connected to desc
return [[1], [1], [1], [0], [1], [1], [1]] return [[1], [1], [1], [0], [1], [1]]
def get_op_params(self): def get_op_params(self):
if self.inplace: if self.inplace:
...@@ -854,7 +849,7 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -854,7 +849,7 @@ class GpuDnnConvGradW(DnnBase, COp):
return inplace_def + [alg_def, alg_choose_def] return inplace_def + [alg_def, alg_choose_def]
def make_node(self, img, topgrad, output, desc, alpha=None, beta=None, nb_dim=None): def make_node(self, img, topgrad, output, desc, alpha=None, beta=None):
img = as_cuda_ndarray_variable(img) img = as_cuda_ndarray_variable(img)
topgrad = as_cuda_ndarray_variable(topgrad) topgrad = as_cuda_ndarray_variable(topgrad)
output = as_cuda_ndarray_variable(output) output = as_cuda_ndarray_variable(output)
...@@ -871,10 +866,8 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -871,10 +866,8 @@ class GpuDnnConvGradW(DnnBase, COp):
alpha = ensure_float(alpha, _one, 'alpha') alpha = ensure_float(alpha, _one, 'alpha')
beta = ensure_float(beta, _zero, 'beta') beta = ensure_float(beta, _zero, 'beta')
nb_dim = ensure_int(nb_dim, _ifour, 'nb_dim')
return Apply(self, [img, topgrad, output, desc, alpha, beta],
return Apply(self, [img, topgrad, output, desc, alpha, beta, nb_dim],
[output.type()]) [output.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
...@@ -890,14 +883,14 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW): ...@@ -890,14 +883,14 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
""" """
__props__ = ('workmem', 'inplace',) __props__ = ('workmem', 'inplace',)
__input_name__ = ('image', 'grad', 'output', 'descriptor', 'alpha', 'beta', 'nb_dim') __input_name__ = ('image', 'grad', 'output', 'descriptor', 'alpha', 'beta')
def __init__(self, inplace=False, workmem=None): def __init__(self, inplace=False, workmem=None):
super(GpuDnnConv3dGradW, self).__init__(inplace=inplace, workmem='none') super(GpuDnnConv3dGradW, self).__init__(inplace=inplace, workmem='none')
assert self.workmem in ['none', 'time','guess'] assert self.workmem in ['none', 'time','guess']
def grad(self, inp, grads): def grad(self, inp, grads):
img, top, output, desc, alpha, beta, nb_dim = inp img, top, output, desc, alpha, beta = inp
kerns, = grads kerns, = grads
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
...@@ -906,12 +899,11 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW): ...@@ -906,12 +899,11 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
d_top = GpuDnnConv3d()(img, kerns, gpu_alloc_empty(*top.shape), desc) d_top = GpuDnnConv3d()(img, kerns, gpu_alloc_empty(*top.shape), desc)
d_alpha = grad_not_implemented(self, 4, alpha) d_alpha = grad_not_implemented(self, 4, alpha)
d_beta = grad_not_implemented(self, 5, beta) d_beta = grad_not_implemented(self, 5, beta)
d_nb_dim = grad_not_implemented(self, 6, nb_dim)
return (d_img * alpha, d_top * alpha, kerns * beta, return (d_img * alpha, d_top * alpha, kerns * beta,
DisconnectedType()(), d_alpha, d_beta, d_nb_dim) DisconnectedType()(), d_alpha, d_beta)
def make_node(self, img, topgrad, output, desc, alpha=None, beta=None, nb_dim=None): def make_node(self, img, topgrad, output, desc, alpha=None, beta=None):
img = as_cuda_ndarray_variable(img) img = as_cuda_ndarray_variable(img)
topgrad = as_cuda_ndarray_variable(topgrad) topgrad = as_cuda_ndarray_variable(topgrad)
output = as_cuda_ndarray_variable(output) output = as_cuda_ndarray_variable(output)
...@@ -929,9 +921,8 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW): ...@@ -929,9 +921,8 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
alpha = ensure_float(alpha, _one, 'alpha') alpha = ensure_float(alpha, _one, 'alpha')
beta = ensure_float(beta, _zero, 'beta') beta = ensure_float(beta, _zero, 'beta')
nb_dim = ensure_int(nb_dim, _ifive, 'nb_dim')
return Apply(self, [img, topgrad, output, desc, alpha, beta, nb_dim], return Apply(self, [img, topgrad, output, desc, alpha, beta],
[output.type()]) [output.type()])
...@@ -946,8 +937,7 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -946,8 +937,7 @@ class GpuDnnConvGradI(DnnBase, COp):
""" """
__props__ = ('workmem', 'inplace',) __props__ = ('workmem', 'inplace',)
__input_name__ = ('kernel', 'grad', 'output', __input_name__ = ('kernel', 'grad', 'output', 'descriptor', 'alpha', 'beta')
'descriptor', 'alpha', 'beta', 'nb_dim')
def __init__(self, inplace=False, workmem=None): def __init__(self, inplace=False, workmem=None):
COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gi.c"], COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gi.c"],
...@@ -966,7 +956,7 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -966,7 +956,7 @@ class GpuDnnConvGradI(DnnBase, COp):
self.workmem = 'none' self.workmem = 'none'
def grad(self, inp, grads): def grad(self, inp, grads):
kerns, top, output, desc, alpha, beta, nb_dim = inp kerns, top, output, desc, alpha, beta = inp
img, = grads img, = grads
img = gpu_contiguous(img) img = gpu_contiguous(img)
...@@ -975,14 +965,13 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -975,14 +965,13 @@ class GpuDnnConvGradI(DnnBase, COp):
d_top = GpuDnnConv()(img, kerns, gpu_alloc_empty(*top.shape), desc) d_top = GpuDnnConv()(img, kerns, gpu_alloc_empty(*top.shape), desc)
d_alpha = grad_not_implemented(self, 4, alpha) d_alpha = grad_not_implemented(self, 4, alpha)
d_beta = grad_not_implemented(self, 5, beta) d_beta = grad_not_implemented(self, 5, beta)
d_nb_dim = grad_not_implemented(self, 6, nb_dim)
return (d_kerns * alpha, d_top * alpha, img * beta, return (d_kerns * alpha, d_top * alpha, img * beta,
DisconnectedType()(), d_alpha, d_beta, d_nb_dim) DisconnectedType()(), d_alpha, d_beta)
def connection_pattern(self, node): def connection_pattern(self, node):
# not connected to desc # not connected to desc
return [[1], [1], [1], [0], [1], [1], [1]] return [[1], [1], [1], [0], [1], [1]]
def get_op_params(self): def get_op_params(self):
if self.inplace: if self.inplace:
...@@ -1011,7 +1000,7 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -1011,7 +1000,7 @@ class GpuDnnConvGradI(DnnBase, COp):
return inplace_def + [alg_def, alg_choose_def] return inplace_def + [alg_def, alg_choose_def]
def make_node(self, kern, topgrad, output, desc, alpha=None, beta=None, nb_dim=None): def make_node(self, kern, topgrad, output, desc, alpha=None, beta=None):
kern = as_cuda_ndarray_variable(kern) kern = as_cuda_ndarray_variable(kern)
topgrad = as_cuda_ndarray_variable(topgrad) topgrad = as_cuda_ndarray_variable(topgrad)
output = as_cuda_ndarray_variable(output) output = as_cuda_ndarray_variable(output)
...@@ -1028,9 +1017,8 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -1028,9 +1017,8 @@ class GpuDnnConvGradI(DnnBase, COp):
alpha = ensure_float(alpha, _one, 'alpha') alpha = ensure_float(alpha, _one, 'alpha')
beta = ensure_float(beta, _zero, 'beta') beta = ensure_float(beta, _zero, 'beta')
nb_dim = ensure_int(nb_dim, _ifour, 'nb_dim')
return Apply(self, [kern, topgrad, output, desc, alpha, beta, nb_dim], return Apply(self, [kern, topgrad, output, desc, alpha, beta],
[output.type()]) [output.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
...@@ -1048,8 +1036,7 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI): ...@@ -1048,8 +1036,7 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
""" """
__props__ = ('inplace',) __props__ = ('inplace',)
__input_name__ = ('kernel', 'grad', 'output', __input_name__ = ('kernel', 'grad', 'output', 'descriptor', 'alpha', 'beta')
'descriptor', 'alpha', 'beta', 'nb_dim')
def __init__(self, inplace=False, workmem=None): def __init__(self, inplace=False, workmem=None):
### deterministic (default value) is not yet supported for conv3d ### deterministic (default value) is not yet supported for conv3d
...@@ -1060,7 +1047,7 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI): ...@@ -1060,7 +1047,7 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
def grad(self, inp, grads): def grad(self, inp, grads):
kerns, top, output, desc, alpha, beta, nb_dim = inp kerns, top, output, desc, alpha, beta = inp
img, = grads img, = grads
img = gpu_contiguous(img) img = gpu_contiguous(img)
...@@ -1069,12 +1056,11 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI): ...@@ -1069,12 +1056,11 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
d_top = GpuDnnConv3d()(img, kerns, gpu_alloc_empty(*top.shape), desc) d_top = GpuDnnConv3d()(img, kerns, gpu_alloc_empty(*top.shape), desc)
d_alpha = grad_not_implemented(self, 4, alpha) d_alpha = grad_not_implemented(self, 4, alpha)
d_beta = grad_not_implemented(self, 5, beta) d_beta = grad_not_implemented(self, 5, beta)
d_nb_dim = grad_not_implemented(self, 6, nb_dim)
return (d_kerns * alpha, d_top * alpha, img * beta, return (d_kerns * alpha, d_top * alpha, img * beta,
DisconnectedType()(), d_alpha, d_beta, d_nb_dim) DisconnectedType()(), d_alpha, d_beta)
def make_node(self, kern, topgrad, output, desc, alpha=None, beta=None, nb_dim=None): def make_node(self, kern, topgrad, output, desc, alpha=None, beta=None):
kern = as_cuda_ndarray_variable(kern) kern = as_cuda_ndarray_variable(kern)
topgrad = as_cuda_ndarray_variable(topgrad) topgrad = as_cuda_ndarray_variable(topgrad)
output = as_cuda_ndarray_variable(output) output = as_cuda_ndarray_variable(output)
...@@ -1091,9 +1077,8 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI): ...@@ -1091,9 +1077,8 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
alpha = ensure_float(alpha, _one, 'alpha') alpha = ensure_float(alpha, _one, 'alpha')
beta = ensure_float(beta, _zero, 'beta') beta = ensure_float(beta, _zero, 'beta')
nb_dim = ensure_int(nb_dim, _ifive, 'nb_dim')
return Apply(self, [kern, topgrad, output, desc, alpha, beta, nb_dim], return Apply(self, [kern, topgrad, output, desc, alpha, beta],
[output.type()]) [output.type()])
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
int int
APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
CudaNdarray *om, cudnnConvolutionDescriptor_t desc, CudaNdarray *om, cudnnConvolutionDescriptor_t desc,
float alpha, float beta, int nb_dim, CudaNdarray **output) { float alpha, float beta, CudaNdarray **output) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) { if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) {
...@@ -17,6 +17,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -17,6 +17,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
int nb_dim = CudaNdarray_NDIM(input);
#ifdef CONV_INPLACE #ifdef CONV_INPLACE
Py_XDECREF(*output); Py_XDECREF(*output);
*output = om; *output = om;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
int int
APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
CudaNdarray *im, cudnnConvolutionDescriptor_t desc, CudaNdarray *im, cudnnConvolutionDescriptor_t desc,
float alpha, float beta, int nb_dim, CudaNdarray **input) { float alpha, float beta, CudaNdarray **input) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (CudaNdarray_HOST_DIMS(im)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) { if (CudaNdarray_HOST_DIMS(im)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) {
...@@ -17,6 +17,8 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -17,6 +17,8 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
int nb_dim = CudaNdarray_NDIM(output);
#ifdef CONV_INPLACE #ifdef CONV_INPLACE
Py_XDECREF(*input); Py_XDECREF(*input);
*input = im; *input = im;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
int int
APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
CudaNdarray *km, cudnnConvolutionDescriptor_t desc, CudaNdarray *km, cudnnConvolutionDescriptor_t desc,
float alpha, float beta, int nb_dim, CudaNdarray **kerns) { float alpha, float beta, CudaNdarray **kerns) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(km)[1]) { if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(km)[1]) {
...@@ -17,6 +17,8 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -17,6 +17,8 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1) if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1; return 1;
int nb_dim = CudaNdarray_NDIM(output);
#ifdef CONV_INPLACE #ifdef CONV_INPLACE
Py_XDECREF(*kerns); Py_XDECREF(*kerns);
*kerns = km; *kerns = km;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论