Commit 585ccdc7, authored by Arnaud Bergeron

Add beta as an input to the convolution ops.

Parent commit: 7c436d0f
...@@ -411,7 +411,7 @@ class GpuDnnConv(DnnBase, COp): ...@@ -411,7 +411,7 @@ class GpuDnnConv(DnnBase, COp):
alg_def = ('CONV_ALGO', alg) alg_def = ('CONV_ALGO', alg)
return [alg_def] + inpl_def return [alg_def] + inpl_def
def make_node(self, img, kern, output, desc, alpha=None): def make_node(self, img, kern, output, desc, alpha=None, beta=None):
img = as_cuda_ndarray_variable(img) img = as_cuda_ndarray_variable(img)
kern = as_cuda_ndarray_variable(kern) kern = as_cuda_ndarray_variable(kern)
output = as_cuda_ndarray_variable(output) output = as_cuda_ndarray_variable(output)
...@@ -427,12 +427,13 @@ class GpuDnnConv(DnnBase, COp): ...@@ -427,12 +427,13 @@ class GpuDnnConv(DnnBase, COp):
raise TypeError('desc must be cudnnConvolutionDescriptor_t') raise TypeError('desc must be cudnnConvolutionDescriptor_t')
alpha = ensure_float(alpha, _one, 'alpha') alpha = ensure_float(alpha, _one, 'alpha')
beta = ensure_float(beta, _zero, 'beta')
return Apply(self, [img, kern, output, desc, alpha], return Apply(self, [img, kern, output, desc, alpha, beta],
[output.type()]) [output.type()])
def grad(self, inp, grads): def grad(self, inp, grads):
img, kerns, output, desc, alpha = inp img, kerns, output, desc, alpha, beta = inp
top, = grads top, = grads
top = gpu_contiguous(top) top = gpu_contiguous(top)
...@@ -440,12 +441,14 @@ class GpuDnnConv(DnnBase, COp): ...@@ -440,12 +441,14 @@ class GpuDnnConv(DnnBase, COp):
d_img = GpuDnnConvGradI()(kerns, top, img.zeros_like(), desc) d_img = GpuDnnConvGradI()(kerns, top, img.zeros_like(), desc)
d_kerns = GpuDnnConvGradW()(img, top, kerns.zeros_like(), desc) d_kerns = GpuDnnConvGradW()(img, top, kerns.zeros_like(), desc)
d_alpha = grad_not_implemented(self, 4, alpha) d_alpha = grad_not_implemented(self, 4, alpha)
d_beta = grad_not_implemented(self, 5, beta)
return [d_img, d_kerns, top * alpha, DisconnectedType()(), d_alpha] return [d_img * alpha, d_kerns * alpha, top * beta,
DisconnectedType()(), d_alpha, d_beta]
def connection_pattern(self, node): def connection_pattern(self, node):
# not connected to desc # not connected to desc
return [[1], [1], [1], [0], [1]] return [[1], [1], [1], [0], [1], [1]]
@staticmethod @staticmethod
def get_out_shape(ishape, kshape, border_mode, subsample): def get_out_shape(ishape, kshape, border_mode, subsample):
...@@ -507,7 +510,7 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -507,7 +510,7 @@ class GpuDnnConvGradW(DnnBase, COp):
self.inplace = False self.inplace = False
def grad(self, inp, grads): def grad(self, inp, grads):
img, top, output, desc, alpha = inp img, top, output, desc, alpha, beta = inp
kerns, = grads kerns, = grads
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
...@@ -515,12 +518,14 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -515,12 +518,14 @@ class GpuDnnConvGradW(DnnBase, COp):
d_img = GpuDnnConvGradI()(kerns, top, img.zeros_like(), desc) d_img = GpuDnnConvGradI()(kerns, top, img.zeros_like(), desc)
d_top = GpuDnnConv()(img, kerns, top.zeros_like(), desc) d_top = GpuDnnConv()(img, kerns, top.zeros_like(), desc)
d_alpha = grad_not_implemented(self, 4, alpha) d_alpha = grad_not_implemented(self, 4, alpha)
d_beta = grad_not_implemented(self, 5, beta)
return (d_img, d_top, kerns * alpha, DisconnectedType()(), d_alpha) return (d_img * alpha, d_top * alpha, kerns * beta,
DisconnectedType()(), d_alpha, d_beta)
def connection_pattern(self, node): def connection_pattern(self, node):
# not connected to desc # not connected to desc
return [[1], [1], [1], [0], [1]] return [[1], [1], [1], [0], [1], [1]]
def get_op_params(self): def get_op_params(self):
if self.inplace: if self.inplace:
...@@ -528,7 +533,7 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -528,7 +533,7 @@ class GpuDnnConvGradW(DnnBase, COp):
else: else:
return [] return []
def make_node(self, img, topgrad, output, desc, alpha=None): def make_node(self, img, topgrad, output, desc, alpha=None, beta=None):
img = as_cuda_ndarray_variable(img) img = as_cuda_ndarray_variable(img)
topgrad = as_cuda_ndarray_variable(topgrad) topgrad = as_cuda_ndarray_variable(topgrad)
output = as_cuda_ndarray_variable(output) output = as_cuda_ndarray_variable(output)
...@@ -544,8 +549,9 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -544,8 +549,9 @@ class GpuDnnConvGradW(DnnBase, COp):
raise TypeError('desc must be cudnnConvolutionDescriptor_t') raise TypeError('desc must be cudnnConvolutionDescriptor_t')
alpha = ensure_float(alpha, _one, 'alpha') alpha = ensure_float(alpha, _one, 'alpha')
beta = ensure_float(beta, _zero, 'beta')
return Apply(self, [img, topgrad, output, desc, alpha], return Apply(self, [img, topgrad, output, desc, alpha, beta],
[output.type()]) [output.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
...@@ -571,7 +577,7 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -571,7 +577,7 @@ class GpuDnnConvGradI(DnnBase, COp):
self.destroy_map = {0: [2]} self.destroy_map = {0: [2]}
def grad(self, inp, grads): def grad(self, inp, grads):
kerns, top, output, desc, alpha = inp kerns, top, output, desc, alpha, beta = inp
img, = grads img, = grads
img = gpu_contiguous(img) img = gpu_contiguous(img)
...@@ -579,12 +585,14 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -579,12 +585,14 @@ class GpuDnnConvGradI(DnnBase, COp):
d_kerns = GpuDnnConvGradW()(img, top, kerns.zeros_like(), desc) d_kerns = GpuDnnConvGradW()(img, top, kerns.zeros_like(), desc)
d_top = GpuDnnConv()(img, kerns, top.zeros_like(), desc) d_top = GpuDnnConv()(img, kerns, top.zeros_like(), desc)
d_alpha = grad_not_implemented(self, 4, alpha) d_alpha = grad_not_implemented(self, 4, alpha)
d_beta = grad_not_implemented(self, 5, beta)
return (d_kerns, d_top, img * alpha, DisconnectedType()(), d_alpha) return (d_kerns * alpha, d_top * alpha, img * beta,
DisconnectedType()(), d_alpha, d_beta)
def connection_pattern(self, node): def connection_pattern(self, node):
# not connected to desc # not connected to desc
return [[1], [1], [1], [0], [1]] return [[1], [1], [1], [0], [1], [1]]
def get_op_params(self): def get_op_params(self):
if self.inplace: if self.inplace:
...@@ -592,7 +600,7 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -592,7 +600,7 @@ class GpuDnnConvGradI(DnnBase, COp):
else: else:
return [] return []
def make_node(self, kern, topgrad, output, desc, alpha=None): def make_node(self, kern, topgrad, output, desc, alpha=None, beta=None):
kern = as_cuda_ndarray_variable(kern) kern = as_cuda_ndarray_variable(kern)
topgrad = as_cuda_ndarray_variable(topgrad) topgrad = as_cuda_ndarray_variable(topgrad)
output = as_cuda_ndarray_variable(output) output = as_cuda_ndarray_variable(output)
...@@ -608,8 +616,9 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -608,8 +616,9 @@ class GpuDnnConvGradI(DnnBase, COp):
raise TypeError('desc must be cudnnConvolutionDescriptor_t') raise TypeError('desc must be cudnnConvolutionDescriptor_t')
alpha = ensure_float(alpha, _one, 'alpha') alpha = ensure_float(alpha, _one, 'alpha')
beta = ensure_float(beta, _zero, 'beta')
return Apply(self, [kern, topgrad, output, desc, alpha], return Apply(self, [kern, topgrad, output, desc, alpha, beta],
[output.type()]) [output.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
int int
APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
CudaNdarray *om, cudnnConvolutionDescriptor_t desc, CudaNdarray *om, cudnnConvolutionDescriptor_t desc,
float alpha, CudaNdarray **output) { float alpha, float beta, CudaNdarray **output) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
...@@ -18,7 +18,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -18,7 +18,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
#else #else
if (CudaNdarray_prep_output(output, 4, CudaNdarray_HOST_DIMS(om)) != 0) if (CudaNdarray_prep_output(output, 4, CudaNdarray_HOST_DIMS(om)) != 0)
return 1; return 1;
if (CudaNdarray_CopyFromCudaNdarray(*output, om)) if (beta != 0.0 && CudaNdarray_CopyFromCudaNdarray(*output, om))
return 1; return 1;
#endif #endif
...@@ -47,8 +47,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -47,8 +47,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
if (workspace == NULL && worksize != 0) if (workspace == NULL && worksize != 0)
return 1; return 1;
const float beta = 1;
err = cudnnConvolutionForward( err = cudnnConvolutionForward(
_handle, _handle,
(void *)&alpha, (void *)&alpha,
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
int int
APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
CudaNdarray *im, cudnnConvolutionDescriptor_t desc, CudaNdarray *im, cudnnConvolutionDescriptor_t desc,
float alpha, CudaNdarray **input) { float alpha, float beta, CudaNdarray **input) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)
...@@ -18,15 +18,13 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -18,15 +18,13 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
#else #else
if (CudaNdarray_prep_output(input, 4, CudaNdarray_HOST_DIMS(im)) != 0) if (CudaNdarray_prep_output(input, 4, CudaNdarray_HOST_DIMS(im)) != 0)
return 1; return 1;
if (CudaNdarray_CopyFromCudaNdarray(*input, im)) if (beta != 0.0 && CudaNdarray_CopyFromCudaNdarray(*input, im))
return 1; return 1;
#endif #endif
if (c_set_tensor4d(*input, APPLY_SPECIFIC(input)) == -1) if (c_set_tensor4d(*input, APPLY_SPECIFIC(input)) == -1)
return 1; return 1;
const float beta = 1;
err = cudnnConvolutionBackwardData( err = cudnnConvolutionBackwardData(
_handle, _handle,
(void *)&alpha, (void *)&alpha,
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
int int
APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
CudaNdarray *km, cudnnConvolutionDescriptor_t desc, CudaNdarray *km, cudnnConvolutionDescriptor_t desc,
float alpha, CudaNdarray **kerns) { float alpha, float beta, CudaNdarray **kerns) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
...@@ -18,15 +18,13 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -18,15 +18,13 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
#else #else
if (CudaNdarray_prep_output(kerns, 4, CudaNdarray_HOST_DIMS(km)) != 0) if (CudaNdarray_prep_output(kerns, 4, CudaNdarray_HOST_DIMS(km)) != 0)
return 1; return 1;
if (CudaNdarray_CopyFromCudaNdarray(*kerns, km)) if (beta != 0.0 && CudaNdarray_CopyFromCudaNdarray(*kerns, km))
return 1; return 1;
#endif #endif
if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
const float beta = 1;
err = cudnnConvolutionBackwardFilter( err = cudnnConvolutionBackwardFilter(
_handle, _handle,
(void *)&alpha, (void *)&alpha,
......
Markdown 格式
0%
You are about to add 0 people to this discussion. Proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment.