提交 8202b103 · 作者：Gijs van Tulder

Inplace batch_normalization_train output on gpuarray.

上级 fd0e3b65
...@@ -1666,19 +1666,24 @@ class GpuDnnBatchNorm(DnnBase): ...@@ -1666,19 +1666,24 @@ class GpuDnnBatchNorm(DnnBase):
both be None. both be None.
""" """
__props__ = ('mode', 'running_averages', 'inplace_running_mean', 'inplace_running_var') __props__ = ('mode', 'running_averages', 'inplace_running_mean',
'inplace_running_var', 'inplace_output')
def __init__(self, mode='per-activation', running_averages=False, def __init__(self, mode='per-activation', running_averages=False,
inplace_running_mean=False, inplace_running_var=False): inplace_running_mean=False, inplace_running_var=False,
inplace_output=False):
DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm.c'], DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm.c'],
'dnn_batchnorm_op') 'dnn_batchnorm_op')
assert (mode in ('per-activation', 'spatial')) assert (mode in ('per-activation', 'spatial'))
self.mode = mode self.mode = mode
self.running_averages = running_averages self.running_averages = running_averages
self.inplace_output = inplace_output
self.inplace_running_mean = inplace_running_mean self.inplace_running_mean = inplace_running_mean
self.inplace_running_var = inplace_running_var self.inplace_running_var = inplace_running_var
self.destroy_map = {} self.destroy_map = {}
if self.inplace_output:
self.destroy_map[0] = [0]
if self.running_averages and self.inplace_running_mean: if self.running_averages and self.inplace_running_mean:
self.destroy_map[3] = [5] self.destroy_map[3] = [5]
if self.running_averages and self.inplace_running_var: if self.running_averages and self.inplace_running_var:
...@@ -1686,6 +1691,8 @@ class GpuDnnBatchNorm(DnnBase): ...@@ -1686,6 +1691,8 @@ class GpuDnnBatchNorm(DnnBase):
def get_op_params(self): def get_op_params(self):
params = [] params = []
if self.inplace_output:
params.append(('INPLACE_OUTPUT', '1'))
if self.running_averages: if self.running_averages:
params.append(('RUNNING_AVERAGES', '1')) params.append(('RUNNING_AVERAGES', '1'))
if self.inplace_running_mean: if self.inplace_running_mean:
...@@ -3127,6 +3134,17 @@ def local_abstract_batch_norm_train_cudnn(node): ...@@ -3127,6 +3134,17 @@ def local_abstract_batch_norm_train_cudnn(node):
return results return results
@register_inplace()
@local_optimizer([GpuDnnBatchNorm], inplace=True)
def local_batch_norm_inplace_output(node):
    """Rewrite a GpuDnnBatchNorm node so its main output overwrites
    the input buffer (``inplace_output=True``), keeping every other
    inplace flag and the mode unchanged.

    Returns ``None`` (no rewrite) when the op is not a
    GpuDnnBatchNorm or already runs in-place on its output.
    """
    op = node.op
    if not isinstance(op, GpuDnnBatchNorm) or op.inplace_output:
        return
    inplace_op = GpuDnnBatchNorm(mode=op.mode,
                                 running_averages=op.running_averages,
                                 inplace_running_mean=op.inplace_running_mean,
                                 inplace_running_var=op.inplace_running_var,
                                 inplace_output=True)
    return inplace_op(*node.inputs)
@register_inplace() @register_inplace()
@local_optimizer([GpuDnnBatchNorm], inplace=True) @local_optimizer([GpuDnnBatchNorm], inplace=True)
def local_batch_norm_inplace_running_mean(node): def local_batch_norm_inplace_running_mean(node):
...@@ -3134,7 +3152,8 @@ def local_batch_norm_inplace_running_mean(node): ...@@ -3134,7 +3152,8 @@ def local_batch_norm_inplace_running_mean(node):
return GpuDnnBatchNorm(mode=node.op.mode, return GpuDnnBatchNorm(mode=node.op.mode,
running_averages=node.op.running_averages, running_averages=node.op.running_averages,
inplace_running_mean=True, inplace_running_mean=True,
inplace_running_var=node.op.inplace_running_var)(*node.inputs) inplace_running_var=node.op.inplace_running_var,
inplace_output=node.op.inplace_output)(*node.inputs)
@register_inplace() @register_inplace()
...@@ -3144,7 +3163,8 @@ def local_batch_norm_inplace_running_var(node): ...@@ -3144,7 +3163,8 @@ def local_batch_norm_inplace_running_var(node):
return GpuDnnBatchNorm(mode=node.op.mode, return GpuDnnBatchNorm(mode=node.op.mode,
running_averages=node.op.running_averages, running_averages=node.op.running_averages,
inplace_running_mean=node.op.inplace_running_mean, inplace_running_mean=node.op.inplace_running_mean,
inplace_running_var=True)(*node.inputs) inplace_running_var=True,
inplace_output=node.op.inplace_output)(*node.inputs)
@local_optimizer([bn.AbstractBatchNormTrainGrad]) @local_optimizer([bn.AbstractBatchNormTrainGrad])
......
...@@ -25,8 +25,14 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -25,8 +25,14 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
if (epsilon < 1e-5) if (epsilon < 1e-5)
return 1; return 1;
#ifdef INPLACE_OUTPUT
Py_XDECREF(*outp);
*outp = inp;
Py_INCREF(*outp);
#else
if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
#endif
if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
if (theano_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
......
...@@ -1570,6 +1570,7 @@ def test_dnn_batchnorm_train_inplace(): ...@@ -1570,6 +1570,7 @@ def test_dnn_batchnorm_train_inplace():
assert len(nodes) == 1 assert len(nodes) == 1
assert nodes[0].op.inplace_running_mean assert nodes[0].op.inplace_running_mean
assert nodes[0].op.inplace_running_var assert nodes[0].op.inplace_running_var
assert nodes[0].op.inplace_output
# run # run
X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX) X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX) Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论