Commit fd0e3b65 authored by Gijs van Tulder

Inplace running mean and variance on gpuarray.

Parent 186056b8
@@ -41,7 +41,7 @@ from .elemwise import GpuElemwise
 # GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
 from .nnet import GpuSoftmax
 from .opt import (gpu_seqopt, register_opt, pool_db, pool_db2,
-                  op_lifter, register_opt2)
+                  op_lifter, register_opt2, register_inplace)
 from .opt_util import alpha_merge, output_merge, inplace_allocempty, pad_dims, unpad_dims
@@ -1666,20 +1666,32 @@ class GpuDnnBatchNorm(DnnBase):
     both be None.
     """

-    __props__ = ('mode', 'running_averages')
+    __props__ = ('mode', 'running_averages', 'inplace_running_mean', 'inplace_running_var')

-    def __init__(self, mode='per-activation', running_averages=False):
+    def __init__(self, mode='per-activation', running_averages=False,
+                 inplace_running_mean=False, inplace_running_var=False):
         DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm.c'],
                          'dnn_batchnorm_op')
         assert (mode in ('per-activation', 'spatial'))
         self.mode = mode
         self.running_averages = running_averages
+        self.inplace_running_mean = inplace_running_mean
+        self.inplace_running_var = inplace_running_var
+        self.destroy_map = {}
+        if self.running_averages and self.inplace_running_mean:
+            self.destroy_map[3] = [5]
+        if self.running_averages and self.inplace_running_var:
+            self.destroy_map[4] = [6]

     def get_op_params(self):
         params = []
         if self.running_averages:
             params.append(('RUNNING_AVERAGES', '1'))
+        if self.inplace_running_mean:
+            params.append(('INPLACE_RUNNING_MEAN', '1'))
+        if self.inplace_running_var:
+            params.append(('INPLACE_RUNNING_VAR', '1'))
         params.append(('MODE', ("CUDNN_BATCHNORM_SPATIAL"
                                 if self.mode == "spatial"
                                 else "CUDNN_BATCHNORM_PER_ACTIVATION")))
@@ -3115,6 +3127,26 @@ def local_abstract_batch_norm_train_cudnn(node):
     return results


+@register_inplace()
+@local_optimizer([GpuDnnBatchNorm], inplace=True)
+def local_batch_norm_inplace_running_mean(node):
+    if isinstance(node.op, GpuDnnBatchNorm) and node.op.running_averages and not node.op.inplace_running_mean:
+        return GpuDnnBatchNorm(mode=node.op.mode,
+                               running_averages=node.op.running_averages,
+                               inplace_running_mean=True,
+                               inplace_running_var=node.op.inplace_running_var)(*node.inputs)
+
+
+@register_inplace()
+@local_optimizer([GpuDnnBatchNorm], inplace=True)
+def local_batch_norm_inplace_running_var(node):
+    if isinstance(node.op, GpuDnnBatchNorm) and node.op.running_averages and not node.op.inplace_running_var:
+        return GpuDnnBatchNorm(mode=node.op.mode,
+                               running_averages=node.op.running_averages,
+                               inplace_running_mean=node.op.inplace_running_mean,
+                               inplace_running_var=True)(*node.inputs)
+
+
 @local_optimizer([bn.AbstractBatchNormTrainGrad])
 def local_abstract_batch_norm_train_grad_cudnn(node):
     if not isinstance(node.op, bn.AbstractBatchNormTrainGrad):
......
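Each of the two rewrites above flips exactly one flag and leaves the other unchanged, so the optimizer converges on the fully inplace op after applying both. A toy model of that fixpoint behaviour, in plain Python with no Theano APIs (all names here are illustrative stand-ins):

    from collections import namedtuple

    # Stand-in for GpuDnnBatchNorm's relevant props.
    Op = namedtuple('Op', ['running_averages',
                           'inplace_running_mean', 'inplace_running_var'])

    def mark_mean_inplace(op):
        # mirrors local_batch_norm_inplace_running_mean
        if op.running_averages and not op.inplace_running_mean:
            return op._replace(inplace_running_mean=True)

    def mark_var_inplace(op):
        # mirrors local_batch_norm_inplace_running_var
        if op.running_averages and not op.inplace_running_var:
            return op._replace(inplace_running_var=True)

    op = Op(running_averages=True,
            inplace_running_mean=False, inplace_running_var=False)
    for rewrite in (mark_mean_inplace, mark_var_inplace):
        op = rewrite(op) or op  # None means "no change", as in Theano rewrites
    assert op.inplace_running_mean and op.inplace_running_var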
@@ -36,16 +36,28 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
     return 1;

 #ifdef RUNNING_AVERAGES
+#ifdef INPLACE_RUNNING_MEAN
+  Py_XDECREF(*out_running_mean);
+  PyGpuArrayObject *running_mean = in_running_mean;
+  Py_INCREF(running_mean);
+#else
   PyGpuArrayObject *running_mean = *out_running_mean;
-  PyGpuArrayObject *running_var = *out_running_var;
   running_mean = theano_try_copy(running_mean, in_running_mean);
   if (running_mean == NULL) {
     return 1;
   }
+#endif
+#ifdef INPLACE_RUNNING_VAR
+  Py_XDECREF(*out_running_var);
+  PyGpuArrayObject *running_var = in_running_var;
+  Py_INCREF(running_var);
+#else
+  PyGpuArrayObject *running_var = *out_running_var;
   running_var = theano_try_copy(running_var, in_running_var);
   if (running_var == NULL) {
     return 1;
   }
+#endif
 #endif

 {
......
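For readers unfamiliar with the C side: in the inplace branches the op drops its stale output buffer (Py_XDECREF), aliases the output to the input array, and takes a fresh reference; in the copying branches theano_try_copy reuses or allocates an output buffer and copies the input into it. The same decision in pure-Python pseudocode (toy stand-ins, not the gpuarray API):

    def pick_running_buffer(inplace, in_buf):
        # Toy model of the #ifdef INPLACE_RUNNING_* branches above.
        if inplace:
            return in_buf     # alias the input; cuDNN then updates it in place
        return list(in_buf)   # stand-in for theano_try_copy(out_buf, in_buf)

    buf = [1.0, 2.0]
    assert pick_running_buffer(True, buf) is buf       # same storage reused
    assert pick_running_buffer(False, buf) is not buf  # fresh copy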
 from __future__ import absolute_import, print_function, division
 import logging
+from collections import OrderedDict
 from nose.plugins.skip import SkipTest
 from nose_parameterized import parameterized
@@ -1531,6 +1532,51 @@ def test_dnn_batchnorm_train_without_running_averages():
     f_abstract(X, Scale, Bias, Dy)


+def test_dnn_batchnorm_train_inplace():
+    # test inplace_running_mean and inplace_running_var
+    if not dnn.dnn_available(test_ctx_name):
+        raise SkipTest(dnn.dnn_available.msg)
+    if dnn.version(raises=False) < 5000:
+        raise SkipTest("batch normalization requires cudnn v5+")
+    utt.seed_rng()
+    x, scale, bias = T.tensor4('x'), T.tensor4('scale'), T.tensor4('bias')
+    data_shape = (5, 10, 30, 25)
+    param_shape = (1, 10, 30, 25)
+    running_mean = gpuarray_shared_constructor(
+        numpy.random.randn(*param_shape).astype(theano.config.floatX),
+        broadcastable=(True, False, False, False))
+    running_var = gpuarray_shared_constructor(
+        numpy.random.randn(*param_shape).astype(theano.config.floatX),
+        broadcastable=(True, False, False, False))
+
+    # forward pass
+    out, x_mean, x_invstd, new_running_mean, new_running_var = \
+        dnn.dnn_batch_normalization_train(x, scale, bias, 'per-activation',
+                                          epsilon=5e-3, running_average_factor=0.3,
+                                          running_mean=running_mean, running_var=running_var)
+    # update running averages
+    updates = OrderedDict()
+    updates[running_mean] = new_running_mean
+    updates[running_var] = new_running_var
+
+    # compile
+    f = theano.function([x, scale, bias],
+                        [out, x_mean, x_invstd],
+                        updates=updates,
+                        mode=mode_with_gpu)
+    # check for the inplace settings
+    nodes = [n for n in f.maker.fgraph.toposort()
+             if isinstance(n.op, dnn.GpuDnnBatchNorm)]
+    assert len(nodes) == 1
+    assert nodes[0].op.inplace_running_mean
+    assert nodes[0].op.inplace_running_var
+    # run
+    X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
+    Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
+    Bias = numpy.random.randn(*param_shape).astype(theano.config.floatX)
+    f(X, Scale, Bias)
+
+
 def test_batchnorm_inference():
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
......
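A usage note for the test above: because the rewrites are registered through @register_inplace, they can be switched off at compilation time, which is handy for comparing the inplace path against the copying path while debugging. A sketch reusing the test's variables, assuming the rewrites carry the usual 'inplace' optimizer tag:

    # Compile without the inplace rewrites; the graph then keeps a
    # GpuDnnBatchNorm with inplace_running_mean == inplace_running_var == False.
    f_copy = theano.function([x, scale, bias],
                             [out, x_mean, x_invstd],
                             updates=updates,
                             mode=mode_with_gpu.excluding('inplace'))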