提交 8b9f7336 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5190 from gvtulder/f-batchnorm-abstract

Abstract Ops for batch normalization
...@@ -10,6 +10,9 @@ ...@@ -10,6 +10,9 @@
.. moduleauthor:: LISA .. moduleauthor:: LISA
.. seealso:: cuDNN batch normalization: :class:`theano.gpuarray.dnn.dnn_batch_normalization_train`, :class:`theano.gpuarray.dnn.dnn_batch_normalization_test>`. They must be added manually as they do not have the same user interface. .. autofunction:: theano.tensor.nnet.bn.batch_normalization_train
.. autofunction:: theano.tensor.nnet.bn.batch_normalization_test
.. seealso:: cuDNN batch normalization: :class:`theano.gpuarray.dnn.dnn_batch_normalization_train`, :class:`theano.gpuarray.dnn.dnn_batch_normalization_test`.
.. autofunction:: theano.tensor.nnet.bn.batch_normalization .. autofunction:: theano.tensor.nnet.bn.batch_normalization
差异被折叠。
...@@ -2,8 +2,19 @@ ...@@ -2,8 +2,19 @@
int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
PyGpuArrayObject *bias, npy_float64 epsilon, PyGpuArrayObject *bias, npy_float64 epsilon,
PyGpuArrayObject **outp, PyGpuArrayObject **x_mean, npy_float64 running_average_factor,
PyGpuArrayObject **x_invstd, cudnnHandle_t _handle) { #ifdef RUNNING_AVERAGES
PyGpuArrayObject *in_running_mean,
PyGpuArrayObject *in_running_var,
#endif
PyGpuArrayObject **outp,
PyGpuArrayObject **x_mean,
PyGpuArrayObject **x_invstd,
#ifdef RUNNING_AVERAGES
PyGpuArrayObject **out_running_mean,
PyGpuArrayObject **out_running_var,
#endif
cudnnHandle_t _handle) {
PyGpuContextObject *c = inp->context; PyGpuContextObject *c = inp->context;
if (c_set_tensorNd(inp, bn_input) != 0) if (c_set_tensorNd(inp, bn_input) != 0)
...@@ -16,8 +27,14 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -16,8 +27,14 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
return 1; return 1;
} }
#ifdef INPLACE_OUTPUT
Py_XDECREF(*outp);
*outp = inp;
Py_INCREF(*outp);
#else
if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
#endif
if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
if (theano_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
...@@ -26,6 +43,31 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -26,6 +43,31 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
if (c_set_tensorNd(*outp, bn_output) != 0) if (c_set_tensorNd(*outp, bn_output) != 0)
return 1; return 1;
#ifdef RUNNING_AVERAGES
  // Select the buffers that will receive the updated running statistics.
  // out_running_mean / out_running_var are pointers to the output slots
  // (PyGpuArrayObject **), so the object currently stored in a slot is
  // *out_running_xxx. The slots themselves are only reassigned after the
  // cuDNN call has succeeded.
#ifdef INPLACE_RUNNING_MEAN
  // In-place: reuse the input buffer as the output. Release whatever the
  // output slot currently holds; the slot must be dereferenced here, since
  // decref'ing the slot address itself would corrupt memory.
  Py_XDECREF(*out_running_mean);
  PyGpuArrayObject *running_mean = in_running_mean;
  Py_INCREF(running_mean);
#else
  // Not in-place: work on a copy of the input running mean.
  // NOTE(review): theano_try_copy presumably reuses the existing output
  // buffer when it can and allocates a new one otherwise -- confirm
  // against its definition. A NULL result is treated as failure.
  PyGpuArrayObject *running_mean = *out_running_mean;
  running_mean = theano_try_copy(running_mean, in_running_mean);
  if (running_mean == NULL) {
    return 1;
  }
#endif
#ifdef INPLACE_RUNNING_VAR
  // Same handling as the running mean above, applied to the variance.
  Py_XDECREF(*out_running_var);
  PyGpuArrayObject *running_var = in_running_var;
  Py_INCREF(running_var);
#else
  PyGpuArrayObject *running_var = *out_running_var;
  running_var = theano_try_copy(running_var, in_running_var);
  if (running_var == NULL) {
    return 1;
  }
#endif
#endif
{ {
const float falpha = 1.; const float falpha = 1.;
const float fbeta = 0.; const float fbeta = 0.;
...@@ -52,9 +94,15 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -52,9 +94,15 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
bn_params, bn_params,
PyGpuArray_DEV_DATA(scale), PyGpuArray_DEV_DATA(scale),
PyGpuArray_DEV_DATA(bias), PyGpuArray_DEV_DATA(bias),
#ifdef RUNNING_AVERAGES
running_average_factor,
PyGpuArray_DEV_DATA(running_mean),
PyGpuArray_DEV_DATA(running_var),
#else
0, 0,
NULL, // running mean, deliberately unused NULL, // running mean, deliberately unused
NULL, // running var, deliberately unused NULL, // running var, deliberately unused
#endif
epsilon, epsilon,
PyGpuArray_DEV_DATA(*x_mean), PyGpuArray_DEV_DATA(*x_mean),
PyGpuArray_DEV_DATA(*x_invstd) PyGpuArray_DEV_DATA(*x_invstd)
...@@ -64,6 +112,10 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -64,6 +112,10 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
cudnnGetErrorString(err)); cudnnGetErrorString(err));
return 1; return 1;
} }
#ifdef RUNNING_AVERAGES
*out_running_mean = running_mean;
*out_running_var = running_var;
#endif
} }
return 0; return 0;
} }
...@@ -16,8 +16,14 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -16,8 +16,14 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
return 1; return 1;
} }
#ifdef INPLACE_OUTPUT
Py_XDECREF(*outp);
*outp = inp;
Py_INCREF(*outp);
#else
if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
#endif
if (c_set_tensorNd(*outp, bn_output) != 0) if (c_set_tensorNd(*outp, bn_output) != 0)
return 1; return 1;
......
...@@ -12,7 +12,7 @@ import warnings ...@@ -12,7 +12,7 @@ import warnings
import theano import theano
from theano.compat import get_unbound_function from theano.compat import get_unbound_function
from theano.compile import optdb from theano.compile import optdb
from theano.gof import EquilibriumDB, SequenceDB from theano.gof import EquilibriumDB, SequenceDB, TopoOptimizer
from theano.gof.cmodule import get_lib_extension from theano.gof.cmodule import get_lib_extension
from theano.gof.compilelock import get_lock, release_lock from theano.gof.compilelock import get_lock, release_lock
from theano import config from theano import config
...@@ -40,6 +40,17 @@ def register_opt(*tags, **kwargs): ...@@ -40,6 +40,17 @@ def register_opt(*tags, **kwargs):
return f return f
def register_inplace(*tags, **kwargs):
    """Return a decorator that registers a local optimizer as an in-place pass.

    The decorated ``local_opt`` is wrapped in a ``TopoOptimizer`` (with
    ``TopoOptimizer.warn_inplace`` as failure callback) and registered in
    ``optdb`` at position 60 with the tags ``'fast_run'``, ``'inplace'`` and
    ``'gpu'``, followed by any extra ``tags``.

    Parameters
    ----------
    *tags
        Additional tags to attach to the registration.
    **kwargs
        Only ``name`` is consulted: the name to register the optimizer
        under. When it is absent or falsy, ``local_opt.__name__`` is used.

    Returns
    -------
    callable
        A decorator that performs the registration and returns
        ``local_opt`` unchanged.
    """
    def f(local_opt):
        # Pop with a default so that passing keyword arguments without
        # 'name' falls back to the optimizer's own name instead of raising
        # KeyError.
        name = kwargs.pop('name', None) or local_opt.__name__
        optdb.register(
            name, TopoOptimizer(
                local_opt, failure_callback=TopoOptimizer.warn_inplace),
            60, 'fast_run', 'inplace', 'gpu', *tags)
        return local_opt
    return f
_logger_name = 'theano.sandbox.cuda' _logger_name = 'theano.sandbox.cuda'
_logger = logging.getLogger(_logger_name) _logger = logging.getLogger(_logger_name)
......
差异被折叠。
...@@ -3050,3 +3050,28 @@ conv_groupopt.register('local_abstractconv3d_gradinputs_gemm', ...@@ -3050,3 +3050,28 @@ conv_groupopt.register('local_abstractconv3d_gradinputs_gemm',
local_abstractconv3d_gradinputs_gemm, 30, local_abstractconv3d_gradinputs_gemm, 30,
'conv_gemm', 'conv_gemm',
'gpu', 'fast_compile', 'fast_run') 'gpu', 'fast_compile', 'fast_run')
# Register cuDNN batch normalization implementation
# A dedicated LocalGroupDB collects the batch-normalization optimizers; it is
# named "gpu_batchnorm_opts" and handed to register_opt with the
# 'fast_compile' tag.
abstract_batch_norm_groupopt = theano.gof.optdb.LocalGroupDB()
abstract_batch_norm_groupopt.__name__ = "gpu_batchnorm_opts"
register_opt('fast_compile')(abstract_batch_norm_groupopt)
# cuDNN optimizations are only registered if cuDNN is available.
# (we import these opts here instead of at the top of this file
# to avoid a circular dependency problem with dnn)
from .dnn import (local_abstract_batch_norm_train_cudnn,
local_abstract_batch_norm_train_grad_cudnn,
local_abstract_batch_norm_inference_cudnn) # noqa: 402
# The three cuDNN optimizers (training, training gradient, inference) are
# all registered with the same position argument (20) and the same tags.
# NOTE(review): 20 is presumably the ordering position inside the group --
# confirm against LocalGroupDB.register.
abstract_batch_norm_groupopt.register('local_abstract_batch_norm_train_dnn',
local_abstract_batch_norm_train_cudnn, 20,
'batchnorm_dnn',
'gpu', 'fast_compile', 'fast_run', 'cudnn')
abstract_batch_norm_groupopt.register('local_abstract_batch_norm_train_grad_dnn',
local_abstract_batch_norm_train_grad_cudnn, 20,
'batchnorm_dnn',
'gpu', 'fast_compile', 'fast_run', 'cudnn')
abstract_batch_norm_groupopt.register('local_abstract_batch_norm_inference_dnn',
local_abstract_batch_norm_inference_cudnn, 20,
'batchnorm_dnn',
'gpu', 'fast_compile', 'fast_run', 'cudnn')
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论