提交 fe3871ff authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add the interface functions.

上级 42869d83
...@@ -1604,6 +1604,149 @@ class GpuDnnBatchNormGrad(DnnBase): ...@@ -1604,6 +1604,149 @@ class GpuDnnBatchNormGrad(DnnBase):
return [shape[0], shape[2], shape[2]] return [shape[0], shape[2], shape[2]]
def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation',
                                  epsilon=1e-4):
    """
    Performs batch normalization of the given inputs, using the mean and
    variance of the inputs.

    Parameters
    ----------
    inputs : tensor
        Tensor to normalize. At most 4 dimensions are supported; an input
        with fewer dimensions is padded on the right up to 4 dimensions
        before the cuDNN op is applied, and every result is flattened back
        to the original number of dimensions.
    mode : {'per-activation', 'spatial'}
        Whether to normalize per activation or share normalization factors
        across spatial dimensions (i.e., all dimensions past the second).
    gamma : tensor
        Learnable scale factors. Must match the dimensionality of `inputs`,
        but have sizes of `1` for all axes normalized over (i.e., in the first
        dimension for ``mode='per-activation'``, and additionally in all
        dimensions past the second for ``mode='spatial'``).
    beta : tensor
        Learnable biases. Must match the tensor layout of `gamma`.
    epsilon : float
        Epsilon value used in the batch normalization formula. Minimum allowed
        value is 1e-5 (imposed by cuDNN).

    Returns
    -------
    out : tensor
        Batch-normalized inputs.
    mean : tensor
        Means of `inputs` across the normalization axes.
    stdinv : tensor
        Inverse standard deviations of `inputs` across the normalization axes.

    Raises
    ------
    ValueError
        If `inputs` has more than 4 dimensions, if `gamma` or `beta` does
        not have the same number of dimensions as `inputs`, or if `epsilon`
        is smaller than 1e-5.

    Notes
    -----
    Requires cuDNN 5 and Theano 0.9dev2 or more recent.

    For 4d tensors, returned values are equivalent to:

    .. code-block:: python

        axes = 0 if mode == 'per-activation' else (0, 2, 3)
        mean = inputs.mean(axes, keepdims=True)
        stdinv = T.inv(T.sqrt(inputs.var(axes, keepdims=True) + epsilon))
        out = (inputs - mean) * gamma * stdinv + beta
    """
    ndim = inputs.ndim
    if ndim > 4:
        raise ValueError("dnn_batch_normalization_train currently supports "
                         "up to 4-dimensional tensors only, got %d" % ndim)
    if gamma.ndim != ndim or beta.ndim != ndim:
        raise ValueError("gamma and beta must be of the same dimensionality "
                         "as inputs; got %d and %d instead of %d" %
                         (gamma.ndim, beta.ndim, ndim))
    if epsilon < 1e-5:
        # %g rather than %f: every rejected value is below 1e-5, and %f
        # would print anything under 5e-7 as the useless "0.000000".
        raise ValueError("epsilon must be at least 1e-5, got %g" % epsilon)

    if ndim < 4:
        # The op is always fed 4d tensors; pad the trailing axes here and
        # undo the padding on the results below.
        inputs = theano.tensor.shape_padright(inputs, 4 - ndim)
        gamma = theano.tensor.shape_padright(gamma, 4 - ndim)
        beta = theano.tensor.shape_padright(beta, 4 - ndim)
    batchnorm_op = GpuDnnBatchNorm(mode=mode, epsilon=epsilon)
    # NOTE(review): presumably the op needs contiguous GPU memory, hence
    # the gpu_contiguous wrappers -- confirm against GpuDnnBatchNorm.
    result = tuple(batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma),
                                gpu_contiguous(beta)))
    if ndim < 4:
        # Collapse the padded trailing axes so callers get `ndim`-d outputs.
        result = tuple(theano.tensor.flatten(r, ndim) for r in result)
    return result
def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
                                 mode='per-activation', epsilon=1e-4):
    """
    Performs batch normalization of the given inputs, using the given mean and
    variance.

    Parameters
    ----------
    inputs : tensor
        Tensor to normalize. At most 4 dimensions are supported; an input
        with fewer dimensions is padded on the right up to 4 dimensions
        before the cuDNN op is applied, and the result is flattened back to
        the original number of dimensions.
    mode : {'per-activation', 'spatial'}
        Whether to normalize per activation or share normalization factors
        across spatial dimensions (i.e., all dimensions past the second).
    gamma : tensor
        Scale factors. Must match the dimensionality of `inputs`, but have
        sizes of `1` for all axes normalized over (i.e., in the first dimension
        for ``mode='per-activation'``, and additionally in all dimensions past
        the second for ``mode='spatial'``).
    beta : tensor
        Biases. Must match the tensor layout of `gamma`.
    mean : tensor
        Means. Usually these are running averages computed during training.
        Must match the tensor layout of `gamma`.
    var : tensor
        Variances. Usually these are running averages computed during training.
        Must match the tensor layout of `gamma`.
    epsilon : float
        Epsilon value used in the batch normalization formula. Minimum allowed
        value is 1e-5 (imposed by cuDNN).

    Returns
    -------
    out : tensor
        Batch-normalized inputs.

    Raises
    ------
    ValueError
        If `inputs` has more than 4 dimensions, if any of `gamma`, `beta`,
        `mean` or `var` does not have the same number of dimensions as
        `inputs`, or if `epsilon` is smaller than 1e-5.

    Notes
    -----
    Requires cuDNN 5 and Theano 0.9dev2 or more recent.

    For 4d tensors, the returned value is equivalent to:

    .. code-block:: python

        axes = (0,) if mode == 'per-activation' else (0, 2, 3)
        gamma, beta, mean, var = (T.addbroadcast(t, *axes)
                                  for t in (gamma, beta, mean, var))
        out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta
    """
    ndim = inputs.ndim
    if ndim > 4:
        raise ValueError("dnn_batch_normalization_test currently supports "
                         "up to 4-dimensional tensors only, got %d" % ndim)
    if gamma.ndim != ndim or beta.ndim != ndim:
        raise ValueError("gamma and beta must be of the same dimensionality "
                         "as inputs; got %d and %d instead of %d" %
                         (gamma.ndim, beta.ndim, ndim))
    if mean.ndim != ndim or var.ndim != ndim:
        raise ValueError("mean and var must be of the same dimensionality "
                         "as inputs; got %d and %d instead of %d" %
                         (mean.ndim, var.ndim, ndim))
    if epsilon < 1e-5:
        # %g rather than %f: every rejected value is below 1e-5, and %f
        # would print anything under 5e-7 as the useless "0.000000".
        raise ValueError("epsilon must be at least 1e-5, got %g" % epsilon)

    if ndim < 4:
        # The op is always fed 4d tensors; pad the trailing axes here and
        # undo the padding on the result below.
        inputs = theano.tensor.shape_padright(inputs, 4 - ndim)
        gamma = theano.tensor.shape_padright(gamma, 4 - ndim)
        beta = theano.tensor.shape_padright(beta, 4 - ndim)
        mean = theano.tensor.shape_padright(mean, 4 - ndim)
        var = theano.tensor.shape_padright(var, 4 - ndim)
    batchnorm_op = GpuDnnBatchNormInference(mode=mode, epsilon=epsilon)
    # NOTE(review): presumably the op needs contiguous GPU memory, hence
    # the gpu_contiguous wrappers -- confirm against GpuDnnBatchNormInference.
    result = batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma),
                          gpu_contiguous(beta), gpu_contiguous(mean),
                          gpu_contiguous(var))
    if ndim < 4:
        # Collapse the padded trailing axes so callers get an `ndim`-d output.
        result = theano.tensor.flatten(result, ndim)
    return result
@register_opt2([AbstractConv2d, AbstractConv2d_gradWeights, @register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile', 'conv_dnn', 'cudnn') AbstractConv2d_gradInputs], 'fast_compile', 'conv_dnn', 'cudnn')
def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论