Commit 8a23fb1c authored by Brandon T. Willard, committed by Thomas Wiecki

Rename theano.tensor.nnet.bn to theano.tensor.nnet.batchnorm

Parent e0e5b3b8
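The change is mechanical: the module `theano.tensor.nnet.bn` becomes `theano.tensor.nnet.batchnorm`, and every `bn.` attribute access in the test suite and in the GPU optimizer registrations is updated to match; no function signatures or Ops change. A minimal before/after sketch for downstream code (the variables below are illustrative, not part of this diff):

    # Before this commit:
    #   from theano.tensor.nnet.bn import batch_normalization
    # After this commit, only the module path changes:
    from theano.tensor.nnet.batchnorm import batch_normalization

    import theano
    import theano.tensor as tt

    x = tt.matrix("x")
    gamma = tt.matrix("gamma")  # scale
    beta = tt.matrix("beta")    # bias
    mean = tt.matrix("mean")
    std = tt.matrix("std")

    # Same call signature as before the rename.
    out = batch_normalization(x, gamma, beta, mean, std, mode="low_mem")
    f = theano.function([x, gamma, beta, mean, std], out)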
@@ -48,7 +48,7 @@ from theano.tensor.nnet import (
     LogSoftmax,
     Softmax,
     SoftmaxGrad,
-    bn,
+    batchnorm,
     conv2d,
     softmax,
     softmax_op,
@@ -1869,7 +1869,7 @@ def test_dnn_batchnorm_train():
             x_invstd_abstract,
             out_running_mean_abstract,
             out_running_var_abstract,
-        ) = bn.batch_normalization_train(
+        ) = batchnorm.batch_normalization_train(
             x,
             scale,
             bias,
@@ -1966,9 +1966,9 @@ def test_dnn_batchnorm_train():
             isinstance(
                 n.op,
                 (
-                    bn.AbstractBatchNormTrain,
-                    bn.AbstractBatchNormInference,
-                    bn.AbstractBatchNormTrainGrad,
+                    batchnorm.AbstractBatchNormTrain,
+                    batchnorm.AbstractBatchNormInference,
+                    batchnorm.AbstractBatchNormTrainGrad,
                 ),
             )
             for n in f_abstract.maker.fgraph.toposort()
@@ -2044,9 +2044,11 @@ def test_dnn_batchnorm_train_without_running_averages():
     out_gpu, x_mean_gpu, x_invstd_gpu = dnn.dnn_batch_normalization_train(
         x, scale, bias, "per-activation"
     )
-    out_abstract, x_mean_abstract, x_invstd_abstract = bn.batch_normalization_train(
-        x, scale, bias, "per-activation"
-    )
+    (
+        out_abstract,
+        x_mean_abstract,
+        x_invstd_abstract,
+    ) = batchnorm.batch_normalization_train(x, scale, bias, "per-activation")
     # backward pass
     grads_gpu = theano.grad(None, wrt=[x, scale, bias], known_grads={out_gpu: dy})
     grads_abstract = theano.grad(
@@ -2081,9 +2083,9 @@ def test_dnn_batchnorm_train_without_running_averages():
             isinstance(
                 n.op,
                 (
-                    bn.AbstractBatchNormTrain,
-                    bn.AbstractBatchNormInference,
-                    bn.AbstractBatchNormTrainGrad,
+                    batchnorm.AbstractBatchNormTrain,
+                    batchnorm.AbstractBatchNormInference,
+                    batchnorm.AbstractBatchNormTrainGrad,
                 ),
             )
             for n in f_abstract.maker.fgraph.toposort()
@@ -2113,9 +2115,11 @@ def test_without_dnn_batchnorm_train_without_running_averages():
     param_shape = (1, 10, 30, 25)
 
     # forward pass
-    out_abstract, x_mean_abstract, x_invstd_abstract = bn.batch_normalization_train(
-        x, scale, bias, "per-activation"
-    )
+    (
+        out_abstract,
+        x_mean_abstract,
+        x_invstd_abstract,
+    ) = batchnorm.batch_normalization_train(x, scale, bias, "per-activation")
     # backward pass
     grads_abstract = theano.grad(
         None, wrt=[x, scale, bias], known_grads={out_abstract: dy}
@@ -2144,9 +2148,9 @@ def test_without_dnn_batchnorm_train_without_running_averages():
             isinstance(
                 n.op,
                 (
-                    bn.AbstractBatchNormTrain,
-                    bn.AbstractBatchNormInference,
-                    bn.AbstractBatchNormTrainGrad,
+                    batchnorm.AbstractBatchNormTrain,
+                    batchnorm.AbstractBatchNormInference,
+                    batchnorm.AbstractBatchNormTrainGrad,
                 ),
             )
             for n in f_abstract.maker.fgraph.toposort()
@@ -2243,7 +2247,7 @@ def test_batchnorm_inference():
         x, scale, bias, mean, var, mode, eps
     )
     # forward pass, abstract interface
-    out_abstract = bn.batch_normalization_test(
+    out_abstract = batchnorm.batch_normalization_test(
         x, scale, bias, mean, var, mode, eps
     )
 
# reference forward pass
......@@ -2293,9 +2297,9 @@ def test_batchnorm_inference():
isinstance(
n.op,
(
bn.AbstractBatchNormTrain,
bn.AbstractBatchNormInference,
bn.AbstractBatchNormTrainGrad,
batchnorm.AbstractBatchNormTrain,
batchnorm.AbstractBatchNormInference,
batchnorm.AbstractBatchNormTrainGrad,
),
)
for n in f_abstract.maker.fgraph.toposort()
@@ -2389,10 +2393,12 @@ def test_dnn_batchnorm_valid_and_invalid_axes():
         invalid_axes_lists = (tuple(range(1, ndim)),)
         for axes in valid_axes_lists + invalid_axes_lists:
             # forward pass, abstract interface
-            out_train, x_mean, x_invstd = bn.batch_normalization_train(
+            out_train, x_mean, x_invstd = batchnorm.batch_normalization_train(
                 x, scale, bias, axes
             )
-            out_test = bn.batch_normalization_test(x, scale, bias, mean, var, axes)
+            out_test = batchnorm.batch_normalization_test(
+                x, scale, bias, mean, var, axes
+            )
             # backward pass
             dy = vartype("dy")
             grads_train = theano.grad(
@@ -2433,9 +2439,9 @@ def test_dnn_batchnorm_valid_and_invalid_axes():
                 isinstance(
                     n.op,
                     (
-                        bn.AbstractBatchNormTrain,
-                        bn.AbstractBatchNormInference,
-                        bn.AbstractBatchNormTrainGrad,
+                        batchnorm.AbstractBatchNormTrain,
+                        batchnorm.AbstractBatchNormInference,
+                        batchnorm.AbstractBatchNormTrainGrad,
                     ),
                 )
                 for n in f.maker.fgraph.toposort()
@@ -2450,9 +2456,9 @@ def test_dnn_batchnorm_valid_and_invalid_axes():
                     (
                         dnn.GpuDnnBatchNorm,
                         dnn.GpuDnnBatchNormGrad,
-                        bn.AbstractBatchNormTrain,
-                        bn.AbstractBatchNormInference,
-                        bn.AbstractBatchNormTrainGrad,
+                        batchnorm.AbstractBatchNormTrain,
+                        batchnorm.AbstractBatchNormInference,
+                        batchnorm.AbstractBatchNormTrainGrad,
                     ),
                 )
                 for n in f.maker.fgraph.toposort()
@@ -8,7 +8,7 @@ import theano.tensor as tt
 from tests import unittest_tools as utt
 from theano.configdefaults import config
 from theano.tensor.math import sum as tt_sum
-from theano.tensor.nnet import bn
+from theano.tensor.nnet import batchnorm
 from theano.tensor.type import (
     TensorType,
     matrix,
@@ -51,7 +51,7 @@ def test_BNComposite():
     f_ref = theano.function([x, b, g, m, v], [bn_ref_op])
     res_ref = f_ref(X, G, B, M, V)
     for mode in ["low_mem", "high_mem"]:
-        bn_op = bn.batch_normalization(x, g, b, m, v, mode=mode)
+        bn_op = batchnorm.batch_normalization(x, g, b, m, v, mode=mode)
         f = theano.function([x, b, g, m, v], [bn_op])
         res = f(X, G, B, M, V)
         utt.assert_allclose(res_ref, res)
@@ -79,13 +79,15 @@ def test_batch_normalization():
     f_ref = theano.function([x, g, b, m, v], [bn_ref_op])
     res_ref = f_ref(X, G, B, M, V)
     for mode in ["low_mem", "high_mem"]:
-        bn_op = bn.batch_normalization(x, g, b, m, v, mode=mode)
+        bn_op = batchnorm.batch_normalization(x, g, b, m, v, mode=mode)
         f = theano.function([x, g, b, m, v], [bn_op])
         res = f(X, G, B, M, V)
         utt.assert_allclose(res_ref, res)
 
         def bn_f(inputs, gamma, beta, mean, std):
-            return bn.batch_normalization(inputs, gamma, beta, mean, std, mode=mode)
+            return batchnorm.batch_normalization(
+                inputs, gamma, beta, mean, std, mode=mode
+            )
 
         utt.verify_grad(bn_f, [X, G, B, M, V])
 
@@ -95,7 +97,7 @@ def test_batch_normalization():
     f_ref = theano.function([x, b, g], [bn_ref_op])
     res_ref = f_ref(X, G, B)
     for mode in ["low_mem", "high_mem"]:
-        bn_op = bn.batch_normalization(
+        bn_op = batchnorm.batch_normalization(
             x,
             g,
             b,
@@ -108,7 +110,9 @@ def test_batch_normalization():
         utt.assert_allclose(res_ref, res)
 
         def bn_f(inputs, gamma, beta, mean, std):
-            return bn.batch_normalization(inputs, gamma, beta, mean, std, mode=mode)
+            return batchnorm.batch_normalization(
+                inputs, gamma, beta, mean, std, mode=mode
+            )
 
         utt.verify_grad(
             bn_f, [X, G, B, X.mean(axis=0)[np.newaxis], X.std(axis=0)[np.newaxis]]
@@ -144,7 +148,7 @@ def test_bn_feature_maps():
     res_ref = f_ref(X, G, B, M, V)
 
     for mode in ["low_mem", "high_mem"]:
-        bn_op = bn.batch_normalization(
+        bn_op = batchnorm.batch_normalization(
             x,
             g.dimshuffle("x", 0, "x", "x"),
             b.dimshuffle("x", 0, "x", "x"),
@@ -157,7 +161,7 @@ def test_bn_feature_maps():
         utt.assert_allclose(res_ref, res)
 
         def conv_bn(inputs, gamma, beta, mean, std):
-            return bn.batch_normalization(
+            return batchnorm.batch_normalization(
                 inputs,
                 gamma.dimshuffle("x", 0, "x", "x"),
                 beta.dimshuffle("x", 0, "x", "x"),
@@ -196,7 +200,7 @@ def test_batch_normalization_train():
             x_invstd,
             out_running_mean,
             out_running_var,
-        ) = bn.batch_normalization_train(
+        ) = batchnorm.batch_normalization_train(
             x,
             scale,
             bias,
@@ -300,9 +304,9 @@ def test_batch_normalization_train():
             isinstance(
                 n.op,
                 (
-                    bn.AbstractBatchNormTrain,
-                    bn.AbstractBatchNormInference,
-                    bn.AbstractBatchNormTrainGrad,
+                    batchnorm.AbstractBatchNormTrain,
+                    batchnorm.AbstractBatchNormInference,
+                    batchnorm.AbstractBatchNormTrainGrad,
                 ),
             )
             for n in f.maker.fgraph.toposort()
@@ -378,19 +382,19 @@ def test_batch_normalization_train_grad_grad():
             continue
 
         def bn_grad_wrt_inputs_f(x, dy, scale, x_mean, x_invstd):
-            g_inputs, g_scale, g_bias = bn.AbstractBatchNormTrainGrad(axes)(
+            g_inputs, g_scale, g_bias = batchnorm.AbstractBatchNormTrainGrad(axes)(
                 x, dy, scale, x_mean, x_invstd
             )
             return g_inputs
 
         def bn_grad_wrt_scale_f(x, dy, scale, x_mean, x_invstd):
-            g_inputs, g_scale, g_bias = bn.AbstractBatchNormTrainGrad(axes)(
+            g_inputs, g_scale, g_bias = batchnorm.AbstractBatchNormTrainGrad(axes)(
                 x, dy, scale, x_mean, x_invstd
             )
             return g_scale
 
         def bn_grad_wrt_bias_f(x, dy, scale, x_mean, x_invstd):
-            g_inputs, g_scale, g_bias = bn.AbstractBatchNormTrainGrad(axes)(
+            g_inputs, g_scale, g_bias = batchnorm.AbstractBatchNormTrainGrad(axes)(
                 x, dy, scale, x_mean, x_invstd
             )
             return g_bias
@@ -438,7 +442,7 @@ def test_batch_normalization_train_without_running_averages():
     param_shape = (1, 10, 30, 25)
 
     # forward pass
-    out, x_mean, x_invstd = bn.batch_normalization_train(
+    out, x_mean, x_invstd = batchnorm.batch_normalization_train(
         x, scale, bias, "per-activation"
     )
     # backward pass
@@ -451,9 +455,9 @@ def test_batch_normalization_train_without_running_averages():
             isinstance(
                 n.op,
                 (
-                    bn.AbstractBatchNormTrain,
-                    bn.AbstractBatchNormInference,
-                    bn.AbstractBatchNormTrainGrad,
+                    batchnorm.AbstractBatchNormTrain,
+                    batchnorm.AbstractBatchNormInference,
+                    batchnorm.AbstractBatchNormTrainGrad,
                 ),
             )
             for n in f.maker.fgraph.toposort()
@@ -508,7 +512,7 @@ def test_batch_normalization_train_broadcast():
         running_var_bc = running_var.dimshuffle(params_dimshuffle)
 
         # batch_normalization_train with original, non-broadcasted variables
-        train_non_bc = bn.batch_normalization_train(
+        train_non_bc = batchnorm.batch_normalization_train(
             x,
             scale,
             bias,
@@ -519,7 +523,7 @@ def test_batch_normalization_train_broadcast():
             running_var,
         )
         # batch_normalization_train with broadcasted variables
-        train_bc = bn.batch_normalization_train(
+        train_bc = batchnorm.batch_normalization_train(
            x,
            scale_bc,
            bias_bc,
@@ -534,11 +538,11 @@ def test_batch_normalization_train_broadcast():
         )
 
         # batch_normalization_test with original, non-broadcasted variables
-        test_non_bc = bn.batch_normalization_test(
+        test_non_bc = batchnorm.batch_normalization_test(
             x, scale, bias, running_mean, running_var, axes, eps
         )
 
         # batch_normalization_test with broadcasted variables
-        test_bc = bn.batch_normalization_test(
+        test_bc = batchnorm.batch_normalization_test(
             x, scale_bc, bias_bc, running_mean_bc, running_var_bc, axes, eps
         )
@@ -588,7 +592,9 @@ def test_batch_normalization_test():
             continue
 
         # forward pass
-        out = bn.batch_normalization_test(x, scale, bias, mean, var, axes, eps)
+        out = batchnorm.batch_normalization_test(
+            x, scale, bias, mean, var, axes, eps
+        )
         # reference forward pass
         if axes == "per-activation":
             axes2 = (0,)
@@ -619,9 +625,9 @@ def test_batch_normalization_test():
             isinstance(
                 n.op,
                 (
-                    bn.AbstractBatchNormTrain,
-                    bn.AbstractBatchNormInference,
-                    bn.AbstractBatchNormTrainGrad,
+                    batchnorm.AbstractBatchNormTrain,
+                    batchnorm.AbstractBatchNormInference,
+                    batchnorm.AbstractBatchNormTrainGrad,
                 ),
             )
             for n in f.maker.fgraph.toposort()
@@ -660,10 +666,10 @@ def test_batch_normalization_broadcastable():
     )
 
     # forward pass
-    out_train, x_mean, x_invstd = bn.batch_normalization_train(
+    out_train, x_mean, x_invstd = batchnorm.batch_normalization_train(
         x, scale, bias, "spatial"
     )
-    out_test = bn.batch_normalization_test(x, scale, bias, mean, var, "spatial")
+    out_test = batchnorm.batch_normalization_test(x, scale, bias, mean, var, "spatial")
     # backward pass
     grads_train = tt.grad(None, wrt=[x, scale, bias], known_grads={out_train: dy})
     grads_test = tt.grad(None, wrt=[x, scale, bias], known_grads={out_test: dy})
@@ -677,9 +683,9 @@ def test_batch_normalization_broadcastable():
             isinstance(
                 n.op,
                 (
-                    bn.AbstractBatchNormTrain,
-                    bn.AbstractBatchNormInference,
-                    bn.AbstractBatchNormTrainGrad,
+                    batchnorm.AbstractBatchNormTrain,
+                    batchnorm.AbstractBatchNormInference,
+                    batchnorm.AbstractBatchNormTrainGrad,
                 ),
             )
             for n in f.maker.fgraph.toposort()
@@ -179,7 +179,7 @@ from theano.tensor.basic import (
     Tri,
 )
 from theano.tensor.math import MaxAndArgmax
-from theano.tensor.nnet import bn, conv3d2d
+from theano.tensor.nnet import batchnorm, conv3d2d
 from theano.tensor.nnet.abstract_conv import (
     AbstractConv2d,
     AbstractConv2d_gradInputs,
@@ -3122,9 +3122,9 @@ register_opt("fast_compile")(abstract_batch_norm_groupopt)
 register_opt("fast_compile", name="abstract_batch_norm_db")(abstract_batch_norm_db)
 register_opt2(
     [
-        bn.AbstractBatchNormTrain,
-        bn.AbstractBatchNormTrainGrad,
-        bn.AbstractBatchNormInference,
+        batchnorm.AbstractBatchNormTrain,
+        batchnorm.AbstractBatchNormTrainGrad,
+        batchnorm.AbstractBatchNormInference,
     ],
     "fast_compile",
     name="abstract_batch_norm_db2",
@@ -3132,19 +3132,19 @@ register_opt2(
 
 for op, fct, cpu in [
     (
-        bn.AbstractBatchNormTrain,
+        batchnorm.AbstractBatchNormTrain,
         local_abstract_batch_norm_train_cudnn,
-        bn.local_abstract_batch_norm_train,
+        batchnorm.local_abstract_batch_norm_train,
     ),
     (
-        bn.AbstractBatchNormTrainGrad,
+        batchnorm.AbstractBatchNormTrainGrad,
         local_abstract_batch_norm_train_grad_cudnn,
-        bn.local_abstract_batch_norm_train_grad,
+        batchnorm.local_abstract_batch_norm_train_grad,
     ),
     (
-        bn.AbstractBatchNormInference,
+        batchnorm.AbstractBatchNormInference,
         local_abstract_batch_norm_inference_cudnn,
-        bn.local_abstract_batch_norm_inference,
+        batchnorm.local_abstract_batch_norm_inference,
     ),
 ]:
     lifter = op_lifter([op])(fct)
@@ -40,7 +40,7 @@ from theano.tensor.nnet.basic import (
     softmax_with_bias,
     softsign,
 )
-from theano.tensor.nnet.bn import batch_normalization
+from theano.tensor.nnet.batchnorm import batch_normalization
 from theano.tensor.nnet.sigm import (
     hard_sigmoid,
     scalar_sigmoid,
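Taken together, the hunks above cover the three public entry points that move under the new module name, `batch_normalization`, `batch_normalization_train`, and `batch_normalization_test`, along with the abstract Ops (`AbstractBatchNormTrain`, `AbstractBatchNormInference`, `AbstractBatchNormTrainGrad`) and their local optimizers. A sketch of the train/test pair as the updated tests exercise it (variable names here are illustrative, not from the diff):

    from theano.tensor.nnet import batchnorm
    import theano.tensor as tt

    x = tt.tensor4("x")
    scale = tt.tensor4("scale")
    bias = tt.tensor4("bias")

    # Training graph: normalize with batch statistics; also returns the
    # batch mean and inverse standard deviation used for normalization.
    out, x_mean, x_invstd = batchnorm.batch_normalization_train(
        x, scale, bias, "per-activation"
    )

    # Inference graph: normalize with externally supplied statistics.
    mean = tt.tensor4("mean")
    var = tt.tensor4("var")
    out_test = batchnorm.batch_normalization_test(
        x, scale, bias, mean, var, "per-activation"
    )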