Replace theano.tensor alias T, and fully qualified theano.tensor calls, with tt in tests.tensor.nnet

e103e7fb · Brandon T. Willard · daacd069 · e103e7fb · e103e7fb · e103e7fb
--- a/tests/tensor/nnet/speed_test_conv.py
+++ b/tests/tensor/nnet/speed_test_conv.py
@@ -2,7 +2,7 @@ import time

 import numpy as N

-import theano.tensor as T
+import theano.tensor as tt

 from theano import function, Mode
 from theano.tensor.nnet.conv import ConvOp
@@ -39,7 +39,7 @@ def flip(kern, kshp):

 global_rng = N.random.RandomState(3423489)

-dmatrix4 = T.TensorType("float64", (False, False, False, False))
+dmatrix4 = tt.TensorType("float64", (False, False, False, False))


 def exec_multilayer_conv_nnet_old(
@@ -51,7 +51,7 @@ def exec_multilayer_conv_nnet_old(
    nkerns,
    unroll_batch=0,
    unroll_kern=0,
-    img=T.dmatrix(),
+    img=tt.dmatrix(),
    validate=True,
    conv_op_py=False,
    do_print=True,
@@ -64,7 +64,7 @@ def exec_multilayer_conv_nnet_old(
    # build actual input images
    imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])

-    a = T.dmatrix()
+    a = tt.dmatrix()
    kerns = [a for i in nkerns]
    inputs4 = dmatrix4()
    kerns4 = dmatrix4()
@@ -180,7 +180,7 @@ def exec_multilayer_conv_nnet(
    nkerns,
    unroll_batch=0,
    unroll_kern=0,
-    img=T.dmatrix(),
+    img=tt.dmatrix(),
    do_print=True,
    repeat=1,
    unroll_patch=False,
@@ -191,7 +191,7 @@ def exec_multilayer_conv_nnet(
    # build actual input images
    imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])

-    a = T.dmatrix()
+    a = tt.dmatrix()
    kerns = [a for i in nkerns]
    inputs4 = dmatrix4()
    kerns4 = dmatrix4()
@@ -277,7 +277,7 @@ def speed_multilayer_conv():
    ]  # (1,1)]#(2,2) bugged
    convmodes = ["valid", "full"]
    # do_convolve2 = False
-    a = T.dmatrix()
+    a = tt.dmatrix()
    kerns = [a for i in nkerns]

    assert len(kshps) == len(nkerns) == len(kerns)

--- a/tests/tensor/nnet/test_bn.py
+++ b/tests/tensor/nnet/test_bn.py
@@ -3,7 +3,7 @@ import pytest
 import numpy as np

 import theano
-import theano.tensor as T
+import theano.tensor as tt

 from collections import OrderedDict

@@ -29,11 +29,11 @@ def test_BNComposite():
        M = 1 + np.random.random([20]).astype("float32")
        V = 1 + np.random.random([20]).astype("float32")

-        x = theano.tensor.matrix("x")
-        b = theano.tensor.vector("b")
-        g = theano.tensor.vector("g")
-        m = theano.tensor.vector("m")
-        v = theano.tensor.vector("v")
+        x = tt.matrix("x")
+        b = tt.vector("b")
+        g = tt.vector("g")
+        m = tt.vector("m")
+        v = tt.vector("v")

        x.tag.test_value = np.random.rand(2, 2).astype(theano.config.floatX)
        b.tag.test_value = np.random.rand(2).astype(theano.config.floatX)
@@ -65,11 +65,11 @@ def test_batch_normalization():
    M = 1 + np.random.random([20]).astype("float32")
    V = 1 + np.random.random([20]).astype("float32")

-    x = theano.tensor.matrix("x")
-    b = theano.tensor.vector("b")
-    g = theano.tensor.vector("g")
-    m = theano.tensor.vector("m")
-    v = theano.tensor.vector("v")
+    x = tt.matrix("x")
+    b = tt.vector("b")
+    g = tt.vector("g")
+    m = tt.vector("m")
+    v = tt.vector("v")

    bn_ref_op = bn_ref(x, g, b, m, v)
    f_ref = theano.function([x, g, b, m, v], [bn_ref_op])
@@ -123,11 +123,11 @@ def test_bn_feature_maps():
    M = 1 + np.random.random([3]).astype("float32")
    V = 1 + np.random.random([3]).astype("float32")

-    x = theano.tensor.tensor4("x")
-    b = theano.tensor.vector("b")
-    g = theano.tensor.vector("g")
-    m = theano.tensor.vector("m")
-    v = theano.tensor.vector("v")
+    x = tt.tensor4("x")
+    b = tt.vector("b")
+    g = tt.vector("g")
+    m = tt.vector("m")
+    v = tt.vector("v")

    bn_ref_op = bn_ref(
        x,
@@ -170,7 +170,7 @@ def test_batch_normalization_train():
    utt.seed_rng()

    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
-        for vartype in (T.tensor5, T.tensor3, T.vector):
+        for vartype in (tt.tensor5, tt.tensor3, tt.vector):
            x, scale, bias, running_mean, running_var = (
                vartype(n)
                for n in ("x", "scale", "bias", "running_mean", "running_var")
@@ -211,11 +211,11 @@ def test_batch_normalization_train():
                axes2 = axes
            x_mean2 = x.mean(axis=axes2, keepdims=True)
            x_var2 = x.var(axis=axes2, keepdims=True)
-            x_invstd2 = T.inv(T.sqrt(x_var2 + eps))
-            scale2 = T.addbroadcast(scale, *axes2)
-            bias2 = T.addbroadcast(bias, *axes2)
+            x_invstd2 = tt.inv(tt.sqrt(x_var2 + eps))
+            scale2 = tt.addbroadcast(scale, *axes2)
+            bias2 = tt.addbroadcast(bias, *axes2)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
-            m = T.cast(T.prod(x.shape) / T.prod(scale.shape), theano.config.floatX)
+            m = tt.cast(tt.prod(x.shape) / tt.prod(scale.shape), theano.config.floatX)
            out_running_mean2 = (
                running_mean * (1 - running_average_factor)
                + x_mean2 * running_average_factor
@@ -226,14 +226,14 @@ def test_batch_normalization_train():
            )
            # backward pass
            dy = vartype("dy")
-            grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
+            grads = tt.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
-            grads2 = T.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
+            grads2 = tt.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # second-order backward pass
            dx = vartype("dinputs")
            dscale = vartype("dscale")
            dbias = vartype("dbias")
-            grad_grads = T.grad(
+            grad_grads = tt.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict(
@@ -252,7 +252,7 @@ def test_batch_normalization_train():
                return_disconnected="zero",
            )
            # reference second-order backward pass
-            grad_grads2 = T.grad(
+            grad_grads2 = tt.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict(
@@ -354,7 +354,7 @@ def test_batch_normalization_train_grad_grad():
    utt.seed_rng()

    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
-        for vartype in (T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector):
+        for vartype in (tt.tensor5, tt.tensor4, tt.tensor3, tt.matrix, tt.vector):
            # run these experiments with float64 for sufficient numerical stability
            x, dy, scale, x_mean, x_invstd = (
                vartype(n, dtype="float64")
@@ -425,10 +425,10 @@ def test_batch_normalization_train_without_running_averages():
    utt.seed_rng()

    x, scale, bias, dy = (
-        T.tensor4("x"),
-        T.tensor4("scale"),
-        T.tensor4("bias"),
-        T.tensor4("dy"),
+        tt.tensor4("x"),
+        tt.tensor4("scale"),
+        tt.tensor4("bias"),
+        tt.tensor4("dy"),
    )
    data_shape = (5, 10, 30, 25)
    param_shape = (1, 10, 30, 25)
@@ -438,7 +438,7 @@ def test_batch_normalization_train_without_running_averages():
        x, scale, bias, "per-activation"
    )
    # backward pass
-    grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
+    grads = tt.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
    # compile
    f = theano.function([x, scale, bias, dy], [out, x_mean, x_invstd] + grads)
    # check if the abstract Ops have been replaced
@@ -465,7 +465,7 @@ def test_batch_normalization_train_without_running_averages():

 def test_batch_normalization_train_broadcast():
    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
-        for vartype in (T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector):
+        for vartype in (tt.tensor5, tt.tensor4, tt.tensor3, tt.matrix, tt.vector):
            x = vartype("x")
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used
@@ -492,7 +492,7 @@ def test_batch_normalization_train_broadcast():
                params_dimshuffle[axis] = i

            # construct non-broadcasted parameter variables
-            param_type = T.TensorType(x.dtype, (False,) * len(non_bc_axes))
+            param_type = tt.TensorType(x.dtype, (False,) * len(non_bc_axes))
            scale, bias, running_mean, running_var = (
                param_type(n) for n in ("scale", "bias", "running_mean", "running_var")
            )
@@ -545,7 +545,7 @@ def test_batch_normalization_train_broadcast():

            # compile to compute all differences
            f = theano.function(
-                [x, scale, bias, running_mean, running_var], T.sum(sum(results))
+                [x, scale, bias, running_mean, running_var], tt.sum(sum(results))
            )

            # the paired ops are exactly the same, so the optimizer should have
@@ -570,7 +570,7 @@ def test_batch_normalization_train_broadcast():
 @pytest.mark.slow
 def test_batch_normalization_test():
    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
-        for vartype in (T.tensor5, T.tensor3, T.vector):
+        for vartype in (tt.tensor5, tt.tensor3, tt.vector):
            x, scale, bias, mean, var = (
                vartype(n) for n in ("x", "scale", "bias", "mean", "var")
            )
@@ -593,14 +593,16 @@ def test_batch_normalization_test():
            else:
                axes2 = axes
            scale2, bias2, mean2, var2 = (
-                T.addbroadcast(t, *axes2) for t in (scale, bias, mean, var)
+                tt.addbroadcast(t, *axes2) for t in (scale, bias, mean, var)
            )
-            out2 = (x - mean2) * (scale2 / T.sqrt(var2 + eps)) + bias2
+            out2 = (x - mean2) * (scale2 / tt.sqrt(var2 + eps)) + bias2
            # backward pass
            dy = vartype("dy")
-            grads = T.grad(None, wrt=[x, scale, bias, mean, var], known_grads={out: dy})
+            grads = tt.grad(
+                None, wrt=[x, scale, bias, mean, var], known_grads={out: dy}
+            )
            # reference backward pass
-            grads2 = T.grad(
+            grads2 = tt.grad(
                None, wrt=[x, scale, bias, mean, var], known_grads={out2: dy}
            )
            # compile
@@ -649,7 +651,7 @@ def test_batch_normalization_test():
 def test_batch_normalization_broadcastable():
    # check if the broadcastable pattern is preserved by the optimizations
    x, dy, scale, bias, mean, var = (
-        T.scalar(n).dimshuffle(["x"] * 5)
+        tt.scalar(n).dimshuffle(["x"] * 5)
        for n in ("x", "dy", "scale", "bias", "mean", "var")
    )

@@ -659,8 +661,8 @@ def test_batch_normalization_broadcastable():
    )
    out_test = bn.batch_normalization_test(x, scale, bias, mean, var, "spatial")
    # backward pass
-    grads_train = T.grad(None, wrt=[x, scale, bias], known_grads={out_train: dy})
-    grads_test = T.grad(None, wrt=[x, scale, bias], known_grads={out_test: dy})
+    grads_train = tt.grad(None, wrt=[x, scale, bias], known_grads={out_train: dy})
+    grads_test = tt.grad(None, wrt=[x, scale, bias], known_grads={out_test: dy})
    # compile
    f = theano.function(
        [x, scale, bias, mean, var, dy],

--- a/tests/tensor/nnet/test_conv.py
+++ b/tests/tensor/nnet/test_conv.py
@@ -5,9 +5,9 @@ import pytest
 import numpy as np

 import theano
-import theano.tensor as T
+import theano.tensor as tt

-from theano.tensor.nnet import conv
+from theano.tensor.nnet import conv, conv2d
 from theano.tensor.basic import _allclose, NotScalarConstantError

 from tests import unittest_tools as utt
@@ -28,9 +28,9 @@ class TestConv2D(utt.InferShapeTester):
    conv2d = staticmethod(conv.conv2d)

    def setup_method(self):
-        self.input = T.tensor4("input", dtype=self.dtype)
+        self.input = tt.tensor4("input", dtype=self.dtype)
        self.input.name = "default_V"
-        self.filters = T.tensor4("filters", dtype=self.dtype)
+        self.filters = tt.tensor4("filters", dtype=self.dtype)
        self.filters.name = "default_filters"
        super().setup_method()

@@ -64,12 +64,12 @@ class TestConv2D(utt.InferShapeTester):
        """
        if N_image_shape is None:
            N_image_shape = [
-                T.get_scalar_constant_value(T.as_tensor_variable(x))
+                tt.get_scalar_constant_value(tt.as_tensor_variable(x))
                for x in image_shape
            ]
        if N_filter_shape is None:
            N_filter_shape = [
-                T.get_scalar_constant_value(T.as_tensor_variable(x))
+                tt.get_scalar_constant_value(tt.as_tensor_variable(x))
                for x in filter_shape
            ]

@@ -391,7 +391,7 @@ class TestConv2D(utt.InferShapeTester):
    def test_shape_Constant_tensor(self):
        # Tests convolution where the {image,filter}_shape is a Constant tensor.

-        as_t = T.as_tensor_variable
+        as_t = tt.as_tensor_variable
        self.validate((as_t(3), as_t(2), as_t(7), as_t(5)), (5, 2, 2, 3), "valid")
        self.validate(as_t([3, 2, 7, 5]), (5, 2, 2, 3), "valid")
        self.validate(as_t((3, 2, 7, 5)), (5, 2, 2, 3), "valid")
@@ -563,11 +563,11 @@ class TestConv2D(utt.InferShapeTester):
        # Make sure errors are raised when image and kernel are not 4D tensors

        with pytest.raises(Exception):
-            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=T.dmatrix())
+            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=tt.dmatrix())
        with pytest.raises(Exception):
-            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", filters=T.dvector())
+            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", filters=tt.dvector())
        with pytest.raises(Exception):
-            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=T.dtensor3())
+            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=tt.dtensor3())

    def test_gcc_crash(self):
        # gcc 4.3.0 20080428 (Red Hat 4.3.0-8)
@@ -629,8 +629,8 @@ class TestConv2D(utt.InferShapeTester):
            r = np.asarray(np.random.rand(*shape), dtype="float64")
            return r * 2 - 1

-        adtens = T.dtensor4()
-        bdtens = T.dtensor4()
+        adtens = tt.dtensor4()
+        bdtens = tt.dtensor4()
        aivec_val = [4, 5, 6, 3]
        bivec_val = [7, 5, 3, 2]
        adtens_val = rand(*aivec_val)
@@ -737,20 +737,18 @@ class TestConv2D(utt.InferShapeTester):
 # code from that ticket.
 def test_broadcast_grad():
    # rng = numpy.random.RandomState(utt.fetch_seed())
-    x1 = T.tensor4("x")
+    x1 = tt.tensor4("x")
    # x1_data = rng.randn(1, 1, 300, 300)
-    sigma = T.scalar("sigma")
+    sigma = tt.scalar("sigma")
    # sigma_data = 20
    window_radius = 3

-    filter_1d = T.arange(-window_radius, window_radius + 1)
+    filter_1d = tt.arange(-window_radius, window_radius + 1)
    filter_1d = filter_1d.astype(theano.config.floatX)
-    filter_1d = T.exp(-0.5 * filter_1d ** 2 / sigma ** 2)
+    filter_1d = tt.exp(-0.5 * filter_1d ** 2 / sigma ** 2)
    filter_1d = filter_1d / filter_1d.sum()

    filter_W = filter_1d.dimshuffle(["x", "x", 0, "x"])

-    y = theano.tensor.nnet.conv2d(
-        x1, filter_W, border_mode="full", filter_shape=[1, 1, None, None]
-    )
+    y = conv2d(x1, filter_W, border_mode="full", filter_shape=[1, 1, None, None])
    theano.grad(y.sum(), sigma)
--- a/tests/tensor/nnet/test_corr.py
+++ b/tests/tensor/nnet/test_corr.py
 import pytest
 import numpy as np
 import theano
-import theano.tensor as T
+import theano.tensor as tt

 from six import integer_types
 from theano.tensor.nnet import corr, conv
@@ -29,9 +29,9 @@ class TestCorr2D(utt.InferShapeTester):
    dtype = theano.config.floatX

    def setup_method(self):
-        self.input = T.tensor4("input", dtype=self.dtype)
+        self.input = tt.tensor4("input", dtype=self.dtype)
        self.input.name = "default_V"
-        self.filters = T.tensor4("filters", dtype=self.dtype)
+        self.filters = tt.tensor4("filters", dtype=self.dtype)
        self.filters.name = "default_filters"
        # This tests can run even when theano.config.blas.ldflags is empty.
        super().setup_method()
@@ -55,10 +55,10 @@ class TestCorr2D(utt.InferShapeTester):
        if not theano.config.cxx:
            pytest.skip("Need cxx to test conv2d")
        N_image_shape = [
-            T.get_scalar_constant_value(T.as_tensor_variable(x)) for x in image_shape
+            tt.get_scalar_constant_value(tt.as_tensor_variable(x)) for x in image_shape
        ]
        N_filter_shape = [
-            T.get_scalar_constant_value(T.as_tensor_variable(x)) for x in filter_shape
+            tt.get_scalar_constant_value(tt.as_tensor_variable(x)) for x in filter_shape
        ]

        if input is None:
@@ -255,7 +255,7 @@ class TestCorr2D(utt.InferShapeTester):
    def test_shape_Constant_tensor(self):
        # Tests correlation where the {image,filter}_shape is a Constant tensor.

-        as_t = T.as_tensor_variable
+        as_t = tt.as_tensor_variable
        border_modes = ["valid", "full", "half", (1, 1), (2, 1), (1, 2), (3, 3), 1]

        for border_mode in border_modes:
@@ -290,11 +290,11 @@ class TestCorr2D(utt.InferShapeTester):
        # Make sure errors are raised when image and kernel are not 4D tensors

        with pytest.raises(Exception):
-            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=T.dmatrix())
+            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=tt.dmatrix())
        with pytest.raises(Exception):
-            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", filters=T.dvector())
+            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", filters=tt.dvector())
        with pytest.raises(Exception):
-            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=T.dtensor3())
+            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=tt.dtensor3())

    @pytest.mark.skipif(not theano.config.cxx, reason="Need cxx for this test")
    def test_dtype_upcast(self):
@@ -313,8 +313,8 @@ class TestCorr2D(utt.InferShapeTester):
            for a_dtype in dtypes:
                for b_dtype in dtypes:
                    c_dtype = theano.scalar.upcast(a_dtype, b_dtype)
-                    a_tens = T.tensor4(dtype=a_dtype)
-                    b_tens = T.tensor4(dtype=b_dtype)
+                    a_tens = tt.tensor4(dtype=a_dtype)
+                    b_tens = tt.tensor4(dtype=b_dtype)
                    a_tens_val = rand(a_shape, dtype=a_dtype)
                    b_tens_val = rand(b_shape, dtype=b_dtype)

@@ -334,8 +334,8 @@ class TestCorr2D(utt.InferShapeTester):

        corrMM = corr.CorrMM

-        adtens = T.dtensor4()
-        bdtens = T.dtensor4()
+        adtens = tt.dtensor4()
+        bdtens = tt.dtensor4()
        aivec_vals = [
            [4, 5, 6, 3],
            [6, 2, 8, 3],
@@ -385,8 +385,8 @@ class TestCorr2D(utt.InferShapeTester):
        corrMM = corr.CorrMM
        gradW = corr.CorrMM_gradWeights

-        adtens = T.dtensor4()
-        bdtens = T.dtensor4()
+        adtens = tt.dtensor4()
+        bdtens = tt.dtensor4()
        aivec_vals = [
            [1, 5, 6, 3],
            [8, 2, 7, 3],
@@ -441,8 +441,8 @@ class TestCorr2D(utt.InferShapeTester):
        corrMM = corr.CorrMM
        gradI = corr.CorrMM_gradInputs

-        adtens = T.dtensor4()
-        bdtens = T.dtensor4()
+        adtens = tt.dtensor4()
+        bdtens = tt.dtensor4()
        aivec_vals = [
            [1, 5, 6, 3],
            [8, 2, 7, 3],
@@ -510,8 +510,8 @@ class TestGroupCorr2d(TestGroupedConvNoOptim):
        groups = 3
        bottom = np.random.rand(3, 6, 5, 5).astype(theano.config.floatX)
        kern = np.random.rand(9, 2, 3, 3).astype(theano.config.floatX)
-        bottom_sym = T.tensor4("bottom")
-        kern_sym = T.tensor4("kern")
+        bottom_sym = tt.tensor4("bottom")
+        kern_sym = tt.tensor4("kern")

        # grouped convolution graph
        conv_group = self.conv(num_groups=groups)(bottom_sym, kern_sym)
@@ -527,7 +527,7 @@ class TestGroupCorr2d(TestGroupedConvNoOptim):
            )
            for i in range(groups)
        ]
-        concatenated_output = T.concatenate(split_conv_output, axis=1)
+        concatenated_output = tt.concatenate(split_conv_output, axis=1)
        conv_func = theano.function(
            [bottom_sym, kern_sym], concatenated_output, mode=self.mode
        )

--- a/tests/tensor/nnet/test_corr3d.py
+++ b/tests/tensor/nnet/test_corr3d.py
 import pytest
 import numpy as np
 import theano
-import theano.tensor as T
+import theano.tensor as tt

 from six import integer_types

@@ -23,9 +23,9 @@ class TestCorr3D(utt.InferShapeTester):
    dtype = theano.config.floatX

    def setup_method(self):
-        self.input = T.tensor5("input", dtype=self.dtype)
+        self.input = tt.tensor5("input", dtype=self.dtype)
        self.input.name = "default_V"
-        self.filters = T.tensor5("filters", dtype=self.dtype)
+        self.filters = tt.tensor5("filters", dtype=self.dtype)
        self.filters.name = "default_filters"
        # This tests can run even when theano.config.blas.ldflags is empty.
        super().setup_method()
@@ -50,10 +50,10 @@ class TestCorr3D(utt.InferShapeTester):
            pytest.skip("Need cxx for this test")

        N_image_shape = [
-            T.get_scalar_constant_value(T.as_tensor_variable(x)) for x in image_shape
+            tt.get_scalar_constant_value(tt.as_tensor_variable(x)) for x in image_shape
        ]
        N_filter_shape = [
-            T.get_scalar_constant_value(T.as_tensor_variable(x)) for x in filter_shape
+            tt.get_scalar_constant_value(tt.as_tensor_variable(x)) for x in filter_shape
        ]

        if input is None:
@@ -296,7 +296,7 @@ class TestCorr3D(utt.InferShapeTester):
    )
    def test_shape_Constant_tensor(self, border_mode):
        # Tests correlation where the {image,filter}_shape is a Constant tensor
-        as_t = T.as_tensor_variable
+        as_t = tt.as_tensor_variable
        self.validate(
            (as_t(3), as_t(2), as_t(7), as_t(5), as_t(5)), (5, 2, 2, 3, 3), border_mode
        )
@@ -327,13 +327,17 @@ class TestCorr3D(utt.InferShapeTester):
    def test_wrong_input(self):
        # Make sure errors are raised when image and kernel are not 5D tensors
        with pytest.raises(Exception):
-            self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=T.dmatrix())
+            self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=tt.dmatrix())
        with pytest.raises(Exception):
-            self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=T.vector())
+            self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=tt.vector())
        with pytest.raises(Exception):
-            self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=T.dtensor3())
+            self.validate(
+                (3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=tt.dtensor3()
+            )
        with pytest.raises(Exception):
-            self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=T.dtensor4())
+            self.validate(
+                (3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=tt.dtensor4()
+            )

    @pytest.mark.skipif(not theano.config.cxx, reason="Need cxx for this test")
    def test_dtype_upcast(self):
@@ -352,8 +356,8 @@ class TestCorr3D(utt.InferShapeTester):
            for a_dtype in dtypes:
                for b_dtype in dtypes:
                    c_dtype = theano.scalar.upcast(a_dtype, b_dtype)
-                    a_tens = T.tensor5(dtype=a_dtype)
-                    b_tens = T.tensor5(dtype=b_dtype)
+                    a_tens = tt.tensor5(dtype=a_dtype)
+                    b_tens = tt.tensor5(dtype=b_dtype)
                    a_tens_val = rand(a_shape, dtype=a_dtype)
                    b_tens_val = rand(b_shape, dtype=b_dtype)

@@ -373,8 +377,8 @@ class TestCorr3D(utt.InferShapeTester):

        corr3dMM = corr3d.Corr3dMM

-        adtens = T.dtensor5()
-        bdtens = T.dtensor5()
+        adtens = tt.dtensor5()
+        bdtens = tt.dtensor5()
        aivec_vals = [
            [4, 5, 6, 3, 3],
            [6, 2, 8, 3, 3],
@@ -422,8 +426,8 @@ class TestCorr3D(utt.InferShapeTester):
        corr3dMM = corr3d.Corr3dMM
        gradW = corr3d.Corr3dMMGradWeights

-        adtens = T.dtensor5()
-        bdtens = T.dtensor5()
+        adtens = tt.dtensor5()
+        bdtens = tt.dtensor5()
        aivec_vals = [
            [1, 5, 6, 3, 3],
            [8, 2, 7, 3, 3],
@@ -482,8 +486,8 @@ class TestCorr3D(utt.InferShapeTester):
        corr3dMM = corr3d.Corr3dMM
        gradI = corr3d.Corr3dMMGradInputs

-        adtens = T.dtensor5()
-        bdtens = T.dtensor5()
+        adtens = tt.dtensor5()
+        bdtens = tt.dtensor5()
        aivec_vals = [
            [1, 5, 6, 3, 3],
            [8, 2, 7, 3, 3],

--- a/tests/tensor/nnet/test_ctc.py
+++ b/tests/tensor/nnet/test_ctc.py
 import pytest
 import numpy as np
 import theano
-import theano.tensor as T
+import theano.tensor as tt

 from theano.tensor.nnet.ctc import (
    ctc_available,
@@ -128,7 +128,7 @@ class TestCTC:

        t_cost = ctc(t_activations, t_labels, t_activation_times)
        # Symbolic gradient of CTC cost
-        t_grad = T.grad(T.mean(t_cost), t_activations)
+        t_grad = tt.grad(tt.mean(t_cost), t_activations)
        # Compile symbolic functions
        train = theano.function([], [t_cost, t_grad])


--- a/tests/tensor/nnet/test_neighbours.py
+++ b/tests/tensor/nnet/test_neighbours.py
 import pytest
 import numpy as np
 import theano
-import theano.tensor as T
+import theano.tensor as tt

 from theano import change_flags
 from theano import shared, function
@@ -30,7 +30,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
                    images = shared(
                        np.arange(np.prod(shape), dtype=dtype).reshape(shape)
                    )
-                    neib_shape = T.as_tensor_variable(pshape)
+                    neib_shape = tt.as_tensor_variable(pshape)

                    f = function(
                        [],
@@ -60,7 +60,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
        shape = (2, 3, 4, 4)
        for dtype in self.dtypes:
            images = shared(np.arange(np.prod(shape), dtype=dtype).reshape(shape))
-            neib_shape = T.as_tensor_variable((2, 2))
+            neib_shape = tt.as_tensor_variable((2, 2))

            for border in ["valid", "ignore_borders"]:
                f = function(
@@ -114,8 +114,8 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
            images = shared(
                np.asarray(np.arange(np.prod(shape)).reshape(shape), dtype=dtype)
            )
-            neib_shape = T.as_tensor_variable((3, 3))
-            neib_step = T.as_tensor_variable((2, 2))
+            neib_shape = tt.as_tensor_variable((3, 3))
+            neib_step = tt.as_tensor_variable((2, 2))
            for border in ["valid", "ignore_borders"]:
                f = function(
                    [],
@@ -170,7 +170,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
            images = shared(np.arange(np.prod(shape), dtype=dtype).reshape(shape))

            for neib_shape in [(3, 2), (2, 3)]:
-                neib_shape = T.as_tensor_variable(neib_shape)
+                neib_shape = tt.as_tensor_variable(neib_shape)
                f = function([], images2neibs(images, neib_shape), mode=self.mode)
                with pytest.raises(TypeError):
                    f()
@@ -252,8 +252,8 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
                images = shared(
                    np.asarray(np.arange(np.prod(shape)).reshape(shape), dtype=dtype)
                )
-                neib_shape = T.as_tensor_variable(neib_shape)
-                neib_step = T.as_tensor_variable(neib_step)
+                neib_shape = tt.as_tensor_variable(neib_shape)
+                neib_step = tt.as_tensor_variable(neib_step)
                expected = np.asarray(expected)

                f = function(
@@ -304,8 +304,8 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
                        x.shape[2] + 2 * extra[0],
                        x.shape[3] + 2 * extra[1],
                    )
-                    padded_x = T.zeros(padded_shape)
-                    padded_x = T.set_subtensor(
+                    padded_x = tt.zeros(padded_shape)
+                    padded_x = tt.set_subtensor(
                        padded_x[:, :, extra[0] : -extra[0], extra[1] : -extra[1]], x
                    )
                    x_using_valid = images2neibs(
@@ -341,8 +341,8 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
                        x.shape[2] + 2 * extra[0],
                        x.shape[3] + 2 * extra[1],
                    )
-                    padded_x = T.zeros(padded_shape)
-                    padded_x = T.set_subtensor(
+                    padded_x = tt.zeros(padded_shape)
+                    padded_x = tt.set_subtensor(
                        padded_x[:, :, extra[0] : -extra[0], extra[1] : -extra[1]], x
                    )
                    x_using_valid = images2neibs(
@@ -361,7 +361,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
            images = shared(np.arange(np.prod(shape), dtype=dtype).reshape(shape))

            for neib_shape in [(3, 2), (2, 3)]:
-                neib_shape = T.as_tensor_variable(neib_shape)
+                neib_shape = tt.as_tensor_variable(neib_shape)

                f = function(
                    [],
@@ -373,7 +373,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):

            for shape in [(2, 3, 2, 3), (2, 3, 3, 2)]:
                images = shared(np.arange(np.prod(shape)).reshape(shape))
-                neib_shape = T.as_tensor_variable((3, 3))
+                neib_shape = tt.as_tensor_variable((3, 3))
                f = function(
                    [],
                    images2neibs(images, neib_shape, mode="wrap_centered"),
@@ -385,7 +385,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
            # Test a valid shapes
            shape = (2, 3, 3, 3)
            images = shared(np.arange(np.prod(shape)).reshape(shape))
-            neib_shape = T.as_tensor_variable((3, 3))
+            neib_shape = tt.as_tensor_variable((3, 3))

            f = function(
                [],
@@ -467,33 +467,33 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):

    def test_neibs_valid_with_inconsistent_borders(self):
        shape = (2, 3, 5, 5)
-        images = T.dtensor4()
+        images = tt.dtensor4()
        images_val = np.arange(np.prod(shape), dtype="float32").reshape(shape)

        f = theano.function(
-            [images], T.sqr(images2neibs(images, (2, 2), mode="valid")), mode=self.mode
+            [images], tt.sqr(images2neibs(images, (2, 2), mode="valid")), mode=self.mode
        )
        with pytest.raises(TypeError):
            f(images_val)

    def test_neibs_half_with_inconsistent_borders(self):
        shape = (2, 3, 5, 5)
-        images = T.dtensor4()
+        images = tt.dtensor4()
        images_val = np.arange(np.prod(shape), dtype="float32").reshape(shape)

        f = theano.function(
-            [images], T.sqr(images2neibs(images, (2, 2), mode="half")), mode=self.mode
+            [images], tt.sqr(images2neibs(images, (2, 2), mode="half")), mode=self.mode
        )
        with pytest.raises(TypeError):
            f(images_val)

    def test_neibs_full_with_inconsistent_borders(self):
        shape = (2, 3, 5, 5)
-        images = T.dtensor4()
+        images = tt.dtensor4()
        images_val = np.arange(np.prod(shape), dtype="float32").reshape(shape)

        f = theano.function(
-            [images], T.sqr(images2neibs(images, (2, 2), mode="full")), mode=self.mode
+            [images], tt.sqr(images2neibs(images, (2, 2), mode="full")), mode=self.mode
        )
        with pytest.raises(TypeError):
            f(images_val)
@@ -503,12 +503,12 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
        # or that we crash in a few other case found while
        # investigating that case

-        img = T.tensor4("img")
-        patches = T.nnet.neighbours.images2neibs(img, [16, 16])
+        img = tt.tensor4("img")
+        patches = tt.nnet.neighbours.images2neibs(img, [16, 16])
        extractPatches = theano.function([img], patches, mode=self.mode)

-        patsRecovery = T.matrix("patsRecovery")
-        original_size = T.ivector("original_size")
+        patsRecovery = tt.matrix("patsRecovery")
+        original_size = tt.ivector("original_size")

        for mode in ["valid", "ignore_borders"]:
            out = neibs2images(patsRecovery, (16, 16), original_size, mode=mode)
@@ -528,7 +528,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
    def speed_neibs(self):
        shape = (100, 40, 18, 18)
        images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape))
-        neib_shape = T.as_tensor_variable((3, 3))
+        neib_shape = tt.as_tensor_variable((3, 3))

        f = function([], images2neibs(images, neib_shape), mode=self.mode)

@@ -538,7 +538,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
    def speed_neibs_wrap_centered(self):
        shape = (100, 40, 18, 18)
        images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape))
-        neib_shape = T.as_tensor_variable((3, 3))
+        neib_shape = tt.as_tensor_variable((3, 3))

        f = function(
            [], images2neibs(images, neib_shape, mode="wrap_centered"), mode=self.mode
@@ -550,7 +550,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
    def speed_neibs_half(self):
        shape = (100, 40, 18, 18)
        images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape))
-        neib_shape = T.as_tensor_variable((3, 3))
+        neib_shape = tt.as_tensor_variable((3, 3))

        f = function([], images2neibs(images, neib_shape, mode="half"), mode=self.mode)

@@ -560,7 +560,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
    def speed_neibs_full(self):
        shape = (100, 40, 18, 18)
        images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape))
-        neib_shape = T.as_tensor_variable((3, 3))
+        neib_shape = tt.as_tensor_variable((3, 3))

        f = function([], images2neibs(images, neib_shape, mode="full"), mode=self.mode)

@@ -570,7 +570,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
    def test_infer_shape(self):
        shape = (100, 40, 6, 3)
        images = np.ones(shape).astype("float32")
-        x = T.ftensor4()
+        x = tt.ftensor4()
        self._compile_and_check(
            [x],
            [images2neibs(x, neib_shape=(2, 1), mode="valid")],
@@ -585,7 +585,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
        )
        shape = (100, 40, 5, 4)
        images = np.ones(shape).astype("float32")
-        x = T.ftensor4()
+        x = tt.ftensor4()
        self._compile_and_check(
            [x],
            [images2neibs(x, neib_shape=(2, 1), mode="ignore_borders")],
@@ -594,7 +594,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
        )
        shape = (100, 40, 5, 3)
        images = np.ones(shape).astype("float32")
-        x = T.ftensor4()
+        x = tt.ftensor4()
        self._compile_and_check(
            [x],
            [images2neibs(x, neib_shape=(2, 3), mode="ignore_borders")],
@@ -604,7 +604,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):

        shape = (100, 40, 6, 7)
        images = np.ones(shape).astype("float32")
-        x = T.ftensor4()
+        x = tt.ftensor4()
        self._compile_and_check(
            [x],
            [images2neibs(x, neib_shape=(2, 2), mode="ignore_borders")],
@@ -613,7 +613,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
        )
        shape = (100, 40, 5, 10)
        images = np.ones(shape).astype("float32")
-        x = T.ftensor4()
+        x = tt.ftensor4()
        self._compile_and_check(
            [x],
            [images2neibs(x, neib_shape=(3, 3), mode="wrap_centered")],
@@ -622,7 +622,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
        )
        shape = (100, 40, 6, 4)
        images = np.ones(shape).astype("float32")
-        x = T.ftensor4()
+        x = tt.ftensor4()
        self._compile_and_check(
            [x],
            [images2neibs(x, neib_shape=(2, 1), mode="half")],
@@ -637,7 +637,7 @@ class TestImages2Neibs(unittest_tools.InferShapeTester):
        )
        shape = (100, 40, 6, 5)
        images = np.ones(shape).astype("float32")
-        x = T.ftensor4()
+        x = tt.ftensor4()
        self._compile_and_check(
            [x],
            [images2neibs(x, neib_shape=(2, 1), mode="full")],

--- a/tests/tensor/nnet/test_nnet.py
+++ b/tests/tensor/nnet/test_nnet.py
@@ -3,11 +3,9 @@ import pytest
 import numpy as np

 import theano
-
+import theano.tensor as tt

 from theano import config
-from theano import tensor as T
-from theano import tensor
 from theano import gof
 from theano.gof.opt import check_stack_trace
 from theano import printing
@@ -42,9 +40,10 @@ from theano.tensor.nnet import (
    binary_crossentropy,
    sigmoid_binary_crossentropy,
    confusion_matrix,
+    logsoftmax,
 )
 from theano.tensor import matrix, vector, lvector, scalar
-from theano.tensor.nnet.nnet import softsign
+from theano.tensor.nnet.nnet import softsign, LogSoftmax

 from tests import unittest_tools as utt
 from tests.tensor.test_basic import (
@@ -99,7 +98,7 @@ class TestSoftmax(utt.InferShapeTester):
        self._compile_and_check([admat], [Softmax()(admat)], [admat_val], Softmax)

    def test_vector(self):
-        x = T.vector()
+        x = tt.vector()
        f = theano.function([x], softmax_op(x))

        xv = np.random.randn(6).astype(config.floatX)
@@ -144,8 +143,8 @@ class TestSoftmaxWithBias(utt.InferShapeTester):
        )
        W = theano.shared(value=initial_W, name="W")
        vbias = theano.shared(value=0.1, name="vbias")  # 0.01
-        hid = T.vector("hid")
-        f = theano.function([hid], T.nnet.softmax_op(T.dot(hid, W.T) + vbias))
+        hid = tt.vector("hid")
+        f = theano.function([hid], softmax_op(tt.dot(hid, W.T) + vbias))
        ops = [node.op for node in f.maker.fgraph.toposort()]
        assert softmax_with_bias not in ops
        assert softmax_op in ops
@@ -156,7 +155,7 @@ class TestSoftmaxWithBias(utt.InferShapeTester):
    def test_softmax_with_bias_trace(self):
        a = theano.shared(np.random.randn(3).astype(config.floatX))
        b = theano.shared(np.float32(np.random.randn()))
-        sm = T.nnet.softmax(a + b)
+        sm = softmax(a + b)
        f = theano.function([], sm)
        assert check_stack_trace(f, ops_to_check="last")

@@ -202,7 +201,7 @@ class TestLogSoftmax(utt.InferShapeTester):
        utt.verify_grad(f, [np.random.rand(3, 4)])

    def test_vector(self):
-        x = T.vector()
+        x = tt.vector()
        f = theano.function([x], logsoftmax_op(x))

        xv = np.random.randn(6).astype(config.floatX)
@@ -218,16 +217,16 @@ class TestLogSoftmax(utt.InferShapeTester):
        m = theano.config.mode
        m = theano.compile.get_mode(m)
        m.check_isfinite = False
-        x, y = tensor.matrices("xy")
+        x, y = tt.matrices("xy")
        # regular softmax and crossentropy
-        sm = tensor.nnet.softmax(x)
-        cm = tensor.nnet.categorical_crossentropy(sm, y)
+        sm = softmax(x)
+        cm = categorical_crossentropy(sm, y)

        # numerically stable log-softmax with crossentropy
-        logsm = tensor.nnet.logsoftmax(x)
-        sm2 = tensor.exp(logsm)  # just used to show equivalence with sm
-        cm2 = -tensor.sum(y * logsm, axis=1)
-        grad = tensor.grad(cm2.mean(), x)
+        logsm = logsoftmax(x)
+        sm2 = tt.exp(logsm)  # just used to show equivalence with sm
+        cm2 = -tt.sum(y * logsm, axis=1)
+        grad = tt.grad(cm2.mean(), x)

        # create some inputs into a softmax that are large and labels
        a = np.exp(10 * np.random.rand(5, 10).astype(theano.config.floatX))
@@ -263,14 +262,12 @@ class TestLogSoftmax(utt.InferShapeTester):
        # Check that Log(Softmax(x)) is substituted with Logsoftmax(x). Note that
        # only the forward pass is checked (i.e., doesn't check the gradient)

-        x, y = tensor.matrices("xy")
-        sm = tensor.nnet.softmax(x)
-        logsm = tensor.log(sm)
+        x, y = tt.matrices("xy")
+        sm = softmax(x)
+        logsm = tt.log(sm)
        f = theano.function([x], logsm)
-        assert isinstance(
-            f.maker.fgraph.outputs[0].owner.op, theano.tensor.nnet.nnet.LogSoftmax
-        )
-        assert check_stack_trace(f, ops_to_check=theano.tensor.nnet.nnet.LogSoftmax)
+        assert isinstance(f.maker.fgraph.outputs[0].owner.op, LogSoftmax)
+        assert check_stack_trace(f, ops_to_check=LogSoftmax)

    def test_local_softmax_grad_optimization_and_big_input(self):
        # Test the Logsoftmax's grad substitution.
@@ -287,8 +284,8 @@ class TestLogSoftmax(utt.InferShapeTester):
        a = np.exp(10 * np.random.rand(5, 10).astype(theano.config.floatX))

        def myfunc(x):
-            sm = tensor.nnet.softmax(x)
-            logsm = tensor.log(sm)
+            sm = softmax(x)
+            logsm = tt.log(sm)
            return logsm

        # We set step to 0.1 because for big values we need a big epsilon
@@ -302,19 +299,17 @@ class TestLogSoftmax(utt.InferShapeTester):
        # but with a different elemwise operation than true_div is not
        # optimized.

-        x = T.matrix("x")
-        y = T.log(T.nnet.softmax(x))
-        g = T.grad(y.sum(), x)
+        x = tt.matrix("x")
+        y = tt.log(softmax(x))
+        g = tt.grad(y.sum(), x)

        softmax_grad_node = g.owner
        assert softmax_grad_node.op == softmax_grad
        true_div_node = softmax_grad_node.inputs[0].owner
-        assert true_div_node.op == tensor.true_div
+        assert true_div_node.op == tt.true_div

        # We replace the elemwise true_div op by an elemwise add.
-        new_g = softmax_grad(
-            tensor.add(*true_div_node.inputs), softmax_grad_node.inputs[1]
-        )
+        new_g = softmax_grad(tt.add(*true_div_node.inputs), softmax_grad_node.inputs[1])

        fgraph = gof.FunctionGraph([x], [new_g])
        theano.compile.mode.optdb.query(theano.compile.mode.OPT_FAST_RUN).optimize(
@@ -361,7 +356,7 @@ class TestCrossentropySoftmax1Hot:
        y_idx = [3]

        def f(a):
-            return crossentropy_softmax_1hot(T.shape_padleft(a), y_idx)[0]
+            return crossentropy_softmax_1hot(tt.shape_padleft(a), y_idx)[0]

        utt.verify_grad(f, [np.random.rand(4)])

@@ -369,7 +364,7 @@ class TestCrossentropySoftmax1Hot:
        y_idx = [3]

        def f(a, b):
-            return crossentropy_softmax_1hot(T.shape_padleft(a) + b, y_idx)[0]
+            return crossentropy_softmax_1hot(tt.shape_padleft(a) + b, y_idx)[0]

        utt.verify_grad(f, [np.random.rand(4), np.random.rand(4)])

@@ -380,7 +375,7 @@ class TestCrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
            def f(sm):
                # Class indices
                y = np.random.randint(low=0, high=5, size=10).astype(class_dtype)
-                return theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
+                return crossentropy_softmax_1hot_with_bias_dx(
                    np.random.rand(10), sm, y  # Gradient w.r.t. NLL.  # Softmax output.
                )

@@ -398,7 +393,7 @@ class TestCrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
        softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)

        def f(dy):
-            return theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
+            return crossentropy_softmax_1hot_with_bias_dx(
                dy, softmax_output, rng.randint(low=0, high=5, size=10)
            )

@@ -438,7 +433,7 @@ class TestCrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):

 class TestCrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
    def setup_method(self):
-        self.op = theano.tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias
+        self.op = crossentropy_softmax_argmax_1hot_with_bias
        super().setup_method()

    def test_grads(self):
@@ -504,7 +499,7 @@ class TestCrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):

 class TestPrepend(utt.InferShapeTester):
    def test_prepend_constant(self):
-        x = tensor.matrix("x")
+        x = tt.matrix("x")
        y = Prepend_scalar_constant_to_each_row(4.0)(x)
        f = theano.function([x], y)
        m = np.random.rand(3, 5).astype(config.floatX)
@@ -514,7 +509,7 @@ class TestPrepend(utt.InferShapeTester):

    def test_prepend_basic(self):
        """Test basic functionality."""
-        x = tensor.matrix("x")
+        x = tt.matrix("x")
        y = Prepend_scalar_to_each_row()(5.0, x)
        f = theano.function([x], y)
        m = np.ones((3, 5), dtype="float32")
@@ -562,8 +557,8 @@ class TestCrossentropyCategorical1HotGrad(utt.InferShapeTester):

 class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
    def test_grad(self):
-        x = tensor.matrix("x")
-        one_of_n = tensor.lvector("one_of_n")
+        x = tt.matrix("x")
+        one_of_n = tt.lvector("one_of_n")
        op = crossentropy_categorical_1hot
        xe = op(x, one_of_n)
        f = theano.function([x, one_of_n], xe)
@@ -574,7 +569,7 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        def oplike(x):
            return op(x, [0, 1])

-        tensor.verify_grad(oplike, [x_val], rng=np.random)
+        tt.verify_grad(oplike, [x_val], rng=np.random)

    def test_infer_shape(self):
        admat = matrix()
@@ -590,8 +585,8 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        )

    def test_softmax_optimizations(self):
-        x = tensor.matrix("x")
-        one_of_n = tensor.lvector("one_of_n")
+        x = tt.matrix("x")
+        one_of_n = tt.lvector("one_of_n")
        op = crossentropy_categorical_1hot
        # xe = op(x, one_of_n)

@@ -604,8 +599,8 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias

    def test_softmax_optimizations_vector(self):
-        x = tensor.vector("x")
-        one_of_n = tensor.lvector("one_of_n")
+        x = tt.vector("x")
+        one_of_n = tt.lvector("one_of_n")
        op = crossentropy_categorical_1hot
        fgraph = gof.FunctionGraph([x, one_of_n], [op(softmax_op(x), one_of_n)])
        assert fgraph.outputs[0].owner.op == op
@@ -616,9 +611,9 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias

    def test_softmax_optimizations_w_bias(self):
-        x = tensor.matrix("x")
-        b = tensor.vector("b")
-        one_of_n = tensor.lvector("one_of_n")
+        x = tt.matrix("x")
+        b = tt.vector("b")
+        one_of_n = tt.lvector("one_of_n")
        op = crossentropy_categorical_1hot
        # xe = op(x, one_of_n)

@@ -644,14 +639,14 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias

    def test_softmax_optimizations_w_bias2(self):
-        x = tensor.matrix("x")
-        b = tensor.vector("b")
-        c = tensor.vector("c")
-        one_of_n = tensor.lvector("one_of_n")
+        x = tt.matrix("x")
+        b = tt.vector("b")
+        c = tt.vector("c")
+        one_of_n = tt.lvector("one_of_n")
        op = crossentropy_categorical_1hot

        fgraph = gof.FunctionGraph(
-            [x, b, c, one_of_n], [op(softmax_op(T.add(x, b, c)), one_of_n)]
+            [x, b, c, one_of_n], [op(softmax_op(tt.add(x, b, c)), one_of_n)]
        )
        assert fgraph.outputs[0].owner.op == op

@@ -672,9 +667,9 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias

    def test_softmax_optimizations_w_bias_vector(self):
-        x = tensor.vector("x")
-        b = tensor.vector("b")
-        one_of_n = tensor.lvector("one_of_n")
+        x = tt.vector("x")
+        b = tt.vector("b")
+        one_of_n = tt.lvector("one_of_n")
        op = crossentropy_categorical_1hot
        fgraph = gof.FunctionGraph([x, b, one_of_n], [op(softmax_op(x + b), one_of_n)])
        assert fgraph.outputs[0].owner.op == op
@@ -695,12 +690,12 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias

    def test_softmax_grad_optimizations(self):
-        x = tensor.matrix("x")
-        one_of_n = tensor.lvector("one_of_n")
+        x = tt.matrix("x")
+        one_of_n = tt.lvector("one_of_n")
        op = crossentropy_categorical_1hot
        xe = op(softmax_op(x), one_of_n)
-        sum_xe = tensor.sum(xe)
-        g_x = tensor.grad(sum_xe, x)
+        sum_xe = tt.sum(xe)
+        g_x = tt.grad(sum_xe, x)
        fgraph = gof.FunctionGraph([x, one_of_n], [g_x])
        assert check_stack_trace(
            fgraph, ops_to_check=[crossentropy_softmax_1hot_with_bias_dx, softmax_op]
@@ -737,12 +732,12 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        assert not has_softmaxdx

    def test_softmax_grad_optimizations_vector(self):
-        x = tensor.vector("x")
-        one_of_n = tensor.lvector("one_of_n")
+        x = tt.vector("x")
+        one_of_n = tt.lvector("one_of_n")
        op = crossentropy_categorical_1hot
        xe = op(softmax_op(x), one_of_n)
-        sum_xe = tensor.sum(xe)
-        g_x = tensor.grad(sum_xe, x)
+        sum_xe = tt.sum(xe)
+        g_x = tt.grad(sum_xe, x)
        fgraph = gof.FunctionGraph([x, one_of_n], [g_x])

        # print 'BEFORE'
@@ -786,16 +781,16 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        x_val = rng.randn(3, 5).astype(config.floatX)
        b_val = rng.randn(5).astype(config.floatX)
        y_val = np.asarray([2, 4, 1])
-        x = T.matrix("x")
-        b = T.vector("b")
-        y = T.lvector("y")
+        x = tt.matrix("x")
+        b = tt.vector("b")
+        y = tt.lvector("y")

        # Basic case
        expressions = [
-            T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-            -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),
-            -T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
-            T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y]),
+            tt.sum(-tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
+            -tt.sum(tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
+            -tt.sum(tt.log(softmax(x))[tt.arange(y.shape[0]), y]),
+            tt.sum(-tt.log(softmax(x))[tt.arange(y.shape[0]), y]),
        ]
        for expr in expressions:
            # Verify the optimizer worked on the expressions
@@ -809,14 +804,14 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
                ops = [node.op for node in f.maker.fgraph.toposort()]
                assert len(ops) == 4
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
-                assert not [1 for o in ops if isinstance(o, T.AdvancedSubtensor)]
+                assert not [1 for o in ops if isinstance(o, tt.AdvancedSubtensor)]
                f(x_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise

            # Also verify the gradient wrt x
-            g = theano.function([x, y], T.grad(expr, x), mode=mode)
+            g = theano.function([x, y], tt.grad(expr, x), mode=mode)
            assert check_stack_trace(
                g, ops_to_check=[crossentropy_softmax_1hot_with_bias_dx, softmax_op]
            )
@@ -835,10 +830,10 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):

        # Test that a biased softmax is optimized correctly
        bias_expressions = [
-            T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-            -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-            -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
-            T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y]),
+            tt.sum(-tt.log(softmax(x + b)[tt.arange(y.shape[0]), y])),
+            -tt.sum(tt.log(softmax(b + x)[tt.arange(y.shape[0]), y])),
+            -tt.sum(tt.log(softmax(x + b))[tt.arange(y.shape[0]), y]),
+            tt.sum(-tt.log(softmax(b + x))[tt.arange(y.shape[0]), y]),
        ]

        for expr in bias_expressions:
@@ -856,7 +851,7 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
            except Exception:
                theano.printing.debugprint(f)
                raise
-            g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
+            g = theano.function([x, b, y], tt.grad(expr, x), mode=mode)
            assert check_stack_trace(
                g,
                ops_to_check=[
@@ -879,10 +874,10 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):

        # Test that using "mean" instead of sum works, too
        mean_expressions = [
-            T.mean(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-            -T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])),
-            -T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]),
-            T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y]),
+            tt.mean(-tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
+            -tt.mean(tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
+            -tt.mean(tt.log(softmax(x))[tt.arange(y.shape[0]), y]),
+            tt.mean(-tt.log(softmax(x))[tt.arange(y.shape[0]), y]),
        ]

        for expr in mean_expressions:
@@ -896,13 +891,13 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
                ops = [node.op for node in f.maker.fgraph.toposort()]
                assert len(ops) == 6
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
-                assert not [1 for o in ops if isinstance(o, T.AdvancedSubtensor)]
+                assert not [1 for o in ops if isinstance(o, tt.AdvancedSubtensor)]
                f(x_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise

-            g = theano.function([x, y], T.grad(expr, x), mode=mode)
+            g = theano.function([x, y], tt.grad(expr, x), mode=mode)
            assert check_stack_trace(
                g, ops_to_check=[crossentropy_softmax_1hot_with_bias_dx, softmax_op]
            )
@@ -922,10 +917,10 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
                raise

        mean_bias_expressions = [
-            T.mean(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-            -T.mean(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-            -T.mean(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
-            T.mean(-T.log(softmax(b + x))[T.arange(y.shape[0]), y]),
+            tt.mean(-tt.log(softmax(x + b)[tt.arange(y.shape[0]), y])),
+            -tt.mean(tt.log(softmax(b + x)[tt.arange(y.shape[0]), y])),
+            -tt.mean(tt.log(softmax(x + b))[tt.arange(y.shape[0]), y]),
+            tt.mean(-tt.log(softmax(b + x))[tt.arange(y.shape[0]), y]),
        ]

        for expr in mean_bias_expressions:
@@ -939,11 +934,11 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
                ops = [node.op for node in f.maker.fgraph.toposort()]
                assert len(ops) == 4
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
-                assert not [1 for o in ops if isinstance(o, T.AdvancedSubtensor)]
+                assert not [1 for o in ops if isinstance(o, tt.AdvancedSubtensor)]
            except Exception:
                theano.printing.debugprint(f)
                raise
-            g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
+            g = theano.function([x, b, y], tt.grad(expr, x), mode=mode)
            assert check_stack_trace(
                g,
                ops_to_check=[
@@ -972,14 +967,14 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        rng = np.random.RandomState(utt.fetch_seed())
        x_val = rng.randn(3, 5).astype(config.floatX)
        y_val = np.asarray([2, 4, 1], dtype="int64")
-        x = T.matrix("x")
-        y = T.lvector("y")
-        yi = T.cast(y, "int32")
+        x = tt.matrix("x")
+        y = tt.lvector("y")
+        yi = tt.cast(y, "int32")
        expressions = [
-            T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-            -T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-            -T.sum(T.log(softmax(x))[T.arange(yi.shape[0]), yi]),
-            T.sum(-T.log(softmax(x))[T.arange(yi.shape[0]), yi]),
+            tt.sum(-tt.log(softmax(x)[tt.arange(yi.shape[0]), yi])),
+            -tt.sum(tt.log(softmax(x)[tt.arange(yi.shape[0]), yi])),
+            -tt.sum(tt.log(softmax(x))[tt.arange(yi.shape[0]), yi]),
+            tt.sum(-tt.log(softmax(x))[tt.arange(yi.shape[0]), yi]),
        ]

        for expr in expressions:
@@ -991,14 +986,14 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
                ops = [node.op for node in f.maker.fgraph.toposort()]
                assert len(ops) == 5
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
-                assert not [1 for o in ops if isinstance(o, T.AdvancedSubtensor)]
+                assert not [1 for o in ops if isinstance(o, tt.AdvancedSubtensor)]
                f(x_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise

            # Also verify the gradient wrt x
-            g = theano.function([x, y], T.grad(expr, x), mode=mode)
+            g = theano.function([x, y], tt.grad(expr, x), mode=mode)
            if verbose:
                theano.printing.debugprint(g)
            try:
@@ -1021,13 +1016,13 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        x_val = rng.randn(5).astype(config.floatX)
        y_val = np.asarray([2])

-        x = T.vector("x")
-        y = T.lvector("y")
+        x = tt.vector("x")
+        y = tt.lvector("y")

        # Test that a biased softmax is optimized correctly
        bias_expressions = [
-            T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-            -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),
+            tt.sum(-tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
+            -tt.sum(tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
        ]

        for expr in bias_expressions:
@@ -1038,12 +1033,12 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
                ops = [node.op for node in f.maker.fgraph.toposort()]
                assert len(ops) == 5
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
-                assert not [1 for o in ops if isinstance(o, T.AdvancedSubtensor)]
+                assert not [1 for o in ops if isinstance(o, tt.AdvancedSubtensor)]
                f(x_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise
-            g = theano.function([x, y], T.grad(expr, x), mode=mode)
+            g = theano.function([x, y], tt.grad(expr, x), mode=mode)
            if verbose:
                printing.debugprint(g)
            try:
@@ -1067,16 +1062,16 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        b_val = rng.randn(5).astype(config.floatX)
        y_val = np.asarray([2])

-        x = T.vector("x")
-        b = T.vector("b")
-        y = T.lvector("y")
+        x = tt.vector("x")
+        b = tt.vector("b")
+        y = tt.lvector("y")

        # Test that a biased softmax is optimized correctly
        bias_expressions = [
-            T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-            -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-            -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
-            T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y]),
+            tt.sum(-tt.log(softmax(x + b)[tt.arange(y.shape[0]), y])),
+            -tt.sum(tt.log(softmax(b + x)[tt.arange(y.shape[0]), y])),
+            -tt.sum(tt.log(softmax(x + b))[tt.arange(y.shape[0]), y]),
+            tt.sum(-tt.log(softmax(b + x))[tt.arange(y.shape[0]), y]),
        ]

        for expr in bias_expressions:
@@ -1088,7 +1083,7 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
                # [big_op, sum, dim_shuffle]
                assert len(ops) == 3
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
-                assert not [1 for o in ops if isinstance(o, T.AdvancedSubtensor)]
+                assert not [1 for o in ops if isinstance(o, tt.AdvancedSubtensor)]
                f(x_val, b_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
@@ -1097,7 +1092,7 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
            backup = config.warn.sum_div_dimshuffle_bug
            config.warn.sum_div_dimshuffle_bug = False
            try:
-                g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
+                g = theano.function([x, b, y], tt.grad(expr, x), mode=mode)
            finally:
                config.warn.sum_div_dimshuffle_bug = backup

@@ -1127,17 +1122,17 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        b_val = rng.randn(5).astype(config.floatX)
        y_val = np.asarray([2])

-        x = T.vector("x")
-        b = T.vector("b")
-        y_ = T.lvector("y_")
+        x = tt.vector("x")
+        b = tt.vector("b")
+        y_ = tt.lvector("y_")
        y = y_.flatten()

        # Test that a biased softmax is optimized correctly
        bias_expressions = [
-            T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-            -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-            -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
-            T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y]),
+            tt.sum(-tt.log(softmax(x + b)[tt.arange(y.shape[0]), y])),
+            -tt.sum(tt.log(softmax(b + x)[tt.arange(y.shape[0]), y])),
+            -tt.sum(tt.log(softmax(x + b))[tt.arange(y.shape[0]), y]),
+            tt.sum(-tt.log(softmax(b + x))[tt.arange(y.shape[0]), y]),
        ]

        for expr in bias_expressions:
@@ -1149,7 +1144,7 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
                # [big_op, sum, dim_shuffle, flatten]
                assert len(ops) <= 4
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
-                assert not [1 for o in ops if isinstance(o, T.AdvancedSubtensor)]
+                assert not [1 for o in ops if isinstance(o, tt.AdvancedSubtensor)]
                f(x_val, b_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
@@ -1158,7 +1153,7 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
            backup = config.warn.sum_div_dimshuffle_bug
            config.warn.sum_div_dimshuffle_bug = False
            try:
-                g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
+                g = theano.function([x, b, y], tt.grad(expr, x), mode=mode)
            finally:
                config.warn.sum_div_dimshuffle_bug = backup

@@ -1189,17 +1184,17 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        b_val = rng.randn(5).astype(config.floatX)
        y_val = np.asarray([2])

-        x = T.vector("x")
-        b = T.vector("b")
-        y_ = T.lvector("y_")
-        y = T.specify_shape(y_, (1,))
+        x = tt.vector("x")
+        b = tt.vector("b")
+        y_ = tt.lvector("y_")
+        y = tt.specify_shape(y_, (1,))

        # Test that a biased softmax is optimized correctly
        bias_expressions = [
-            T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-            -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-            -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
-            T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y]),
+            tt.sum(-tt.log(softmax(x + b)[tt.arange(y.shape[0]), y])),
+            -tt.sum(tt.log(softmax(b + x)[tt.arange(y.shape[0]), y])),
+            -tt.sum(tt.log(softmax(x + b))[tt.arange(y.shape[0]), y]),
+            tt.sum(-tt.log(softmax(b + x))[tt.arange(y.shape[0]), y]),
        ]

        for expr in bias_expressions:
@@ -1211,7 +1206,7 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
                # [big_op, sum, dim_shuffle, specify_shape]
                assert len(ops) <= 4
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
-                assert not [1 for o in ops if isinstance(o, T.AdvancedSubtensor)]
+                assert not [1 for o in ops if isinstance(o, tt.AdvancedSubtensor)]
                f(x_val, b_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
@@ -1220,7 +1215,7 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
            backup = config.warn.sum_div_dimshuffle_bug
            config.warn.sum_div_dimshuffle_bug = False
            try:
-                g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
+                g = theano.function([x, b, y], tt.grad(expr, x), mode=mode)
            finally:
                config.warn.sum_div_dimshuffle_bug = backup

@@ -1246,9 +1241,9 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
        rng = np.random.RandomState(utt.fetch_seed())
        x_val = rng.randn(3, 5).astype(config.floatX)
        y_val = np.asarray([2, 4, 1])
-        x = T.matrix("x")
-        y = T.lvector("y")
-        a = T.scalar("a")
+        x = tt.matrix("x")
+        y = tt.lvector("y")
+        a = tt.scalar("a")

        def validate_fn_graph(func):
            # The graph of the function should not have softmax anymore
@@ -1282,22 +1277,22 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):

        # Cases to test
        expressions = [
-            a * T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-            -a * T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),
-            a * (-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))),
-            a * T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),
-            a * T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y]),
-            -a * T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
-            a * (-T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y])),
-            a * T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
-            a * T.mean(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-            -a * T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])),
-            a * (-T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y]))),
-            a * T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])),
-            a * T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y]),
-            -a * T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]),
-            a * (-T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y])),
-            a * T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]),
+            a * tt.sum(-tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
+            -a * tt.sum(tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
+            a * (-tt.sum(tt.log(softmax(x)[tt.arange(y.shape[0]), y]))),
+            a * tt.sum(tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
+            a * tt.sum(-tt.log(softmax(x))[tt.arange(y.shape[0]), y]),
+            -a * tt.sum(tt.log(softmax(x))[tt.arange(y.shape[0]), y]),
+            a * (-tt.sum(tt.log(softmax(x))[tt.arange(y.shape[0]), y])),
+            a * tt.sum(tt.log(softmax(x))[tt.arange(y.shape[0]), y]),
+            a * tt.mean(-tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
+            -a * tt.mean(tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
+            a * (-tt.mean(tt.log(softmax(x)[tt.arange(y.shape[0]), y]))),
+            a * tt.mean(tt.log(softmax(x)[tt.arange(y.shape[0]), y])),
+            a * tt.mean(-tt.log(softmax(x))[tt.arange(y.shape[0]), y]),
+            -a * tt.mean(tt.log(softmax(x))[tt.arange(y.shape[0]), y]),
+            a * (-tt.mean(tt.log(softmax(x))[tt.arange(y.shape[0]), y])),
+            a * tt.mean(tt.log(softmax(x))[tt.arange(y.shape[0]), y]),
        ]

        for expr in expressions:
@@ -1312,7 +1307,7 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):
                raise

            # Verify the gradient wrt x
-            g = theano.function([x, y, a], T.grad(expr, x), mode=mode)
+            g = theano.function([x, y, a], tt.grad(expr, x), mode=mode)
            try:
                assert 3 <= len(g.maker.fgraph.toposort()) <= 6
                validate_grad_graph(g)
@@ -1323,7 +1318,7 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):

            # Verify the gradient when providing output gradient
            h = theano.function(
-                [x, y, a], T.grad(expr, x, known_grads={expr: a * x.sum()}), mode=mode
+                [x, y, a], tt.grad(expr, x, known_grads={expr: a * x.sum()}), mode=mode
            )
            try:
                assert 6 <= len(h.maker.fgraph.toposort()) <= 8
@@ -1335,10 +1330,10 @@ class TestCrossentropyCategorical1Hot(utt.InferShapeTester):


 def test_argmax_pushdown():
-    x = tensor.matrix()
+    x = tt.matrix()
    for sm in [softmax_graph, softmax_op]:
        # test that the max_and_argmax is pushed down if the max is not used
-        out = tensor.max_and_argmax(sm(tensor.exp(tensor.tanh(sigmoid(x)))), axis=-1)[1]
+        out = tt.max_and_argmax(sm(tt.exp(tt.tanh(sigmoid(x)))), axis=-1)[1]
        fgraph = gof.FunctionGraph([x], [out])
        theano.compile.mode.optdb.query(theano.compile.mode.OPT_FAST_RUN).optimize(
            fgraph
@@ -1348,11 +1343,11 @@ def test_argmax_pushdown():
        # for node in fgraph.toposort():
        # print node.op
        assert len(fgraph.toposort()) == 1
-        assert isinstance(fgraph.toposort()[0].op, tensor.basic.Argmax)
-        assert check_stack_trace(fgraph, ops_to_check=tensor.basic.Argmax)
-        x = tensor.matrix()
+        assert isinstance(fgraph.toposort()[0].op, tt.Argmax)
+        assert check_stack_trace(fgraph, ops_to_check=tt.Argmax)
+        x = tt.matrix()
        # test that the max_and_argmax is not pushed down if the max is used
-        out = tensor.max_and_argmax(sm(tensor.exp(tensor.tanh(sigmoid(x)))), axis=-1)[0]
+        out = tt.max_and_argmax(sm(tt.exp(tt.tanh(sigmoid(x)))), axis=-1)[0]
        fgraph = gof.FunctionGraph([x], [out])

        assert hasattr(fgraph.outputs[0].tag, "trace")
@@ -1369,17 +1364,17 @@ def test_argmax_pushdown():
        # for node in fgraph.toposort():
        # print node.op
        assert len(fgraph.toposort()) == 3
-        assert isinstance(fgraph.toposort()[0].op, tensor.Elemwise)
+        assert isinstance(fgraph.toposort()[0].op, tt.Elemwise)
        assert isinstance(fgraph.toposort()[1].op, Softmax)
-        assert isinstance(fgraph.toposort()[2].op, tensor.CAReduce)
+        assert isinstance(fgraph.toposort()[2].op, tt.CAReduce)
        assert isinstance(fgraph.toposort()[2].op.scalar_op, theano.scalar.Maximum)


 def test_argmax_pushdown_bias():
-    x = tensor.matrix()
-    b = tensor.vector()
+    x = tt.matrix()
+    b = tt.vector()

-    out = tensor.argmax(softmax_with_bias(x, b), axis=-1)
+    out = tt.argmax(softmax_with_bias(x, b), axis=-1)
    fgraph = gof.FunctionGraph([x, b], [out])

    theano.compile.mode.optdb.query(theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
@@ -1387,16 +1382,16 @@ def test_argmax_pushdown_bias():
    # print 'AFTER'
    # for node in fgraph.toposort():
    #    print node.op
-    types_to_check = (tensor.DimShuffle, tensor.Elemwise, tensor.Argmax)
+    types_to_check = (tt.DimShuffle, tt.Elemwise, tt.Argmax)
    assert len(fgraph.toposort()) == 3

    for i, type in enumerate(types_to_check):
        assert isinstance(fgraph.toposort()[i].op, type)
    assert check_stack_trace(fgraph, ops_to_check=types_to_check)

-    x = tensor.matrix()
-    b = tensor.vector()
-    out = tensor.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
+    x = tt.matrix()
+    b = tt.vector()
+    out = tt.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
    fgraph = gof.FunctionGraph([x, b], [out])

    backup = config.warn.argmax_pushdown_bug
@@ -1413,9 +1408,9 @@ def test_argmax_pushdown_bias():
    #    print node.op
    assert len(fgraph.toposort()) == 2
    assert isinstance(fgraph.toposort()[0].op, SoftmaxWithBias)
-    assert isinstance(fgraph.toposort()[1].op, tensor.CAReduce)
+    assert isinstance(fgraph.toposort()[1].op, tt.CAReduce)
    assert isinstance(fgraph.toposort()[1].op.scalar_op, theano.scalar.Maximum)
-    assert check_stack_trace(fgraph, ops_to_check=(SoftmaxWithBias, tensor.CAReduce))
+    assert check_stack_trace(fgraph, ops_to_check=(SoftmaxWithBias, tt.CAReduce))


 def test_asymptotic_32():
@@ -1427,17 +1422,15 @@ def test_asymptotic_32():

    for dtype in "float32", "float64":
        if dtype == "float32":
-            x = tensor.fmatrix()
-            x2 = tensor.fvector()
+            x = tt.fmatrix()
+            x2 = tt.fvector()
        else:
-            x = tensor.dmatrix()
-            x2 = tensor.dvector()
-        y = tensor.lvector()
+            x = tt.dmatrix()
+            x2 = tt.dvector()
+        y = tt.lvector()

        c = categorical_crossentropy(softmax(x + x2), y)
-        f = theano.function(
-            [x, y, x2], [c.sum(), tensor.grad(c.sum(), x)], mode="FAST_RUN"
-        )
+        f = theano.function([x, y, x2], [c.sum(), tt.grad(c.sum(), x)], mode="FAST_RUN")

        xval = np.zeros((5, 5), dtype=dtype).astype(dtype)
        x2val = np.zeros(5, dtype=xval.dtype).astype(dtype)
@@ -1479,8 +1472,8 @@ class TestSoftmaxOpt:
        self.mode = self.mode.including("canonicalize")

    def test_basic(self):
-        c = T.matrix()
-        p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, "x")
+        c = tt.matrix()
+        p_y = tt.exp(c) / tt.exp(c).sum(axis=1).dimshuffle(0, "x")

        # test that function contains softmax and no div.
        f = theano.function([c], p_y, mode=self.mode)
@@ -1496,8 +1489,8 @@ class TestSoftmaxOpt:
        f(self.rng.rand(3, 4).astype(config.floatX))

    def test_basic_keepdims(self):
-        c = T.matrix()
-        p_y = T.exp(c) / T.exp(c).sum(axis=1, keepdims=True)
+        c = tt.matrix()
+        p_y = tt.exp(c) / tt.exp(c).sum(axis=1, keepdims=True)

        # test that function contains softmax and no div.
        f = theano.function([c], p_y, mode=self.mode)
@@ -1514,15 +1507,15 @@ class TestSoftmaxOpt:

    @pytest.mark.skip(reason="Optimization not enabled for the moment")
    def test_grad(self):
-        c = T.matrix()
-        p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, "x")
+        c = tt.matrix()
+        p_y = tt.exp(c) / tt.exp(c).sum(axis=1).dimshuffle(0, "x")

        # test that function contains softmax and softmaxgrad
-        w = T.matrix()
+        w = tt.matrix()
        backup = config.warn.sum_div_dimshuffle_bug
        config.warn.sum_div_dimshuffle_bug = False
        try:
-            g = theano.function([c, w], T.grad((p_y * w).sum(), c))
+            g = theano.function([c, w], tt.grad((p_y * w).sum(), c))
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
        g_ops = [n.op for n in g.maker.fgraph.toposort()]
@@ -1538,8 +1531,8 @@ class TestSoftmaxOpt:
    @pytest.mark.skip(reason="Optimization not enabled for the moment")
    def test_transpose_basic(self):
        # this should be a transposed softmax
-        c = T.matrix()
-        p_y = T.exp(c) / T.exp(c).sum(axis=0)
+        c = tt.matrix()
+        p_y = tt.exp(c) / tt.exp(c).sum(axis=0)

        # test that function contains softmax and no div.
        theano.function([c], p_y)
@@ -1549,7 +1542,7 @@ class TestSoftmaxOpt:
        backup = config.warn.sum_div_dimshuffle_bug
        config.warn.sum_div_dimshuffle_bug = False
        try:
-            theano.function([c], T.grad(p_y.sum(), c))
+            theano.function([c], tt.grad(p_y.sum(), c))
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
        # printing.debugprint(g)
@@ -1557,8 +1550,8 @@ class TestSoftmaxOpt:
    @pytest.mark.skip(reason="Optimization not enabled for the moment")
    def test_1d_basic(self):
        # this should be a softmax, but of a one-row matrix
-        c = T.vector()
-        p_y = T.exp(c) / T.exp(c).sum()
+        c = tt.vector()
+        p_y = tt.exp(c) / tt.exp(c).sum()

        # test that function contains softmax and no div.
        theano.function([c], p_y)
@@ -1568,7 +1561,7 @@ class TestSoftmaxOpt:
        backup = config.warn.sum_div_dimshuffle_bug
        config.warn.sum_div_dimshuffle_bug = False
        try:
-            theano.function([c], T.grad(p_y.sum(), c))
+            theano.function([c], tt.grad(p_y.sum(), c))
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
        # printing.debugprint(g)
@@ -1605,7 +1598,7 @@ def test_stabilize_log_softmax():

    x = matrix()
    y = softmax(x)
-    z = theano.tensor.log(y)
+    z = tt.log(y)

    f = theano.function([x], z, mode=mode)
    assert check_stack_trace(f, ops_to_check="all")
@@ -1696,8 +1689,8 @@ def test_h_softmax():
    #############
    # Build graph
    #############
-    x = tensor.matrix("x")
-    y = tensor.ivector("y")
+    x = tt.matrix("x")
+    y = tt.ivector("y")

    # This only computes the output corresponding to the target
    y_hat_tg = h_softmax(
@@ -1779,7 +1772,7 @@ def test_selu():

 def test_binary_crossentropy_reshape():
    # Reported as https://github.com/Theano/Theano/issues/4086
-    a = tensor.tensor4("a")
+    a = tt.tensor4("a")
    for c in (
        binary_crossentropy(sigmoid(a.reshape((-1, 1))), 1).sum(),
        binary_crossentropy(sigmoid(a).reshape((-1, 1)), 1).sum(),
@@ -1818,7 +1811,7 @@ class TestSigmoidBinaryCrossentropy:
        # Test sigmoid_binary_crossentropy(p, t) ==
        #      binary_crossentropy(sigmoid(p), t).

-        pred, target = inputs = tensor.vectors("pt")
+        pred, target = inputs = tt.vectors("pt")

        reference_val = binary_crossentropy(sigmoid(pred), target)
        f_reference = theano.function(inputs, reference_val)
@@ -1845,8 +1838,8 @@ def test_confusion_matrix():
        conf_mat = np.asarray(conf_mat)
        return [conf_mat, order]

-    x = tensor.vector()
-    y = tensor.vector()
+    x = tt.vector()
+    y = tt.vector()
    f = theano.function([x, y], confusion_matrix(x, y))
    list_inputs = [
        [[0, 1, 2, 1, 0], [0, 0, 2, 1, 2]],

--- a/tests/tensor/nnet/test_sigm.py
+++ b/tests/tensor/nnet/test_sigm.py
 import numpy as np
-import theano.tensor.inplace

-from theano import tensor as T, config
-from theano.tensor import basic as tensor
+import theano
+import theano.tensor as tt
+
+from theano import config
+from theano.tensor.inplace import neg_inplace
 from theano.gof.opt import check_stack_trace
 from theano.gof.toolbox import is_same_graph
 from theano.tensor.nnet import (
@@ -19,6 +21,7 @@ from theano.tensor.nnet.sigm import (
    perform_sigm_times_exp,
    register_local_1msigmoid,
    simplify_mul,
+    ScalarSoftplus,
 )

 from tests import unittest_tools as utt
@@ -124,7 +127,7 @@ class TestSigmoidOpts:
        """
        if excluding is None:
            excluding = []
-        m = theano.config.mode
+        m = config.mode
        if m == "FAST_COMPILE":
            mode = theano.compile.mode.get_mode("FAST_RUN")
        else:
@@ -137,68 +140,68 @@ class TestSigmoidOpts:
    def test_exp_over_1_plus_exp(self):
        m = self.get_mode(excluding=["local_elemwise_fusion"])

-        x = T.vector()
+        x = tt.vector()
        data = np.random.rand(54).astype(config.floatX)

        backup = config.warn.identify_1pexp_bug
        config.warn.identify_1pexp_bug = False
        try:
            # tests exp_over_1_plus_exp
-            f = theano.function([x], T.exp(x) / (1 + T.exp(x)), mode=m)
+            f = theano.function([x], tt.exp(x) / (1 + tt.exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] == [sigmoid]
            f(data)
-            f = theano.function([x], T.exp(x) / (2 + T.exp(x)), mode=m)
+            f = theano.function([x], tt.exp(x) / (2 + tt.exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
-            f = theano.function([x], T.exp(x) / (1 - T.exp(x)), mode=m)
+            f = theano.function([x], tt.exp(x) / (1 - tt.exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
-            f = theano.function([x], T.exp(x + 1) / (1 + T.exp(x)), mode=m)
+            f = theano.function([x], tt.exp(x + 1) / (1 + tt.exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)

            # tests inv_1_plus_exp
-            f = theano.function([x], T.fill(x, 1.0) / (1 + T.exp(-x)), mode=m)
+            f = theano.function([x], tt.fill(x, 1.0) / (1 + tt.exp(-x)), mode=m)
            # todo: solve issue #4589 first
            # assert check_stack_trace(f, ops_to_check=sigmoid)
            assert [node.op for node in f.maker.fgraph.toposort()] == [sigmoid]
            f(data)
-            f = theano.function([x], T.fill(x, 1.0) / (2 + T.exp(-x)), mode=m)
+            f = theano.function([x], tt.fill(x, 1.0) / (2 + tt.exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
-            f = theano.function([x], T.fill(x, 1.0) / (1 - T.exp(-x)), mode=m)
+            f = theano.function([x], tt.fill(x, 1.0) / (1 - tt.exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
-            f = theano.function([x], T.fill(x, 1.1) / (1 + T.exp(-x)), mode=m)
+            f = theano.function([x], tt.fill(x, 1.1) / (1 + tt.exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)

            # tests inv_1_plus_exp with neg
-            f = theano.function([x], T.fill(x, -1.0) / (1 + T.exp(-x)), mode=m)
+            f = theano.function([x], tt.fill(x, -1.0) / (1 + tt.exp(-x)), mode=m)
            # todo: solve issue #4589 first
            # assert check_stack_trace(
-            #     f, ops_to_check=[sigmoid, theano.tensor.inplace.neg_inplace])
+            #     f, ops_to_check=[sigmoid, neg_inplace])
            assert [node.op for node in f.maker.fgraph.toposort()] == [
                sigmoid,
-                theano.tensor.inplace.neg_inplace,
+                neg_inplace,
            ]
            f(data)
-            f = theano.function([x], T.fill(x, -1.0) / (1 - T.exp(-x)), mode=m)
+            f = theano.function([x], tt.fill(x, -1.0) / (1 - tt.exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
-                theano.tensor.inplace.neg_inplace,
+                neg_inplace,
            ]
            f(data)
-            f = theano.function([x], T.fill(x, -1.0) / (2 + T.exp(-x)), mode=m)
+            f = theano.function([x], tt.fill(x, -1.0) / (2 + tt.exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
-                theano.tensor.inplace.neg_inplace,
+                neg_inplace,
            ]
            f(data)
-            f = theano.function([x], T.fill(x, -1.1) / (1 + T.exp(-x)), mode=m)
+            f = theano.function([x], tt.fill(x, -1.1) / (1 + tt.exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
-                theano.tensor.inplace.neg_inplace,
+                neg_inplace,
            ]
            f(data)

@@ -208,66 +211,66 @@ class TestSigmoidOpts:
            # = - (sigm(x) * sigm(x))
            f = theano.function(
                [x],
-                (T.fill(x, -1.0) * T.exp(x)) / ((1 + T.exp(x)) * (1 + T.exp(-x))),
+                (tt.fill(x, -1.0) * tt.exp(x)) / ((1 + tt.exp(x)) * (1 + tt.exp(-x))),
                mode=m,
            )
            # todo: solve issue #4589 first
-            # assert check_stack_trace(f, ops_to_check=[sigmoid, T.mul])
-            assert [node.op for node in f.maker.fgraph.toposort()] == [sigmoid, T.mul]
+            # assert check_stack_trace(f, ops_to_check=[sigmoid, tt.mul])
+            assert [node.op for node in f.maker.fgraph.toposort()] == [sigmoid, tt.mul]
            f(data)
            f = theano.function(
                [x],
-                (T.fill(x, -1.1) * T.exp(x)) / ((1 + T.exp(x)) * (1 + T.exp(-x))),
+                (tt.fill(x, -1.1) * tt.exp(x)) / ((1 + tt.exp(x)) * (1 + tt.exp(-x))),
                mode=m,
            )
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
-                T.mul,
-                theano.tensor.inplace.neg_inplace,
+                tt.mul,
+                neg_inplace,
            ]
            f(data)
            f = theano.function(
                [x],
-                (T.fill(x, -1.0) * T.exp(x)) / ((2 + T.exp(x)) * (1 + T.exp(-x))),
+                (tt.fill(x, -1.0) * tt.exp(x)) / ((2 + tt.exp(x)) * (1 + tt.exp(-x))),
                mode=m,
            )
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
-                T.mul,
-                theano.tensor.inplace.neg_inplace,
+                tt.mul,
+                neg_inplace,
            ]
            f(data)
            f = theano.function(
                [x],
-                (T.fill(x, -1.0) * T.exp(x)) / ((1 + T.exp(x)) * (2 + T.exp(-x))),
+                (tt.fill(x, -1.0) * tt.exp(x)) / ((1 + tt.exp(x)) * (2 + tt.exp(-x))),
                mode=m,
            )
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
-                T.mul,
-                theano.tensor.inplace.neg_inplace,
+                tt.mul,
+                neg_inplace,
            ]
            f(data)
            f = theano.function(
                [x],
-                (T.fill(x, -1.0) * T.exp(x)) / ((1 + T.exp(x)) * (1 + T.exp(x))),
+                (tt.fill(x, -1.0) * tt.exp(x)) / ((1 + tt.exp(x)) * (1 + tt.exp(x))),
                mode=m,
            )
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
-                T.mul,
-                theano.tensor.inplace.neg_inplace,
+                tt.mul,
+                neg_inplace,
            ]
            f(data)
            f = theano.function(
                [x],
-                (T.fill(x, -1.0) * T.exp(x)) / ((1 + T.exp(x)) * (2 + T.exp(-x))),
+                (tt.fill(x, -1.0) * tt.exp(x)) / ((1 + tt.exp(x)) * (2 + tt.exp(-x))),
                mode=m,
            )
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
-                T.mul,
-                theano.tensor.inplace.neg_inplace,
+                tt.mul,
+                neg_inplace,
            ]
            f(data)

@@ -280,21 +283,21 @@ class TestSigmoidOpts:
            return

        m = self.get_mode()
-        x = T.fmatrix()
+        x = tt.fmatrix()

        # tests exp_over_1_plus_exp
-        f = theano.function([x], 1 - T.exp(x) / (1 + T.exp(x)), mode=m)
-        assert check_stack_trace(f, ops_to_check=[tensor.neg, sigmoid_inplace])
+        f = theano.function([x], 1 - tt.exp(x) / (1 + tt.exp(x)), mode=m)
+        assert check_stack_trace(f, ops_to_check=[tt.neg, sigmoid_inplace])
        assert [node.op for node in f.maker.fgraph.toposort()] == [
-            tensor.neg,
+            tt.neg,
            sigmoid_inplace,
        ]

        # tests inv_1_plus_exp
-        f = theano.function([x], 1 - T.fill(x, 1.0) / (1 + T.exp(-x)), mode=m)
-        assert check_stack_trace(f, ops_to_check=[tensor.neg, sigmoid_inplace])
+        f = theano.function([x], 1 - tt.fill(x, 1.0) / (1 + tt.exp(-x)), mode=m)
+        assert check_stack_trace(f, ops_to_check=[tt.neg, sigmoid_inplace])
        assert [node.op for node in f.maker.fgraph.toposort()] == [
-            tensor.neg,
+            tt.neg,
            sigmoid_inplace,
        ]

@@ -308,36 +311,30 @@ class TestSigmoidOpts:
            assert [node.op for node in func.maker.fgraph.toposort()] == ops

        m = self.get_mode(excluding=["local_elemwise_fusion", "inplace"])
-        x, y = tensor.vectors("x", "y")
+        x, y = tt.vectors("x", "y")

-        f = theano.function([x], sigmoid(-x) * tensor.exp(x), mode=m)
+        f = theano.function([x], sigmoid(-x) * tt.exp(x), mode=m)
        match(f, [sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

-        f = theano.function([x], sigmoid(x) * tensor.exp(-x), mode=m)
-        match(f, [tensor.neg, sigmoid])
+        f = theano.function([x], sigmoid(x) * tt.exp(-x), mode=m)
+        match(f, [tt.neg, sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

-        f = theano.function([x], -(-(-(sigmoid(x)))) * tensor.exp(-x), mode=m)
-        match(f, [tensor.neg, sigmoid, tensor.neg])
+        f = theano.function([x], -(-(-(sigmoid(x)))) * tt.exp(-x), mode=m)
+        match(f, [tt.neg, sigmoid, tt.neg])
        # assert check_stack_trace(f, ops_to_check=sigmoid)

        f = theano.function(
            [x, y],
-            (
-                sigmoid(x)
-                * sigmoid(-y)
-                * -tensor.exp(-x)
-                * tensor.exp(x * y)
-                * tensor.exp(y)
-            ),
+            (sigmoid(x) * sigmoid(-y) * -tt.exp(-x) * tt.exp(x * y) * tt.exp(y)),
            mode=m,
        )
        topo = f.maker.fgraph.toposort()
-        for op, nb in [(sigmoid, 2), (tensor.mul, 2), (tensor.neg, 1), (tensor.exp, 1)]:
+        for op, nb in [(sigmoid, 2), (tt.mul, 2), (tt.neg, 1), (tt.exp, 1)]:
            assert sum([n.op == op for n in topo]) == nb
-        # assert check_stack_trace(f, ops_to_check=[sigmoid, tensor.mul,
-        #                                           tensor.exp])
+        # assert check_stack_trace(f, ops_to_check=[sigmoid, tt.mul,
+        #                                           tt.exp])

    def test_perform_sigm_times_exp(self):
        # Test the core function doing the `sigm_times_exp` optimization.
@@ -345,8 +342,8 @@ class TestSigmoidOpts:
        # It is easier to test different graph scenarios this way than by
        # compiling a theano function.

-        x, y, z, t = tensor.vectors("x", "y", "z", "t")
-        exp = tensor.exp
+        x, y, z, t = tt.vectors("x", "y", "z", "t")
+        exp = tt.exp

        def ok(expr1, expr2):
            trees = [parse_mul_tree(e) for e in (expr1, expr2)]
@@ -386,11 +383,11 @@ class TestSigmoidOpts:
        # At some point, this returned nan, because (1 - sigm(x)) was
        # on both the numerator and the denominator of a fraction,
        # but the two nodes in question had not been merged.
-        x = tensor.matrix("x")
-        lr = tensor.scalar("lr")
+        x = tt.matrix("x")
+        lr = tt.scalar("lr")

        s = sigmoid(x)
-        l = T.log(1 - s)
+        l = tt.log(1 - s)
        c = l.mean()
        ux = x - lr * theano.grad(c, x)

@@ -403,7 +400,7 @@ class TestSigmoidOpts:
            assert not np.isnan(ux_v)

    def test_local_ultra_fast_sigmoid(self):
-        x = tensor.matrix("x")
+        x = tt.matrix("x")
        s = sigmoid(x)

        mode = self.get_mode("local_ultra_fast_sigmoid")
@@ -422,7 +419,7 @@ class TestSigmoidOpts:
        f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])

    def test_local_hard_sigmoid(self):
-        x = tensor.matrix("x")
+        x = tt.matrix("x")
        s = sigmoid(x)

        mode = self.get_mode("local_hard_sigmoid")
@@ -440,7 +437,7 @@ class TestSigmoidOpts:

        mode2 = mode.excluding("fusion").excluding("inplace")
        f2 = theano.function([x], s, mode=mode2)
-        assert check_stack_trace(f2, ops_to_check=theano.tensor.clip)
+        assert check_stack_trace(f2, ops_to_check=tt.clip)


 class TestSoftplusOpts:
@@ -457,56 +454,56 @@ class TestSoftplusOpts:
        utt.seed_rng()

    def test_logsigm_to_softplus(self):
-        x = T.vector()
+        x = tt.vector()

-        out = T.log(sigmoid(x))
+        out = tt.log(sigmoid(x))
        f = theano.function([x], out, mode=self.m)

        # Fix ticket #4581 first
        # assert check_stack_trace(
        #     f, ops_to_check=(theano.scalar.Neg,
-        #                      theano.tensor.nnet.sigm.ScalarSoftplus))
+        #                      ScalarSoftplus))
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 3
        assert isinstance(topo[0].op.scalar_op, theano.scalar.Neg)
-        assert isinstance(topo[1].op.scalar_op, theano.tensor.nnet.sigm.ScalarSoftplus)
+        assert isinstance(topo[1].op.scalar_op, ScalarSoftplus)
        assert isinstance(topo[2].op.scalar_op, theano.scalar.Neg)
        f(np.random.rand(54).astype(config.floatX))

    def test_log1msigm_to_softplus(self):
-        x = T.matrix()
+        x = tt.matrix()

-        out = T.log(1 - sigmoid(x))
+        out = tt.log(1 - sigmoid(x))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 2
-        assert isinstance(topo[0].op.scalar_op, theano.tensor.nnet.sigm.ScalarSoftplus)
+        assert isinstance(topo[0].op.scalar_op, ScalarSoftplus)
        assert isinstance(topo[1].op.scalar_op, theano.scalar.Neg)
        # assert check_stack_trace(f, ops_to_check='all')
        f(np.random.rand(54, 11).astype(config.floatX))

        # Same test with a flatten
-        out = T.log(1 - T.flatten(sigmoid(x)))
+        out = tt.log(1 - tt.flatten(sigmoid(x)))
        f = theano.function([x], out, mode=self.m)

        # assert check_stack_trace(f, ops_to_check='all')
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 3
-        assert tensor.is_flat(topo[0].outputs[0])
-        assert isinstance(topo[1].op.scalar_op, theano.tensor.nnet.sigm.ScalarSoftplus)
+        assert tt.is_flat(topo[0].outputs[0])
+        assert isinstance(topo[1].op.scalar_op, ScalarSoftplus)
        assert isinstance(topo[2].op.scalar_op, theano.scalar.Neg)
        f(np.random.rand(54, 11).astype(config.floatX))

        # Same test with a reshape
-        out = T.log(1 - sigmoid(x).reshape([x.size]))
+        out = tt.log(1 - sigmoid(x).reshape([x.size]))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        # assert len(topo) == 3
-        assert any(isinstance(node.op, T.Reshape) for node in topo)
+        assert any(isinstance(node.op, tt.Reshape) for node in topo)
        assert any(
            isinstance(
                getattr(node.op, "scalar_op", None),
-                theano.tensor.nnet.sigm.ScalarSoftplus,
+                ScalarSoftplus,
            )
            for node in topo
        )
@@ -517,16 +514,16 @@ class TestSoftplusOpts:
        if m == "FAST_COMPILE":
            m = "FAST_RUN"

-        x = T.vector()
+        x = tt.vector()

-        out = T.log(1 + T.exp(x))
+        out = tt.log(1 + tt.exp(x))
        f = theano.function([x], out, mode=self.m)

        # Fix ticket #4581 first
        # assert check_stack_trace(f, ops_to_check='all')
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 1
-        assert isinstance(topo[0].op.scalar_op, theano.tensor.nnet.sigm.ScalarSoftplus)
+        assert isinstance(topo[0].op.scalar_op, ScalarSoftplus)
        f(np.random.rand(54).astype(config.floatX))


@@ -536,14 +533,14 @@ class TestSigmoidUtils:
    """

    def test_compute_mul(self):
-        x, y, z = tensor.vectors("x", "y", "z")
+        x, y, z = tt.vectors("x", "y", "z")
        tree = (x * y) * -z
        mul_tree = parse_mul_tree(tree)
        assert parse_mul_tree(compute_mul(mul_tree)) == mul_tree
        assert is_same_graph(compute_mul(parse_mul_tree(tree)), tree)

    def test_parse_mul_tree(self):
-        x, y, z = tensor.vectors("x", "y", "z")
+        x, y, z = tt.vectors("x", "y", "z")
        assert parse_mul_tree(x * y) == [False, [[False, x], [False, y]]]
        assert parse_mul_tree(-(x * y)) == [True, [[False, x], [False, y]]]
        assert parse_mul_tree(-x * y) == [False, [[True, x], [False, y]]]
@@ -557,8 +554,8 @@ class TestSigmoidUtils:
        backup = config.warn.identify_1pexp_bug
        config.warn.identify_1pexp_bug = False
        try:
-            x = tensor.vector("x")
-            exp = tensor.exp
+            x = tt.vector("x")
+            exp = tt.exp
            assert is_1pexp(1 + exp(x), False) == (False, x)
            assert is_1pexp(exp(x) + 1, False) == (False, x)
            for neg, exp_arg in map(