提交 ef279e19 authored 作者: Brandon T. Willard's avatar Brandon T. Willard

Replace theano.tensor alias T with tt in tests.tensor

上级 ac7b5225
...@@ -11,6 +11,7 @@ import pytest ...@@ -11,6 +11,7 @@ import pytest
import numpy as np import numpy as np
import theano import theano
import theano.tensor as tt
from tempfile import mkstemp from tempfile import mkstemp
from copy import copy, deepcopy from copy import copy, deepcopy
...@@ -23,7 +24,7 @@ from numpy.testing import assert_array_equal, assert_allclose, assert_almost_equ ...@@ -23,7 +24,7 @@ from numpy.testing import assert_array_equal, assert_allclose, assert_almost_equ
from theano import change_flags from theano import change_flags
from theano.compat import exc_message, operator_div from theano.compat import exc_message, operator_div
from theano import compile, config, function, gof, tensor, shared from theano import compile, config, function, gof, shared
from theano.compile import DeepCopyOp from theano.compile import DeepCopyOp
from theano.compile.mode import get_default_mode from theano.compile.mode import get_default_mode
from theano.scalar import autocast_float_as, autocast_float from theano.scalar import autocast_float_as, autocast_float
...@@ -659,7 +660,7 @@ def makeTester( ...@@ -659,7 +660,7 @@ def makeTester(
out_grad_vars = [] out_grad_vars = []
for out in expecteds: for out in expecteds:
if str(out.dtype) in tensor.discrete_dtypes: if str(out.dtype) in tt.discrete_dtypes:
dtype = floatX dtype = floatX
else: else:
dtype = str(out.dtype) dtype = str(out.dtype)
...@@ -736,11 +737,11 @@ def randc128_ranged(min, max, shape): ...@@ -736,11 +737,11 @@ def randc128_ranged(min, max, shape):
def rand_of_dtype(shape, dtype): def rand_of_dtype(shape, dtype):
if dtype in tensor.discrete_dtypes: if dtype in tt.discrete_dtypes:
return randint(*shape).astype(dtype) return randint(*shape).astype(dtype)
elif dtype in tensor.float_dtypes: elif dtype in tt.float_dtypes:
return rand(*shape).astype(dtype) return rand(*shape).astype(dtype)
elif dtype in tensor.complex_dtypes: elif dtype in tt.complex_dtypes:
return randcomplex(*shape).astype(dtype) return randcomplex(*shape).astype(dtype)
else: else:
raise TypeError() raise TypeError()
...@@ -947,8 +948,8 @@ TestMaximumInplaceBroadcast = makeBroadcastTester( ...@@ -947,8 +948,8 @@ TestMaximumInplaceBroadcast = makeBroadcastTester(
def test_maximum_minimum_grad(): def test_maximum_minimum_grad():
# Test the discontinuity point. # Test the discontinuity point.
# We decided that we only pass the gradient to the first input in that case. # We decided that we only pass the gradient to the first input in that case.
x, y = tensor.vectors("xy") x, y = tt.vectors("xy")
for op in [tensor.maximum, tensor.minimum]: for op in [tt.maximum, tt.minimum]:
o = op(x, y) o = op(x, y)
g = theano.grad(o.sum(), [x, y]) g = theano.grad(o.sum(), [x, y])
...@@ -1076,13 +1077,13 @@ def _numpy_true_div(x, y): ...@@ -1076,13 +1077,13 @@ def _numpy_true_div(x, y):
# because simply calling np.true_divide could cause a dtype mismatch. # because simply calling np.true_divide could cause a dtype mismatch.
out = np.true_divide(x, y) out = np.true_divide(x, y)
# Use floatX as the result of int / int # Use floatX as the result of int / int
if x.dtype in tensor.discrete_dtypes and y.dtype in tensor.discrete_dtypes: if x.dtype in tt.discrete_dtypes and y.dtype in tt.discrete_dtypes:
out = theano._asarray(out, dtype=config.floatX) out = theano._asarray(out, dtype=config.floatX)
return out return out
TestTrueDivBroadcast = makeBroadcastTester( TestTrueDivBroadcast = makeBroadcastTester(
op=tensor.true_div, op=tt.true_div,
expected=_numpy_true_div, expected=_numpy_true_div,
good=_good_broadcast_div_mod_normal_float_no_complex, good=_good_broadcast_div_mod_normal_float_no_complex,
grad=_grad_broadcast_div_mod_normal, grad=_grad_broadcast_div_mod_normal,
...@@ -1128,7 +1129,7 @@ _bad_runtime_inv = dict( ...@@ -1128,7 +1129,7 @@ _bad_runtime_inv = dict(
TestInvBroadcast = makeBroadcastTester( TestInvBroadcast = makeBroadcastTester(
op=tensor.inv, op=tt.inv,
expected=lambda x: upcast_int8_nfunc(np.true_divide)(np.int8(1), x), expected=lambda x: upcast_int8_nfunc(np.true_divide)(np.int8(1), x),
good=_good_inv, good=_good_inv,
bad_runtime=_bad_runtime_inv, bad_runtime=_bad_runtime_inv,
...@@ -1147,7 +1148,7 @@ TestInvInplaceBroadcast = makeBroadcastTester( ...@@ -1147,7 +1148,7 @@ TestInvInplaceBroadcast = makeBroadcastTester(
TestCeilIntDivBroadcast = makeBroadcastTester( TestCeilIntDivBroadcast = makeBroadcastTester(
op=tensor.ceil_intdiv, op=tt.ceil_intdiv,
expected=lambda x, y: check_floatX((x, y), (x // y) + ((x % y) != 0)), expected=lambda x, y: check_floatX((x, y), (x // y) + ((x % y) != 0)),
good=_good_broadcast_div_mod_normal_float_no_complex, good=_good_broadcast_div_mod_normal_float_no_complex,
name="CeilIntDiv", name="CeilIntDiv",
...@@ -1157,7 +1158,7 @@ TestCeilIntDivBroadcast = makeBroadcastTester( ...@@ -1157,7 +1158,7 @@ TestCeilIntDivBroadcast = makeBroadcastTester(
) )
TestModBroadcast = makeBroadcastTester( TestModBroadcast = makeBroadcastTester(
op=tensor.mod, op=tt.mod,
expected=lambda x, y: np.asarray( expected=lambda x, y: np.asarray(
x % y, dtype=theano.scalar.basic.upcast(x.dtype, y.dtype) x % y, dtype=theano.scalar.basic.upcast(x.dtype, y.dtype)
), ),
...@@ -1331,7 +1332,7 @@ _grad_broadcast_unary_0_2_no_complex = dict( ...@@ -1331,7 +1332,7 @@ _grad_broadcast_unary_0_2_no_complex = dict(
# don't have a well defined behavior. We don't test that case. # don't have a well defined behavior. We don't test that case.
TestAbsBroadcast = makeBroadcastTester( TestAbsBroadcast = makeBroadcastTester(
op=tensor.abs_, op=tt.abs_,
expected=lambda x: abs(x), expected=lambda x: abs(x),
good=_good_broadcast_unary_normal, good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal, grad=_grad_broadcast_unary_normal,
...@@ -1347,7 +1348,7 @@ TestAbsInplaceBroadcast = makeBroadcastTester( ...@@ -1347,7 +1348,7 @@ TestAbsInplaceBroadcast = makeBroadcastTester(
) )
TestNegBroadcast = makeBroadcastTester( TestNegBroadcast = makeBroadcastTester(
op=tensor.neg, op=tt.neg,
expected=lambda x: -x, expected=lambda x: -x,
good=_good_broadcast_unary_normal, good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal, grad=_grad_broadcast_unary_normal,
...@@ -1360,7 +1361,7 @@ TestNegInplaceBroadcast = makeBroadcastTester( ...@@ -1360,7 +1361,7 @@ TestNegInplaceBroadcast = makeBroadcastTester(
) )
TestSgnBroadcast = makeBroadcastTester( TestSgnBroadcast = makeBroadcastTester(
op=tensor.sgn, op=tt.sgn,
expected=np.sign, expected=np.sign,
good=_good_broadcast_unary_normal_no_complex, good=_good_broadcast_unary_normal_no_complex,
grad=_grad_broadcast_unary_normal, grad=_grad_broadcast_unary_normal,
...@@ -1373,7 +1374,7 @@ TestSgnInplaceBroadcast = makeBroadcastTester( ...@@ -1373,7 +1374,7 @@ TestSgnInplaceBroadcast = makeBroadcastTester(
) )
TestIntDivBroadcast = makeBroadcastTester( TestIntDivBroadcast = makeBroadcastTester(
op=tensor.int_div, op=tt.int_div,
expected=lambda x, y: check_floatX((x, y), x // y), expected=lambda x, y: check_floatX((x, y), x // y),
good=_good_broadcast_div_mod_normal_float, good=_good_broadcast_div_mod_normal_float,
# I don't test the grad as the output is always an integer # I don't test the grad as the output is always an integer
...@@ -1393,7 +1394,7 @@ TestIntDivInplaceBroadcast = makeBroadcastTester( ...@@ -1393,7 +1394,7 @@ TestIntDivInplaceBroadcast = makeBroadcastTester(
TestCeilBroadcast = makeBroadcastTester( TestCeilBroadcast = makeBroadcastTester(
op=tensor.ceil, op=tt.ceil,
expected=upcast_float16_ufunc(np.ceil), expected=upcast_float16_ufunc(np.ceil),
good=_good_broadcast_unary_normal_no_complex, good=_good_broadcast_unary_normal_no_complex,
grad=copymod( grad=copymod(
...@@ -1415,7 +1416,7 @@ TestCeilInplaceBroadcast = makeBroadcastTester( ...@@ -1415,7 +1416,7 @@ TestCeilInplaceBroadcast = makeBroadcastTester(
) )
TestFloorBroadcast = makeBroadcastTester( TestFloorBroadcast = makeBroadcastTester(
op=tensor.floor, op=tt.floor,
expected=upcast_float16_ufunc(np.floor), expected=upcast_float16_ufunc(np.floor),
good=_good_broadcast_unary_normal_no_complex, good=_good_broadcast_unary_normal_no_complex,
grad=_grad_broadcast_unary_normal_noint, grad=_grad_broadcast_unary_normal_noint,
...@@ -1439,13 +1440,13 @@ TestTruncInplaceBroadcast = makeBroadcastTester( ...@@ -1439,13 +1440,13 @@ TestTruncInplaceBroadcast = makeBroadcastTester(
) )
TestTruncBroadcast = makeBroadcastTester( TestTruncBroadcast = makeBroadcastTester(
op=tensor.trunc, op=tt.trunc,
expected=upcast_float16_ufunc(np.trunc), expected=upcast_float16_ufunc(np.trunc),
good=_good_broadcast_unary_normal_no_complex, good=_good_broadcast_unary_normal_no_complex,
) )
TestRoundHalfToEvenBroadcast = makeBroadcastTester( TestRoundHalfToEvenBroadcast = makeBroadcastTester(
op=tensor.round_half_to_even, op=tt.round_half_to_even,
expected=np.round, expected=np.round,
good=_good_broadcast_unary_normal_float_no_complex, good=_good_broadcast_unary_normal_float_no_complex,
grad=_grad_broadcast_unary_normal_no_complex_no_corner_case, grad=_grad_broadcast_unary_normal_no_complex_no_corner_case,
...@@ -1462,7 +1463,7 @@ TestRoundHalfToEvenInplaceBroadcast = makeBroadcastTester( ...@@ -1462,7 +1463,7 @@ TestRoundHalfToEvenInplaceBroadcast = makeBroadcastTester(
# see in their file numpy/lib/function_base.py in class vectorize.__call__ # see in their file numpy/lib/function_base.py in class vectorize.__call__
# This happen in float32 mode. # This happen in float32 mode.
TestRoundHalfAwayFromZeroBroadcast = makeBroadcastTester( TestRoundHalfAwayFromZeroBroadcast = makeBroadcastTester(
op=tensor.round_half_away_from_zero, op=tt.round_half_away_from_zero,
expected=lambda a: theano.scalar.basic.round_half_away_from_zero_vec(a), expected=lambda a: theano.scalar.basic.round_half_away_from_zero_vec(a),
good=_good_broadcast_unary_normal_float_no_empty_no_complex, good=_good_broadcast_unary_normal_float_no_empty_no_complex,
grad=_grad_broadcast_unary_normal_no_complex_no_corner_case, grad=_grad_broadcast_unary_normal_no_complex_no_corner_case,
...@@ -1476,7 +1477,7 @@ TestRoundHalfAwayFromZeroInplaceBroadcast = makeBroadcastTester( ...@@ -1476,7 +1477,7 @@ TestRoundHalfAwayFromZeroInplaceBroadcast = makeBroadcastTester(
) )
TestSqrBroadcast = makeBroadcastTester( TestSqrBroadcast = makeBroadcastTester(
op=tensor.sqr, op=tt.sqr,
expected=np.square, expected=np.square,
good=_good_broadcast_unary_normal, good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal, grad=_grad_broadcast_unary_normal,
...@@ -1490,7 +1491,7 @@ TestSqrInplaceBroadcast = makeBroadcastTester( ...@@ -1490,7 +1491,7 @@ TestSqrInplaceBroadcast = makeBroadcastTester(
) )
TestExpBroadcast = makeBroadcastTester( TestExpBroadcast = makeBroadcastTester(
op=tensor.exp, op=tt.exp,
expected=upcast_float16_ufunc(np.exp), expected=upcast_float16_ufunc(np.exp),
good=dict( good=dict(
_good_broadcast_unary_normal, _good_broadcast_unary_normal,
...@@ -1508,7 +1509,7 @@ TestExpInplaceBroadcast = makeBroadcastTester( ...@@ -1508,7 +1509,7 @@ TestExpInplaceBroadcast = makeBroadcastTester(
) )
TestExp2Broadcast = makeBroadcastTester( TestExp2Broadcast = makeBroadcastTester(
op=tensor.exp2, op=tt.exp2,
expected=upcast_float16_ufunc(np.exp2), expected=upcast_float16_ufunc(np.exp2),
good=_good_broadcast_unary_normal, good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal, grad=_grad_broadcast_unary_normal,
...@@ -1522,7 +1523,7 @@ TestExp2InplaceBroadcast = makeBroadcastTester( ...@@ -1522,7 +1523,7 @@ TestExp2InplaceBroadcast = makeBroadcastTester(
TestExpm1Broadcast = makeBroadcastTester( TestExpm1Broadcast = makeBroadcastTester(
op=tensor.expm1, op=tt.expm1,
expected=upcast_float16_ufunc(np.expm1), expected=upcast_float16_ufunc(np.expm1),
good=dict( good=dict(
_good_broadcast_unary_normal, _good_broadcast_unary_normal,
...@@ -1557,7 +1558,7 @@ _grad_broadcast_unary_positive = dict( ...@@ -1557,7 +1558,7 @@ _grad_broadcast_unary_positive = dict(
) )
TestLogBroadcast = makeBroadcastTester( TestLogBroadcast = makeBroadcastTester(
op=tensor.log, op=tt.log,
expected=upcast_float16_ufunc(np.log), expected=upcast_float16_ufunc(np.log),
good=_good_broadcast_unary_positive, good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive, grad=_grad_broadcast_unary_positive,
...@@ -1570,7 +1571,7 @@ TestLogInplaceBroadcast = makeBroadcastTester( ...@@ -1570,7 +1571,7 @@ TestLogInplaceBroadcast = makeBroadcastTester(
) )
TestLog2Broadcast = makeBroadcastTester( TestLog2Broadcast = makeBroadcastTester(
op=tensor.log2, op=tt.log2,
expected=upcast_float16_ufunc(np.log2), expected=upcast_float16_ufunc(np.log2),
good=_good_broadcast_unary_positive, good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive, grad=_grad_broadcast_unary_positive,
...@@ -1583,7 +1584,7 @@ TestLog2InplaceBroadcast = makeBroadcastTester( ...@@ -1583,7 +1584,7 @@ TestLog2InplaceBroadcast = makeBroadcastTester(
) )
TestLog10Broadcast = makeBroadcastTester( TestLog10Broadcast = makeBroadcastTester(
op=tensor.log10, op=tt.log10,
expected=upcast_float16_ufunc(np.log10), expected=upcast_float16_ufunc(np.log10),
good=_good_broadcast_unary_positive, good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive, grad=_grad_broadcast_unary_positive,
...@@ -1596,7 +1597,7 @@ TestLog10InplaceBroadcast = makeBroadcastTester( ...@@ -1596,7 +1597,7 @@ TestLog10InplaceBroadcast = makeBroadcastTester(
) )
TestLog1pBroadcast = makeBroadcastTester( TestLog1pBroadcast = makeBroadcastTester(
op=tensor.log1p, op=tt.log1p,
expected=upcast_float16_ufunc(np.log1p), expected=upcast_float16_ufunc(np.log1p),
good=_good_broadcast_unary_positive, good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive, grad=_grad_broadcast_unary_positive,
...@@ -1609,7 +1610,7 @@ TestLog1pInplaceBroadcast = makeBroadcastTester( ...@@ -1609,7 +1610,7 @@ TestLog1pInplaceBroadcast = makeBroadcastTester(
) )
TestSqrtBroadcast = makeBroadcastTester( TestSqrtBroadcast = makeBroadcastTester(
op=tensor.sqrt, op=tt.sqrt,
expected=upcast_float16_ufunc(np.sqrt), expected=upcast_float16_ufunc(np.sqrt),
good=_good_broadcast_unary_positive, good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive, grad=_grad_broadcast_unary_positive,
...@@ -1643,7 +1644,7 @@ else: ...@@ -1643,7 +1644,7 @@ else:
angle_eps = 1e-10 angle_eps = 1e-10
TestDeg2radBroadcast = makeBroadcastTester( TestDeg2radBroadcast = makeBroadcastTester(
op=tensor.deg2rad, op=tt.deg2rad,
expected=upcast_float16_ufunc(np.deg2rad), expected=upcast_float16_ufunc(np.deg2rad),
good=_good_broadcast_unary_normal_no_complex, good=_good_broadcast_unary_normal_no_complex,
grad=_grad_broadcast_unary_normal_no_complex, grad=_grad_broadcast_unary_normal_no_complex,
...@@ -1658,7 +1659,7 @@ TestDeg2radInplaceBroadcast = makeBroadcastTester( ...@@ -1658,7 +1659,7 @@ TestDeg2radInplaceBroadcast = makeBroadcastTester(
) )
TestRad2degBroadcast = makeBroadcastTester( TestRad2degBroadcast = makeBroadcastTester(
op=tensor.rad2deg, op=tt.rad2deg,
expected=upcast_float16_ufunc(np.rad2deg), expected=upcast_float16_ufunc(np.rad2deg),
good=_good_broadcast_unary_normal_no_complex, good=_good_broadcast_unary_normal_no_complex,
grad=_grad_broadcast_unary_normal_no_complex, grad=_grad_broadcast_unary_normal_no_complex,
...@@ -1673,7 +1674,7 @@ TestRad2degInplaceBroadcast = makeBroadcastTester( ...@@ -1673,7 +1674,7 @@ TestRad2degInplaceBroadcast = makeBroadcastTester(
) )
TestSinBroadcast = makeBroadcastTester( TestSinBroadcast = makeBroadcastTester(
op=tensor.sin, op=tt.sin,
expected=upcast_float16_ufunc(np.sin), expected=upcast_float16_ufunc(np.sin),
good=_good_broadcast_unary_wide, good=_good_broadcast_unary_wide,
grad=_grad_broadcast_unary_wide, grad=_grad_broadcast_unary_wide,
...@@ -1706,7 +1707,7 @@ _grad_broadcast_unary_arcsin = dict( ...@@ -1706,7 +1707,7 @@ _grad_broadcast_unary_arcsin = dict(
) )
TestArcsinBroadcast = makeBroadcastTester( TestArcsinBroadcast = makeBroadcastTester(
op=tensor.arcsin, op=tt.arcsin,
expected=upcast_float16_ufunc(np.arcsin), expected=upcast_float16_ufunc(np.arcsin),
good=_good_broadcast_unary_arcsin, good=_good_broadcast_unary_arcsin,
grad=_grad_broadcast_unary_arcsin, grad=_grad_broadcast_unary_arcsin,
...@@ -1719,7 +1720,7 @@ TestArcsinInplaceBroadcast = makeBroadcastTester( ...@@ -1719,7 +1720,7 @@ TestArcsinInplaceBroadcast = makeBroadcastTester(
) )
TestCosBroadcast = makeBroadcastTester( TestCosBroadcast = makeBroadcastTester(
op=tensor.cos, op=tt.cos,
expected=upcast_float16_ufunc(np.cos), expected=upcast_float16_ufunc(np.cos),
good=_good_broadcast_unary_wide, good=_good_broadcast_unary_wide,
grad=_grad_broadcast_unary_wide, grad=_grad_broadcast_unary_wide,
...@@ -1733,14 +1734,14 @@ TestCosInplaceBroadcast = makeBroadcastTester( ...@@ -1733,14 +1734,14 @@ TestCosInplaceBroadcast = makeBroadcastTester(
def test_py_c_match(): def test_py_c_match():
a = tensor.TensorType(dtype="int8", broadcastable=(False,))() a = tt.TensorType(dtype="int8", broadcastable=(False,))()
f = theano.function([a], tensor.arccos(a), mode="DebugMode") f = theano.function([a], tt.arccos(a), mode="DebugMode")
# This can fail in DebugMode # This can fail in DebugMode
f(np.asarray([1, 0, -1], dtype="int8")) f(np.asarray([1, 0, -1], dtype="int8"))
TestArccosBroadcast = makeBroadcastTester( TestArccosBroadcast = makeBroadcastTester(
op=tensor.arccos, op=tt.arccos,
expected=upcast_float16_ufunc(np.arccos), expected=upcast_float16_ufunc(np.arccos),
good=_good_broadcast_unary_arcsin, good=_good_broadcast_unary_arcsin,
grad=_grad_broadcast_unary_arcsin, grad=_grad_broadcast_unary_arcsin,
...@@ -1768,7 +1769,7 @@ _grad_broadcast_unary_tan = dict( ...@@ -1768,7 +1769,7 @@ _grad_broadcast_unary_tan = dict(
) )
TestTanBroadcast = makeBroadcastTester( TestTanBroadcast = makeBroadcastTester(
op=tensor.tan, op=tt.tan,
expected=upcast_float16_ufunc(np.tan), expected=upcast_float16_ufunc(np.tan),
good=_good_broadcast_unary_tan, good=_good_broadcast_unary_tan,
grad=_grad_broadcast_unary_tan, grad=_grad_broadcast_unary_tan,
...@@ -1784,7 +1785,7 @@ TestTanInplaceBroadcast = makeBroadcastTester( ...@@ -1784,7 +1785,7 @@ TestTanInplaceBroadcast = makeBroadcastTester(
) )
TestArctanBroadcast = makeBroadcastTester( TestArctanBroadcast = makeBroadcastTester(
op=tensor.arctan, op=tt.arctan,
expected=upcast_float16_ufunc(np.arctan), expected=upcast_float16_ufunc(np.arctan),
good=_good_broadcast_unary_wide, good=_good_broadcast_unary_wide,
grad=_grad_broadcast_unary_wide, grad=_grad_broadcast_unary_wide,
...@@ -1828,7 +1829,7 @@ _grad_broadcast_binary_arctan2 = dict( ...@@ -1828,7 +1829,7 @@ _grad_broadcast_binary_arctan2 = dict(
) )
TestArctan2Broadcast = makeBroadcastTester( TestArctan2Broadcast = makeBroadcastTester(
op=tensor.arctan2, op=tt.arctan2,
expected=upcast_float16_ufunc(np.arctan2), expected=upcast_float16_ufunc(np.arctan2),
good=_good_broadcast_binary_arctan2, good=_good_broadcast_binary_arctan2,
grad=_grad_broadcast_binary_arctan2, grad=_grad_broadcast_binary_arctan2,
...@@ -1845,7 +1846,7 @@ TestArctan2InplaceBroadcast = makeBroadcastTester( ...@@ -1845,7 +1846,7 @@ TestArctan2InplaceBroadcast = makeBroadcastTester(
) )
TestCoshBroadcast = makeBroadcastTester( TestCoshBroadcast = makeBroadcastTester(
op=tensor.cosh, op=tt.cosh,
expected=upcast_float16_ufunc(np.cosh), expected=upcast_float16_ufunc(np.cosh),
good=dict( good=dict(
_good_broadcast_unary_normal, _good_broadcast_unary_normal,
...@@ -1874,7 +1875,7 @@ _grad_broadcast_unary_arccosh = dict( ...@@ -1874,7 +1875,7 @@ _grad_broadcast_unary_arccosh = dict(
) )
TestArccoshBroadcast = makeBroadcastTester( TestArccoshBroadcast = makeBroadcastTester(
op=tensor.arccosh, op=tt.arccosh,
expected=upcast_float16_ufunc(np.arccosh), expected=upcast_float16_ufunc(np.arccosh),
good=_good_broadcast_unary_arccosh, good=_good_broadcast_unary_arccosh,
grad=_grad_broadcast_unary_arccosh, grad=_grad_broadcast_unary_arccosh,
...@@ -1887,7 +1888,7 @@ TestArccoshInplaceBroadcast = makeBroadcastTester( ...@@ -1887,7 +1888,7 @@ TestArccoshInplaceBroadcast = makeBroadcastTester(
) )
TestSinhBroadcast = makeBroadcastTester( TestSinhBroadcast = makeBroadcastTester(
op=tensor.sinh, op=tt.sinh,
expected=upcast_float16_ufunc(np.sinh), expected=upcast_float16_ufunc(np.sinh),
good=dict( good=dict(
_good_broadcast_unary_normal, _good_broadcast_unary_normal,
...@@ -1905,7 +1906,7 @@ TestSinhInplaceBroadcast = makeBroadcastTester( ...@@ -1905,7 +1906,7 @@ TestSinhInplaceBroadcast = makeBroadcastTester(
) )
TestArcsinhBroadcast = makeBroadcastTester( TestArcsinhBroadcast = makeBroadcastTester(
op=tensor.arcsinh, op=tt.arcsinh,
expected=upcast_float16_ufunc(np.arcsinh), expected=upcast_float16_ufunc(np.arcsinh),
good=_good_broadcast_unary_normal, good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal, grad=_grad_broadcast_unary_normal,
...@@ -1918,7 +1919,7 @@ TestArcsinhInplaceBroadcast = makeBroadcastTester( ...@@ -1918,7 +1919,7 @@ TestArcsinhInplaceBroadcast = makeBroadcastTester(
) )
TestTanhBroadcast = makeBroadcastTester( TestTanhBroadcast = makeBroadcastTester(
op=tensor.tanh, op=tt.tanh,
expected=upcast_float16_ufunc(np.tanh), expected=upcast_float16_ufunc(np.tanh),
good=_good_broadcast_unary_normal, good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal, grad=_grad_broadcast_unary_normal,
...@@ -1944,7 +1945,7 @@ _grad_broadcast_unary_arctanh = dict( ...@@ -1944,7 +1945,7 @@ _grad_broadcast_unary_arctanh = dict(
) )
TestArctanhBroadcast = makeBroadcastTester( TestArctanhBroadcast = makeBroadcastTester(
op=tensor.arctanh, op=tt.arctanh,
expected=upcast_float16_ufunc(np.arctanh), expected=upcast_float16_ufunc(np.arctanh),
good=_good_broadcast_unary_arctanh, good=_good_broadcast_unary_arctanh,
grad=_grad_broadcast_unary_arctanh, grad=_grad_broadcast_unary_arctanh,
...@@ -2001,7 +2002,7 @@ else: ...@@ -2001,7 +2002,7 @@ else:
skip_scipy = "scipy is not present" skip_scipy = "scipy is not present"
TestErfBroadcast = makeBroadcastTester( TestErfBroadcast = makeBroadcastTester(
op=tensor.erf, op=tt.erf,
expected=expected_erf, expected=expected_erf,
good=_good_broadcast_unary_normal, good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal, grad=_grad_broadcast_unary_normal,
...@@ -2020,7 +2021,7 @@ TestErfInplaceBroadcast = makeBroadcastTester( ...@@ -2020,7 +2021,7 @@ TestErfInplaceBroadcast = makeBroadcastTester(
) )
TestErfcBroadcast = makeBroadcastTester( TestErfcBroadcast = makeBroadcastTester(
op=tensor.erfc, op=tt.erfc,
expected=expected_erfc, expected=expected_erfc,
good=_good_broadcast_unary_normal_float_no_complex, good=_good_broadcast_unary_normal_float_no_complex,
grad=_grad_broadcast_unary_normal, grad=_grad_broadcast_unary_normal,
...@@ -2039,7 +2040,7 @@ TestErfcInplaceBroadcast = makeBroadcastTester( ...@@ -2039,7 +2040,7 @@ TestErfcInplaceBroadcast = makeBroadcastTester(
) )
TestErfcxBroadcast = makeBroadcastTester( TestErfcxBroadcast = makeBroadcastTester(
op=tensor.erfcx, op=tt.erfcx,
expected=expected_erfcx, expected=expected_erfcx,
good=_good_broadcast_unary_normal_float_no_complex_small_neg_range, good=_good_broadcast_unary_normal_float_no_complex_small_neg_range,
grad=_grad_broadcast_unary_normal_small_neg_range, grad=_grad_broadcast_unary_normal_small_neg_range,
...@@ -2058,7 +2059,7 @@ TestErfcxInplaceBroadcast = makeBroadcastTester( ...@@ -2058,7 +2059,7 @@ TestErfcxInplaceBroadcast = makeBroadcastTester(
) )
TestErfinvBroadcast = makeBroadcastTester( TestErfinvBroadcast = makeBroadcastTester(
op=tensor.erfinv, op=tt.erfinv,
expected=expected_erfinv, expected=expected_erfinv,
good={ good={
"normal": [rand_ranged(-0.9, 0.9, (2, 3))], "normal": [rand_ranged(-0.9, 0.9, (2, 3))],
...@@ -2071,7 +2072,7 @@ TestErfinvBroadcast = makeBroadcastTester( ...@@ -2071,7 +2072,7 @@ TestErfinvBroadcast = makeBroadcastTester(
) )
TestErfcinvBroadcast = makeBroadcastTester( TestErfcinvBroadcast = makeBroadcastTester(
op=tensor.erfcinv, op=tt.erfcinv,
expected=expected_erfcinv, expected=expected_erfcinv,
good={ good={
"normal": [rand_ranged(0.001, 1.9, (2, 3))], "normal": [rand_ranged(0.001, 1.9, (2, 3))],
...@@ -2097,7 +2098,7 @@ _grad_broadcast_unary_gammaln = dict( ...@@ -2097,7 +2098,7 @@ _grad_broadcast_unary_gammaln = dict(
) )
TestGammaBroadcast = makeBroadcastTester( TestGammaBroadcast = makeBroadcastTester(
op=tensor.gamma, op=tt.gamma,
expected=expected_gamma, expected=expected_gamma,
good=_good_broadcast_unary_gammaln, good=_good_broadcast_unary_gammaln,
grad=_grad_broadcast_unary_gammaln, grad=_grad_broadcast_unary_gammaln,
...@@ -2116,7 +2117,7 @@ TestGammaInplaceBroadcast = makeBroadcastTester( ...@@ -2116,7 +2117,7 @@ TestGammaInplaceBroadcast = makeBroadcastTester(
) )
TestGammalnBroadcast = makeBroadcastTester( TestGammalnBroadcast = makeBroadcastTester(
op=tensor.gammaln, op=tt.gammaln,
expected=expected_gammaln, expected=expected_gammaln,
good=_good_broadcast_unary_gammaln, good=_good_broadcast_unary_gammaln,
grad=_grad_broadcast_unary_gammaln, grad=_grad_broadcast_unary_gammaln,
...@@ -2143,7 +2144,7 @@ _good_broadcast_unary_psi = dict( ...@@ -2143,7 +2144,7 @@ _good_broadcast_unary_psi = dict(
) )
TestPsiBroadcast = makeBroadcastTester( TestPsiBroadcast = makeBroadcastTester(
op=tensor.psi, op=tt.psi,
expected=expected_psi, expected=expected_psi,
good=_good_broadcast_unary_psi, good=_good_broadcast_unary_psi,
eps=2e-10, eps=2e-10,
...@@ -2163,7 +2164,7 @@ TestPsiInplaceBroadcast = makeBroadcastTester( ...@@ -2163,7 +2164,7 @@ TestPsiInplaceBroadcast = makeBroadcastTester(
_good_broadcast_unary_tri_gamma = _good_broadcast_unary_psi _good_broadcast_unary_tri_gamma = _good_broadcast_unary_psi
TestTriGammaBroadcast = makeBroadcastTester( TestTriGammaBroadcast = makeBroadcastTester(
op=tensor.tri_gamma, op=tt.tri_gamma,
expected=expected_tri_gamma, expected=expected_tri_gamma,
good=_good_broadcast_unary_psi, good=_good_broadcast_unary_psi,
eps=2e-8, eps=2e-8,
...@@ -2198,7 +2199,7 @@ _good_broadcast_unary_chi2sf = dict( ...@@ -2198,7 +2199,7 @@ _good_broadcast_unary_chi2sf = dict(
) )
TestChi2SFBroadcast = makeBroadcastTester( TestChi2SFBroadcast = makeBroadcastTester(
op=tensor.chi2sf, op=tt.chi2sf,
expected=expected_chi2sf, expected=expected_chi2sf,
good=_good_broadcast_unary_chi2sf, good=_good_broadcast_unary_chi2sf,
eps=2e-10, eps=2e-10,
...@@ -2249,7 +2250,7 @@ _grad_broadcast_binary_bessel = dict( ...@@ -2249,7 +2250,7 @@ _grad_broadcast_binary_bessel = dict(
) )
TestJ0Broadcast = makeBroadcastTester( TestJ0Broadcast = makeBroadcastTester(
op=tensor.j0, op=tt.j0,
expected=expected_j0, expected=expected_j0,
good=_good_broadcast_unary_bessel, good=_good_broadcast_unary_bessel,
grad=_grad_broadcast_unary_bessel, grad=_grad_broadcast_unary_bessel,
...@@ -2269,7 +2270,7 @@ TestJ0InplaceBroadcast = makeBroadcastTester( ...@@ -2269,7 +2270,7 @@ TestJ0InplaceBroadcast = makeBroadcastTester(
) )
TestJ1Broadcast = makeBroadcastTester( TestJ1Broadcast = makeBroadcastTester(
op=tensor.j1, op=tt.j1,
expected=expected_j1, expected=expected_j1,
good=_good_broadcast_unary_bessel, good=_good_broadcast_unary_bessel,
grad=_grad_broadcast_unary_bessel, grad=_grad_broadcast_unary_bessel,
...@@ -2289,7 +2290,7 @@ TestJ1InplaceBroadcast = makeBroadcastTester( ...@@ -2289,7 +2290,7 @@ TestJ1InplaceBroadcast = makeBroadcastTester(
) )
TestJvBroadcast = makeBroadcastTester( TestJvBroadcast = makeBroadcastTester(
op=tensor.jv, op=tt.jv,
expected=expected_jv, expected=expected_jv,
good=_good_broadcast_binary_bessel, good=_good_broadcast_binary_bessel,
eps=2e-10, eps=2e-10,
...@@ -2316,13 +2317,13 @@ def test_verify_jv_grad(): ...@@ -2316,13 +2317,13 @@ def test_verify_jv_grad():
v_val, x_val = _grad_broadcast_binary_bessel["normal"] v_val, x_val = _grad_broadcast_binary_bessel["normal"]
def fixed_first_input_jv(x): def fixed_first_input_jv(x):
return tensor.jv(v_val, x) return tt.jv(v_val, x)
utt.verify_grad(fixed_first_input_jv, [x_val]) utt.verify_grad(fixed_first_input_jv, [x_val])
TestI0Broadcast = makeBroadcastTester( TestI0Broadcast = makeBroadcastTester(
op=tensor.i0, op=tt.i0,
expected=expected_i0, expected=expected_i0,
good=_good_broadcast_unary_bessel, good=_good_broadcast_unary_bessel,
grad=_grad_broadcast_unary_bessel, grad=_grad_broadcast_unary_bessel,
...@@ -2342,7 +2343,7 @@ TestI0InplaceBroadcast = makeBroadcastTester( ...@@ -2342,7 +2343,7 @@ TestI0InplaceBroadcast = makeBroadcastTester(
) )
TestI1Broadcast = makeBroadcastTester( TestI1Broadcast = makeBroadcastTester(
op=tensor.i1, op=tt.i1,
expected=expected_i1, expected=expected_i1,
good=_good_broadcast_unary_bessel, good=_good_broadcast_unary_bessel,
grad=_grad_broadcast_unary_bessel, grad=_grad_broadcast_unary_bessel,
...@@ -2362,7 +2363,7 @@ TestI1InplaceBroadcast = makeBroadcastTester( ...@@ -2362,7 +2363,7 @@ TestI1InplaceBroadcast = makeBroadcastTester(
) )
TestIvBroadcast = makeBroadcastTester( TestIvBroadcast = makeBroadcastTester(
op=tensor.iv, op=tt.iv,
expected=expected_iv, expected=expected_iv,
good=_good_broadcast_binary_bessel, good=_good_broadcast_binary_bessel,
eps=2e-10, eps=2e-10,
...@@ -2389,13 +2390,13 @@ def test_verify_iv_grad(): ...@@ -2389,13 +2390,13 @@ def test_verify_iv_grad():
v_val, x_val = _grad_broadcast_binary_bessel["normal"] v_val, x_val = _grad_broadcast_binary_bessel["normal"]
def fixed_first_input_iv(x): def fixed_first_input_iv(x):
return tensor.iv(v_val, x) return tt.iv(v_val, x)
utt.verify_grad(fixed_first_input_iv, [x_val]) utt.verify_grad(fixed_first_input_iv, [x_val])
TestZerosLikeBroadcast = makeBroadcastTester( TestZerosLikeBroadcast = makeBroadcastTester(
op=tensor.zeros_like, op=tt.zeros_like,
expected=np.zeros_like, expected=np.zeros_like,
good=_good_broadcast_unary_normal, good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal, grad=_grad_broadcast_unary_normal,
...@@ -2403,7 +2404,7 @@ TestZerosLikeBroadcast = makeBroadcastTester( ...@@ -2403,7 +2404,7 @@ TestZerosLikeBroadcast = makeBroadcastTester(
) )
TestOnesLikeBroadcast = makeBroadcastTester( TestOnesLikeBroadcast = makeBroadcastTester(
op=tensor.ones_like, op=tt.ones_like,
expected=np.ones_like, expected=np.ones_like,
good=_good_broadcast_unary_normal, good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal, grad=_grad_broadcast_unary_normal,
...@@ -2428,13 +2429,13 @@ _grad_complex_from_polar = dict( ...@@ -2428,13 +2429,13 @@ _grad_complex_from_polar = dict(
) )
TestComplexFromPolarBroadcast = makeBroadcastTester( TestComplexFromPolarBroadcast = makeBroadcastTester(
op=tensor.complex_from_polar, op=tt.complex_from_polar,
expected=lambda r, theta: r * np.cos(theta) + 1j * r * np.sin(theta), expected=lambda r, theta: r * np.cos(theta) + 1j * r * np.sin(theta),
good=_good_complex_from_polar, good=_good_complex_from_polar,
) )
TestConjBroadcast = makeBroadcastTester( TestConjBroadcast = makeBroadcastTester(
op=tensor.conj, expected=np.conj, good=_good_broadcast_unary_normal op=tt.conj, expected=np.conj, good=_good_broadcast_unary_normal
) )
TestConjInplaceBroadcast = makeBroadcastTester( TestConjInplaceBroadcast = makeBroadcastTester(
op=inplace.conj_inplace, op=inplace.conj_inplace,
...@@ -2751,23 +2752,23 @@ class ApplyDefaultTestOp(theano.Op): ...@@ -2751,23 +2752,23 @@ class ApplyDefaultTestOp(theano.Op):
self.default_output = id self.default_output = id
def make_node(self, x): def make_node(self, x):
x = theano.tensor.as_tensor_variable(x) x = tt.as_tensor_variable(x)
return theano.Apply(self, [x], [x.type()]) return theano.Apply(self, [x], [x.type()])
def test_constant(): def test_constant():
int8_vector_type = tensor.TensorType(dtype="int8", broadcastable=(False,)) int8_vector_type = tt.TensorType(dtype="int8", broadcastable=(False,))
# Make sure we return a `TensorConstant` unchanged # Make sure we return a `TensorConstant` unchanged
x = tensor.TensorConstant(int8_vector_type, [1, 2]) x = tt.TensorConstant(int8_vector_type, [1, 2])
y = constant(x) y = constant(x)
assert y is x assert y is x
# Make sure we can add and remove broadcastable dimensions # Make sure we can add and remove broadcastable dimensions
int8_scalar_type = tensor.TensorType(dtype="int8", broadcastable=()) int8_scalar_type = tt.TensorType(dtype="int8", broadcastable=())
x_data = np.array(2, dtype="int8") x_data = np.array(2, dtype="int8")
x = tensor.TensorConstant(int8_scalar_type, x_data) x = tt.TensorConstant(int8_scalar_type, x_data)
y = constant(x, ndim=1) y = constant(x, ndim=1)
assert y.ndim == 1 assert y.ndim == 1
assert np.array_equal(y.data, np.expand_dims(x_data, 0)) assert np.array_equal(y.data, np.expand_dims(x_data, 0))
...@@ -2788,7 +2789,7 @@ class TestAsTensorVariable: ...@@ -2788,7 +2789,7 @@ class TestAsTensorVariable:
""" """
def setup_method(self):
    # A fresh symbolic scalar named "x" for every test.
    self.x = tt.scalar("x")
def test_tensor_from_scalar(self): def test_tensor_from_scalar(self):
y = as_tensor_variable(scal.int8()) y = as_tensor_variable(scal.int8())
...@@ -2811,7 +2812,7 @@ class TestAsTensorVariable: ...@@ -2811,7 +2812,7 @@ class TestAsTensorVariable:
# mask exceptions caused by unrelated logic (e.g. computing test # mask exceptions caused by unrelated logic (e.g. computing test
# values) # values)
with change_flags(compute_test_value="raise"), pytest.raises(ValueError): with change_flags(compute_test_value="raise"), pytest.raises(ValueError):
a = tensor.lscalar("a") a = tt.lscalar("a")
y = (a, a, 1) y = (a, a, 1)
_ = as_tensor_variable(y) _ = as_tensor_variable(y)
...@@ -2820,11 +2821,11 @@ class TestAsTensorVariable: ...@@ -2820,11 +2821,11 @@ class TestAsTensorVariable:
as_tensor_variable(bad_apply_var) as_tensor_variable(bad_apply_var)
def test_strip_leading_broadcastable(self): def test_strip_leading_broadcastable(self):
x = tensor.TensorType(config.floatX, (True, False))("x") x = tt.TensorType(config.floatX, (True, False))("x")
x = as_tensor_variable(x, ndim=1) x = as_tensor_variable(x, ndim=1)
assert x.ndim == 1 assert x.ndim == 1
x = tensor.matrix("x", dtype=config.floatX) x = tt.matrix("x", dtype=config.floatX)
with pytest.raises(ValueError): with pytest.raises(ValueError):
as_tensor_variable(x, ndim=1) as_tensor_variable(x, ndim=1)
...@@ -2843,7 +2844,7 @@ class TestAsTensorVariable: ...@@ -2843,7 +2844,7 @@ class TestAsTensorVariable:
new_inp = np.memmap(fname, dtype=inp.dtype, mode="w+", shape=inp.shape) new_inp = np.memmap(fname, dtype=inp.dtype, mode="w+", shape=inp.shape)
new_inp[...] = inp new_inp[...] = inp
res = as_tensor_variable(new_inp) res = as_tensor_variable(new_inp)
assert isinstance(res, tensor.TensorConstant) assert isinstance(res, tt.TensorConstant)
assert res.data is new_inp assert res.data is new_inp
@pytest.mark.parametrize( @pytest.mark.parametrize(
...@@ -2863,26 +2864,24 @@ class TestAsTensorVariable: ...@@ -2863,26 +2864,24 @@ class TestAsTensorVariable:
("x", "y"), ("x", "y"),
[ [
([1, 2], [1, 2]), ([1, 2], [1, 2]),
([tensor.as_tensor(1), tensor.as_tensor(2)], [1, 2]), ([tt.as_tensor(1), tt.as_tensor(2)], [1, 2]),
([theano.scalar.constant(1), theano.scalar.constant(2)], [1, 2]), ([theano.scalar.constant(1), theano.scalar.constant(2)], [1, 2]),
], ],
) )
def test_constant_consistency(self, x, y): def test_constant_consistency(self, x, y):
a = as_tensor_variable(x) a = as_tensor_variable(x)
assert isinstance(a, tensor.TensorConstant) assert isinstance(a, tt.TensorConstant)
assert np.array_equal(a.data, y) assert np.array_equal(a.data, y)
def test_constant_identity(self): def test_constant_identity(self):
# Values that are already `TensorType`s shouldn't be recreated by # Values that are already `TensorType`s shouldn't be recreated by
# `as_tensor_variable` # `as_tensor_variable`
x_scalar = tensor.TensorConstant( x_scalar = tt.TensorConstant(tt.TensorType(dtype="int8", broadcastable=()), 2)
tensor.TensorType(dtype="int8", broadcastable=()), 2
)
a_scalar = as_tensor_variable(x_scalar) a_scalar = as_tensor_variable(x_scalar)
assert x_scalar is a_scalar assert x_scalar is a_scalar
x_vector = tensor.TensorConstant( x_vector = tt.TensorConstant(
tensor.TensorType(dtype="int8", broadcastable=(False,)), tt.TensorType(dtype="int8", broadcastable=(False,)),
np.array([1, 2], dtype="int8"), np.array([1, 2], dtype="int8"),
) )
a_vector = as_tensor_variable(x_vector) a_vector = as_tensor_variable(x_vector)
...@@ -2893,7 +2892,7 @@ class TestAlloc: ...@@ -2893,7 +2892,7 @@ class TestAlloc:
dtype = config.floatX dtype = config.floatX
mode = mode_opt mode = mode_opt
shared = staticmethod(theano.shared) shared = staticmethod(theano.shared)
allocs = [tensor.Alloc()] * 3 allocs = [tt.Alloc()] * 3
def setup_method(self):
    # Seed the RNG from the shared test utilities so runs are reproducible.
    self.rng = np.random.RandomState(seed=utt.fetch_seed())
...@@ -2904,7 +2903,7 @@ class TestAlloc: ...@@ -2904,7 +2903,7 @@ class TestAlloc:
some_vector = vector("some_vector", dtype=self.dtype) some_vector = vector("some_vector", dtype=self.dtype)
some_matrix = some_vector.reshape((60, 50)) some_matrix = some_vector.reshape((60, 50))
variables = self.shared(np.ones((50,), dtype=self.dtype)) variables = self.shared(np.ones((50,), dtype=self.dtype))
idx = tensor.constant(np.arange(50)) idx = tt.constant(np.arange(50))
for alloc_, (subtensor, n_alloc) in zip( for alloc_, (subtensor, n_alloc) in zip(
self.allocs, self.allocs,
...@@ -2935,7 +2934,7 @@ class TestAlloc: ...@@ -2935,7 +2934,7 @@ class TestAlloc:
fgrad(test_params) fgrad(test_params)
def test_alloc_output(self): def test_alloc_output(self):
val = tensor.constant(self.rng.randn(1, 1), dtype=self.dtype) val = tt.constant(self.rng.randn(1, 1), dtype=self.dtype)
for alloc_ in self.allocs: for alloc_ in self.allocs:
# The output is the result of the alloc operation, # The output is the result of the alloc operation,
# we do not want it to be constant-folded # we do not want it to be constant-folded
...@@ -2948,35 +2947,35 @@ class TestAlloc: ...@@ -2948,35 +2947,35 @@ class TestAlloc:
def test_ones(self):
    """`tt.ones` must agree with `np.ones` for constant and symbolic shapes."""
    # Constant shapes of various ranks (including a scalar given as int).
    for shp in [[], 1, [1], [1, 2], [1, 2, 3], np.r_[1, 2, 3]]:
        fn = theano.function([], [tt.ones(shp)], mode=self.mode)
        assert np.allclose(fn(), np.ones(shp))

    # scalar doesn't have to be provided as input
    x = scalar()
    shp = []
    fn_scalar = theano.function([], [tt.ones(x.shape)], mode=self.mode)
    assert np.allclose(fn_scalar(), np.ones(shp))

    # Shapes taken symbolically from an actual input tensor.
    for (typ, shp) in [(vector, [3]), (matrix, [3, 4])]:
        x = typ()
        fn_tensor = theano.function([x], [tt.ones(x.shape)], mode=self.mode)
        inp = np.zeros(shp, dtype=config.floatX)
        assert np.allclose(fn_tensor(inp), np.ones(shp))
def test_zeros(self):
    """`tt.zeros` must agree with `np.zeros` for constant and symbolic shapes."""
    # Constant shapes of various ranks (including a scalar given as int).
    for shp in [[], 1, [1], [1, 2], [1, 2, 3], np.r_[1, 2, 3]]:
        fn = theano.function([], [tt.zeros(shp)], mode=self.mode)
        assert np.allclose(fn(), np.zeros(shp))

    # scalar doesn't have to be provided as input
    x = scalar()
    shp = []
    fn_scalar = theano.function([], [tt.zeros(x.shape)], mode=self.mode)
    assert np.allclose(fn_scalar(), np.zeros(shp))

    # Shapes taken symbolically from an actual input tensor.
    for (typ, shp) in [(vector, [3]), (matrix, [3, 4])]:
        x = typ()
        fn_tensor = theano.function([x], [tt.zeros(x.shape)], mode=self.mode)
        inp = np.zeros(shp, dtype=config.floatX)
        assert np.allclose(fn_tensor(inp), np.zeros(shp))
...@@ -2991,9 +2990,9 @@ def test_eye(): ...@@ -2991,9 +2990,9 @@ def test_eye():
# allowed. # allowed.
if M is None and theano.config.mode in ["DebugMode", "DEBUG_MODE"]: if M is None and theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
M = N M = N
N_symb = tensor.iscalar() N_symb = tt.iscalar()
M_symb = tensor.iscalar() M_symb = tt.iscalar()
k_symb = tensor.iscalar() k_symb = tt.iscalar()
f = function([N_symb, M_symb, k_symb], eye(N_symb, M_symb, k_symb, dtype=dtype)) f = function([N_symb, M_symb, k_symb], eye(N_symb, M_symb, k_symb, dtype=dtype))
result = f(N, M, k) result = f(N, M, k)
assert np.allclose(result, np.eye(N, M_, k, dtype=dtype)) assert np.allclose(result, np.eye(N, M_, k, dtype=dtype))
...@@ -3025,9 +3024,9 @@ class TestTriangle: ...@@ -3025,9 +3024,9 @@ class TestTriangle:
# allowed. # allowed.
if M is None and theano.config.mode in ["DebugMode", "DEBUG_MODE"]: if M is None and theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
M = N M = N
N_symb = tensor.iscalar() N_symb = tt.iscalar()
M_symb = tensor.iscalar() M_symb = tt.iscalar()
k_symb = tensor.iscalar() k_symb = tt.iscalar()
f = function( f = function(
[N_symb, M_symb, k_symb], tri(N_symb, M_symb, k_symb, dtype=dtype) [N_symb, M_symb, k_symb], tri(N_symb, M_symb, k_symb, dtype=dtype)
) )
...@@ -3091,9 +3090,7 @@ class TestNonzero: ...@@ -3091,9 +3090,7 @@ class TestNonzero:
@change_flags(compute_test_value="raise") @change_flags(compute_test_value="raise")
def test_nonzero(self): def test_nonzero(self):
def check(m): def check(m):
m_symb = theano.tensor.tensor( m_symb = tt.tensor(dtype=m.dtype, broadcastable=(False,) * m.ndim)
dtype=m.dtype, broadcastable=(False,) * m.ndim
)
m_symb.tag.test_value = m m_symb.tag.test_value = m
res_tuple_tt = nonzero(m_symb, return_matrix=False) res_tuple_tt = nonzero(m_symb, return_matrix=False)
...@@ -3122,9 +3119,7 @@ class TestNonzero: ...@@ -3122,9 +3119,7 @@ class TestNonzero:
@change_flags(compute_test_value="raise") @change_flags(compute_test_value="raise")
def test_flatnonzero(self): def test_flatnonzero(self):
def check(m): def check(m):
m_symb = theano.tensor.tensor( m_symb = tt.tensor(dtype=m.dtype, broadcastable=(False,) * m.ndim)
dtype=m.dtype, broadcastable=(False,) * m.ndim
)
m_symb.tag.test_value = m m_symb.tag.test_value = m
res_tt = flatnonzero(m_symb) res_tt = flatnonzero(m_symb)
...@@ -3147,9 +3142,7 @@ class TestNonzero: ...@@ -3147,9 +3142,7 @@ class TestNonzero:
@change_flags(compute_test_value="raise") @change_flags(compute_test_value="raise")
def test_nonzero_values(self): def test_nonzero_values(self):
def check(m): def check(m):
m_symb = theano.tensor.tensor( m_symb = tt.tensor(dtype=m.dtype, broadcastable=(False,) * m.ndim)
dtype=m.dtype, broadcastable=(False,) * m.ndim
)
m_symb.tag.test_value = m m_symb.tag.test_value = m
res_tt = nonzero_values(m_symb) res_tt = nonzero_values(m_symb)
...@@ -3173,7 +3166,7 @@ class TestNonzero: ...@@ -3173,7 +3166,7 @@ class TestNonzero:
def test_identity(): def test_identity():
def check(dtype): def check(dtype):
obj = rand_of_dtype((2,), dtype) obj = rand_of_dtype((2,), dtype)
sym = tensor.vector(dtype=dtype) sym = tt.vector(dtype=dtype)
f = function([sym], tensor_copy(sym)) f = function([sym], tensor_copy(sym))
assert np.all(obj == f(obj)) assert np.all(obj == f(obj))
assert obj.dtype == f(obj).dtype assert obj.dtype == f(obj).dtype
...@@ -3200,8 +3193,8 @@ class TestCast: ...@@ -3200,8 +3193,8 @@ class TestCast:
], ],
) )
for testname, (obj, dtype) in good: for testname, (obj, dtype) in good:
inp = tensor.vector(dtype=obj.dtype) inp = tt.vector(dtype=obj.dtype)
out = tensor.cast(inp, dtype=dtype) out = tt.cast(inp, dtype=dtype)
f = function([inp], out) f = function([inp], out)
assert f(obj).dtype == np.dtype(dtype) assert f(obj).dtype == np.dtype(dtype)
...@@ -3212,8 +3205,8 @@ class TestCast: ...@@ -3212,8 +3205,8 @@ class TestCast:
def test_cast_from_real_to_complex(self): def test_cast_from_real_to_complex(self):
for real_dtype in REAL_DTYPES: for real_dtype in REAL_DTYPES:
for complex_dtype in COMPLEX_DTYPES: for complex_dtype in COMPLEX_DTYPES:
inp = tensor.vector(dtype=real_dtype) inp = tt.vector(dtype=real_dtype)
out = tensor.cast(inp, dtype=complex_dtype) out = tt.cast(inp, dtype=complex_dtype)
f = function([inp], out) f = function([inp], out)
obj = rand_of_dtype((2,), real_dtype) obj = rand_of_dtype((2,), real_dtype)
assert f(obj).dtype == np.dtype(complex_dtype) assert f(obj).dtype == np.dtype(complex_dtype)
...@@ -3221,9 +3214,9 @@ class TestCast: ...@@ -3221,9 +3214,9 @@ class TestCast:
def test_cast_from_complex_to_real_raises_error(self): def test_cast_from_complex_to_real_raises_error(self):
for real_dtype in REAL_DTYPES: for real_dtype in REAL_DTYPES:
for complex_dtype in COMPLEX_DTYPES: for complex_dtype in COMPLEX_DTYPES:
inp = tensor.vector(dtype=real_dtype) inp = tt.vector(dtype=real_dtype)
with pytest.raises(TypeError): with pytest.raises(TypeError):
tensor(tensor.cast(inp, dtype=complex_dtype)) tt.tensor(tt.cast(inp, dtype=complex_dtype))
TestClip = makeTester( TestClip = makeTester(
...@@ -3296,15 +3289,15 @@ TestBackwardsClip = makeTester( ...@@ -3296,15 +3289,15 @@ TestBackwardsClip = makeTester(
class TestClip: class TestClip:
def test_complex_value(self):
    # Clipping a complex tensor is not defined; it must raise TypeError.
    for dtype in ("complex64", "complex128"):
        a = tt.vector(dtype=dtype)
        b = tt.scalar()
        c = tt.scalar()
        with pytest.raises(TypeError):
            clip(a, b, c)
def test_clip_repeat_grad(self): def test_clip_repeat_grad(self):
# This is testing for the issue #633 # This is testing for the issue #633
x, y = tensor.vectors("xy") x, y = tt.vectors("xy")
a = clip(x, y, x) a = clip(x, y, x)
g = theano.gradient.grad(a.sum(), x) g = theano.gradient.grad(a.sum(), x)
fn = theano.function([x, y], [g]) fn = theano.function([x, y], [g])
...@@ -3315,7 +3308,7 @@ class TestClip: ...@@ -3315,7 +3308,7 @@ class TestClip:
fn2 = theano.function([x, y], [g2]) fn2 = theano.function([x, y], [g2])
# Test for the equal case too # Test for the equal case too
a3 = theano.tensor.clip(x, x, x) a3 = tt.clip(x, x, x)
g3 = theano.gradient.grad(a3.sum(), x) g3 = theano.gradient.grad(a3.sum(), x)
fn3 = theano.function([x], [g3]) fn3 = theano.function([x], [g3])
...@@ -3353,9 +3346,9 @@ class TestClip: ...@@ -3353,9 +3346,9 @@ class TestClip:
def test_batched_dot(): def test_batched_dot():
first = theano.tensor.tensor3("first") first = tt.tensor3("first")
second = theano.tensor.tensor3("second") second = tt.tensor3("second")
output = theano.tensor.basic.batched_dot(first, second) output = tt.basic.batched_dot(first, second)
first_val = np.random.rand(10, 10, 20).astype(config.floatX) first_val = np.random.rand(10, 10, 20).astype(config.floatX)
second_val = np.random.rand(10, 20, 5).astype(config.floatX) second_val = np.random.rand(10, 20, 5).astype(config.floatX)
result_fn = theano.function([first, second], output) result_fn = theano.function([first, second], output)
...@@ -3364,9 +3357,9 @@ def test_batched_dot(): ...@@ -3364,9 +3357,9 @@ def test_batched_dot():
assert result.shape[1] == first_val.shape[1] assert result.shape[1] == first_val.shape[1]
assert result.shape[2] == second_val.shape[2] assert result.shape[2] == second_val.shape[2]
first_mat = theano.tensor.dmatrix("first") first_mat = tt.dmatrix("first")
second_mat = theano.tensor.dmatrix("second") second_mat = tt.dmatrix("second")
output = theano.tensor.basic.batched_dot(first_mat, second_mat) output = tt.basic.batched_dot(first_mat, second_mat)
first_mat_val = np.random.rand(10, 10).astype(config.floatX) first_mat_val = np.random.rand(10, 10).astype(config.floatX)
second_mat_val = np.random.rand(10, 10).astype(config.floatX) second_mat_val = np.random.rand(10, 10).astype(config.floatX)
result_fn = theano.function([first_mat, second_mat], output) result_fn = theano.function([first_mat, second_mat], output)
...@@ -3406,10 +3399,10 @@ def test_batched_dot_not_contiguous(): ...@@ -3406,10 +3399,10 @@ def test_batched_dot_not_contiguous():
def test_batched_tensordot(): def test_batched_tensordot():
first = theano.tensor.tensor4("first") first = tt.tensor4("first")
second = theano.tensor.tensor4("second") second = tt.tensor4("second")
axes = [[1, 2], [3, 1]] axes = [[1, 2], [3, 1]]
output = theano.tensor.basic.batched_tensordot(first, second, axes) output = tt.batched_tensordot(first, second, axes)
first_val = np.random.rand(8, 10, 20, 3).astype(config.floatX) first_val = np.random.rand(8, 10, 20, 3).astype(config.floatX)
second_val = np.random.rand(8, 20, 5, 10).astype(config.floatX) second_val = np.random.rand(8, 20, 5, 10).astype(config.floatX)
result_fn = theano.function([first, second], output) result_fn = theano.function([first, second], output)
...@@ -3418,10 +3411,10 @@ def test_batched_tensordot(): ...@@ -3418,10 +3411,10 @@ def test_batched_tensordot():
assert result.shape[1] == first_val.shape[3] assert result.shape[1] == first_val.shape[3]
assert result.shape[2] == second_val.shape[2] assert result.shape[2] == second_val.shape[2]
first_mat = theano.tensor.dmatrix("first") first_mat = tt.dmatrix("first")
second_mat = theano.tensor.dmatrix("second") second_mat = tt.dmatrix("second")
axes = 1 axes = 1
output = theano.tensor.basic.batched_tensordot(first_mat, second_mat, axes) output = tt.batched_tensordot(first_mat, second_mat, axes)
first_mat_val = np.random.rand(10, 4).astype(config.floatX) first_mat_val = np.random.rand(10, 4).astype(config.floatX)
second_mat_val = np.random.rand(10, 4).astype(config.floatX) second_mat_val = np.random.rand(10, 4).astype(config.floatX)
result_fn = theano.function([first_mat, second_mat], output) result_fn = theano.function([first_mat, second_mat], output)
...@@ -3481,7 +3474,7 @@ def test_nan_inf_constant_signature(): ...@@ -3481,7 +3474,7 @@ def test_nan_inf_constant_signature():
assert (x.signature() == y.signature()) == (i == j) assert (x.signature() == y.signature()) == (i == j)
# Also test that nan !=0 and nan != nan. # Also test that nan !=0 and nan != nan.
x = tensor.scalar() x = tt.scalar()
mode = get_default_mode() mode = get_default_mode()
if isinstance(mode, theano.compile.debugmode.DebugMode): if isinstance(mode, theano.compile.debugmode.DebugMode):
# Disable the check preventing usage of NaN / Inf values. # Disable the check preventing usage of NaN / Inf values.
...@@ -3495,16 +3488,16 @@ def test_nan_inf_constant_signature(): ...@@ -3495,16 +3488,16 @@ def test_nan_inf_constant_signature():
def test_isnan(): def test_isnan():
for x in [tensor.matrix(), tensor.imatrix(), tensor.matrix(dtype="bool")]: for x in [tt.matrix(), tt.imatrix(), tt.matrix(dtype="bool")]:
y = tensor.isnan(x) y = tt.isnan(x)
assert isinstance(y.owner.op, tensor.Elemwise) == ( assert isinstance(y.owner.op, tt.Elemwise) == (
x.dtype not in tensor.discrete_dtypes x.dtype not in tt.discrete_dtypes
) )
assert y.dtype == "bool" assert y.dtype == "bool"
# Test c code generator even for int type. # Test c code generator even for int type.
y = tensor.isnan_(x) y = tt.isnan_(x)
assert isinstance(y.owner.op, tensor.Elemwise) assert isinstance(y.owner.op, tt.Elemwise)
assert y.dtype == "bool" assert y.dtype == "bool"
f = theano.function([x], y, allow_input_downcast=True) f = theano.function([x], y, allow_input_downcast=True)
f([[0, 1, 2]]) f([[0, 1, 2]])
...@@ -3661,7 +3654,7 @@ class TestMaxAndArgmax: ...@@ -3661,7 +3654,7 @@ class TestMaxAndArgmax:
x = matrix() x = matrix()
cost = argmax(x, axis=0).sum() cost = argmax(x, axis=0).sum()
gx = grad(cost, x) gx = grad(cost, x)
val = tensor.get_scalar_constant_value(gx) val = tt.get_scalar_constant_value(gx)
assert val == 0.0 assert val == 0.0
def test_grad(self): def test_grad(self):
...@@ -3743,7 +3736,7 @@ class TestMaxAndArgmax: ...@@ -3743,7 +3736,7 @@ class TestMaxAndArgmax:
def test_preserve_broadcastable(self): def test_preserve_broadcastable(self):
# Ensure the original broadcastable flags are preserved by Max/Argmax. # Ensure the original broadcastable flags are preserved by Max/Argmax.
x = tensor.matrix().dimshuffle("x", 0, "x", 1, "x") x = tt.matrix().dimshuffle("x", 0, "x", 1, "x")
y = x.max(axis=1) y = x.max(axis=1)
assert y.type.broadcastable == (True, True, False, True) assert y.type.broadcastable == (True, True, False, True)
...@@ -3758,7 +3751,7 @@ class TestMaxAndArgmax: ...@@ -3758,7 +3751,7 @@ class TestMaxAndArgmax:
assert tuple(v) == np.max(data, (1, -1)).shape assert tuple(v) == np.max(data, (1, -1)).shape
def test_zero_shape(self): def test_zero_shape(self):
x = tensor.matrix() x = tt.matrix()
m, i = max_and_argmax(x, axis=1) m, i = max_and_argmax(x, axis=1)
f = theano.function([x], [m, i]) f = theano.function([x], [m, i])
xv = np.zeros((0, 4), dtype=floatX) xv = np.zeros((0, 4), dtype=floatX)
...@@ -4177,20 +4170,20 @@ class TestMinMax: ...@@ -4177,20 +4170,20 @@ class TestMinMax:
def test_basic_allclose():
    # This was raised by a user in https://github.com/Theano/Theano/issues/2975
    # Two floats that differ only past the default tolerance must compare close.
    assert tt.basic._allclose(-0.311023883434, -0.311022856884)
class TestOuter: class TestOuter:
def test_outer(self): def test_outer(self):
for m in range(4): for m in range(4):
for n in range(4): for n in range(4):
x = tensor.tensor(dtype="floatX", broadcastable=(False,) * m) x = tt.tensor(dtype="floatX", broadcastable=(False,) * m)
y = tensor.tensor(dtype="floatX", broadcastable=(False,) * n) y = tt.tensor(dtype="floatX", broadcastable=(False,) * n)
s1 = np.random.randint(1, 10, m) s1 = np.random.randint(1, 10, m)
s2 = np.random.randint(1, 10, n) s2 = np.random.randint(1, 10, n)
v1 = np.asarray(np.random.rand(*s1)).astype(floatX) v1 = np.asarray(np.random.rand(*s1)).astype(floatX)
v2 = np.asarray(np.random.rand(*s2)).astype(floatX) v2 = np.asarray(np.random.rand(*s2)).astype(floatX)
o = tensor.outer(x, y).eval({x: v1, y: v2}) o = tt.outer(x, y).eval({x: v1, y: v2})
assert_allclose(o, np.outer(v1, v2)) assert_allclose(o, np.outer(v1, v2))
def test_grad(self): def test_grad(self):
...@@ -4212,7 +4205,7 @@ class TestOuter: ...@@ -4212,7 +4205,7 @@ class TestOuter:
]: ]:
data0 = np.random.rand(*shp0).astype(floatX) data0 = np.random.rand(*shp0).astype(floatX)
data1 = np.random.rand(*shp1).astype(floatX) data1 = np.random.rand(*shp1).astype(floatX)
utt.verify_grad(tensor.outer, [data0, data1]) utt.verify_grad(tt.outer, [data0, data1])
class TestGetVectorLength: class TestGetVectorLength:
...@@ -4282,7 +4275,7 @@ class TestJoinAndSplit: ...@@ -4282,7 +4275,7 @@ class TestJoinAndSplit:
# tested only on cpu as gpu support only float32 # tested only on cpu as gpu support only float32
a = as_tensor_variable(1) a = as_tensor_variable(1)
b = as_tensor_variable(2.0) b = as_tensor_variable(2.0)
c = tensor._shared(np.asarray(3.0, dtype=self.floatX)) c = tt._shared(np.asarray(3.0, dtype=self.floatX))
s = stack([a, b, c]) s = stack([a, b, c])
want = np.array([1, 2, 3]) want = np.array([1, 2, 3])
out = self.eval_outputs_and_check_vector([s], opt.MakeVector()) out = self.eval_outputs_and_check_vector([s], opt.MakeVector())
...@@ -4302,8 +4295,8 @@ class TestJoinAndSplit: ...@@ -4302,8 +4295,8 @@ class TestJoinAndSplit:
# Test that calling stack() on scalars instantiates MakeVector, # Test that calling stack() on scalars instantiates MakeVector,
# not Join. Test that the floatX dtype stay floatX, not downcasted # not Join. Test that the floatX dtype stay floatX, not downcasted
# to int64 # to int64
a = tensor.scalar("a", dtype=self.floatX) a = tt.scalar("a", dtype=self.floatX)
b = tensor.scalar("b", dtype=self.floatX) b = tt.scalar("b", dtype=self.floatX)
s = stack([a, b, a, b]) s = stack([a, b, a, b])
f = function([a, b], s, mode=self.mode) f = function([a, b], s, mode=self.mode)
val = f(1, 2) val = f(1, 2)
...@@ -4317,8 +4310,8 @@ class TestJoinAndSplit: ...@@ -4317,8 +4310,8 @@ class TestJoinAndSplit:
def test_stack_scalar_make_vector_dtype(self): def test_stack_scalar_make_vector_dtype(self):
# Test that calling stack() on scalars instantiates MakeVector, # Test that calling stack() on scalars instantiates MakeVector,
# event when the scalar don't have the same dtype. # event when the scalar don't have the same dtype.
a = tensor.iscalar("a") a = tt.iscalar("a")
b = tensor.lscalar("b") b = tt.lscalar("b")
s = stack([a, b, a, b]) s = stack([a, b, a, b])
f = function([a, b], s, mode=self.mode) f = function([a, b], s, mode=self.mode)
val = f(1, 2) val = f(1, 2)
...@@ -4331,8 +4324,8 @@ class TestJoinAndSplit: ...@@ -4331,8 +4324,8 @@ class TestJoinAndSplit:
def test_stack_scalar_make_vector_constant(self): def test_stack_scalar_make_vector_constant(self):
# Test that calling stack() on scalars instantiates MakeVector, # Test that calling stack() on scalars instantiates MakeVector,
# event when the scalar are simple int type. # event when the scalar are simple int type.
a = tensor.iscalar("a") a = tt.iscalar("a")
b = tensor.lscalar("b") b = tt.lscalar("b")
# test when the constant is the first element. # test when the constant is the first element.
# The first element is used in a special way # The first element is used in a special way
s = stack([10, a, b, np.int8(3)]) s = stack([10, a, b, np.int8(3)])
...@@ -4349,8 +4342,8 @@ class TestJoinAndSplit: ...@@ -4349,8 +4342,8 @@ class TestJoinAndSplit:
# Testing against old interface # Testing against old interface
warnings.simplefilter("always", DeprecationWarning) warnings.simplefilter("always", DeprecationWarning)
a = tensor.imatrix("a") a = tt.imatrix("a")
b = tensor.imatrix("b") b = tt.imatrix("b")
s1 = stack(a, b) s1 = stack(a, b)
s2 = stack([a, b]) s2 = stack([a, b])
f = function([a, b], [s1, s2], mode=self.mode) f = function([a, b], [s1, s2], mode=self.mode)
...@@ -4403,8 +4396,8 @@ class TestJoinAndSplit: ...@@ -4403,8 +4396,8 @@ class TestJoinAndSplit:
def test_stack_hessian(self): def test_stack_hessian(self):
# Test the gradient of stack when used in hessian, see gh-1589 # Test the gradient of stack when used in hessian, see gh-1589
a = tensor.dvector("a") a = tt.dvector("a")
b = tensor.dvector("b") b = tt.dvector("b")
A = stack([a, b]) A = stack([a, b])
B = A.T.dot(A) B = A.T.dot(A)
Ha, Hb = hessian(B.sum(), [a, b]) Ha, Hb = hessian(B.sum(), [a, b])
...@@ -4423,8 +4416,8 @@ class TestJoinAndSplit: ...@@ -4423,8 +4416,8 @@ class TestJoinAndSplit:
def test_stack_hessian2(self): def test_stack_hessian2(self):
# Test the hessian macro when the gradient itself does not depend # Test the hessian macro when the gradient itself does not depend
# on the input (but the cost does) # on the input (but the cost does)
a = tensor.dvector("a") a = tt.dvector("a")
b = tensor.dvector("b") b = tt.dvector("b")
A = stack([a, b]) A = stack([a, b])
Ha, Hb = hessian(A.sum(), [a, b]) Ha, Hb = hessian(A.sum(), [a, b])
...@@ -4442,8 +4435,8 @@ class TestJoinAndSplit: ...@@ -4442,8 +4435,8 @@ class TestJoinAndSplit:
def test_join_concatenate_one_element(self): def test_join_concatenate_one_element(self):
# Fast test of concatenate as this is an alias for join. # Fast test of concatenate as this is an alias for join.
# also test that we remove the Join op if there is only 1 input # also test that we remove the Join op if there is only 1 input
m = tensor.fmatrix() m = tt.fmatrix()
c = tensor.concatenate([m]) c = tt.concatenate([m])
f = theano.function( f = theano.function(
inputs=[m], outputs=[c], mode=self.mode.including("local_join_1") inputs=[m], outputs=[c], mode=self.mode.including("local_join_1")
) )
...@@ -4741,14 +4734,12 @@ class TestJoinAndSplit: ...@@ -4741,14 +4734,12 @@ class TestJoinAndSplit:
assert not c.type.broadcastable[1] assert not c.type.broadcastable[1]
# Opt can remplace the int by a Theano constant # Opt can remplace the int by a Theano constant
c = self.join_op(theano.tensor.constant(1), a, b) c = self.join_op(tt.constant(1), a, b)
assert c.type.broadcastable[0] and c.type.broadcastable[2] assert c.type.broadcastable[0] and c.type.broadcastable[2]
assert not c.type.broadcastable[1] assert not c.type.broadcastable[1]
# In case futur opt insert other useless stuff # In case futur opt insert other useless stuff
c = self.join_op( c = self.join_op(tt.cast(tt.constant(1), dtype="int32"), a, b)
theano.tensor.cast(theano.tensor.constant(1), dtype="int32"), a, b
)
assert c.type.broadcastable[0] and c.type.broadcastable[2] assert c.type.broadcastable[0] and c.type.broadcastable[2]
assert not c.type.broadcastable[1] assert not c.type.broadcastable[1]
...@@ -4958,10 +4949,10 @@ class TestJoinAndSplit: ...@@ -4958,10 +4949,10 @@ class TestJoinAndSplit:
def test_rebroadcast(self):
    # Regression test for a crash that used to happen when rebroadcasting.
    broadcast_pattern = [False, False, True]
    x = tt.TensorType(self.floatX, broadcast_pattern)()
    u = tt.TensorType(self.floatX, broadcast_pattern)()
    # This line used to crash.
    tt.concatenate([x, -u], axis=2)
def test_concatenate_same(self): def test_concatenate_same(self):
# Test that we can concatenate the same tensor multiple time. # Test that we can concatenate the same tensor multiple time.
...@@ -4969,7 +4960,7 @@ class TestJoinAndSplit: ...@@ -4969,7 +4960,7 @@ class TestJoinAndSplit:
# In the past it was broken on the GPU. # In the past it was broken on the GPU.
rng = np.random.RandomState(seed=utt.fetch_seed()) rng = np.random.RandomState(seed=utt.fetch_seed())
T_shared = self.shared(rng.rand(3, 4).astype(self.floatX)) T_shared = self.shared(rng.rand(3, 4).astype(self.floatX))
Tout = tensor.concatenate([T_shared, T_shared]) Tout = tt.concatenate([T_shared, T_shared])
f = function([], Tout, mode=self.mode) f = function([], Tout, mode=self.mode)
out = f() out = f()
if theano.config.mode != "FAST_COMPILE": if theano.config.mode != "FAST_COMPILE":
...@@ -5027,9 +5018,9 @@ def test_join_inplace(): ...@@ -5027,9 +5018,9 @@ def test_join_inplace():
# join function but all except one of them are empty. In this case join # join function but all except one of them are empty. In this case join
# should work inplace and the output should be the view of the non-empty # should work inplace and the output should be the view of the non-empty
# element. # element.
s = tensor.lscalar() s = tt.lscalar()
x = tensor.vector("x") x = tt.vector("x")
z = tensor.zeros((s,)) z = tt.zeros((s,))
join = Join(view=0) join = Join(view=0)
c = join(0, x, z, z) c = join(0, x, z, z)
...@@ -5051,13 +5042,11 @@ def test_join_oneInput(): ...@@ -5051,13 +5042,11 @@ def test_join_oneInput():
# on an array of tensors but the array has only one element. # on an array of tensors but the array has only one element.
# In this case, we would like to avoid the computational # In this case, we would like to avoid the computational
# overhead of concatenation of one element. # overhead of concatenation of one element.
x_0 = theano.tensor.fmatrix() x_0 = tt.fmatrix()
x_1 = theano.tensor.fmatrix() x_1 = tt.fmatrix()
x_2 = theano.tensor.fvector() x_2 = tt.fvector()
join_0 = theano.tensor.concatenate([x_0], axis=1) join_0 = tt.concatenate([x_0], axis=1)
join_1 = theano.tensor.concatenate( join_1 = tt.concatenate([x_0, x_1, tt.shape_padright(x_2)], axis=1)
[x_0, x_1, theano.tensor.shape_padright(x_2)], axis=1
)
assert join_0 is x_0 assert join_0 is x_0
assert join_1 is not x_0 assert join_1 is not x_0
...@@ -5093,9 +5082,9 @@ class TestComparison: ...@@ -5093,9 +5082,9 @@ class TestComparison:
for x, y, err in [ for x, y, err in [
(self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False), (self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False),
(l, self.shared(r.astype(dtype)), True), (l, self.shared(r.astype(dtype)), True),
(tensor.constant(l), self.shared(r.astype(dtype)), False), (tt.constant(l), self.shared(r.astype(dtype)), False),
(self.shared(l.astype(dtype)), r, False), (self.shared(l.astype(dtype)), r, False),
(self.shared(l.astype(dtype)), tensor.constant(r), False), (self.shared(l.astype(dtype)), tt.constant(r), False),
]: ]:
try: try:
fn = self.inplace_func([], x > y) fn = self.inplace_func([], x > y)
...@@ -5111,9 +5100,9 @@ class TestComparison: ...@@ -5111,9 +5100,9 @@ class TestComparison:
for x, y, err in [ for x, y, err in [
(self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False), (self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False),
(l, self.shared(r.astype(dtype)), True), (l, self.shared(r.astype(dtype)), True),
(tensor.constant(l), self.shared(r.astype(dtype)), False), (tt.constant(l), self.shared(r.astype(dtype)), False),
(self.shared(l.astype(dtype)), r, False), (self.shared(l.astype(dtype)), r, False),
(self.shared(l.astype(dtype)), tensor.constant(r), False), (self.shared(l.astype(dtype)), tt.constant(r), False),
]: ]:
try: try:
fn = self.inplace_func([], x < y) fn = self.inplace_func([], x < y)
...@@ -5129,9 +5118,9 @@ class TestComparison: ...@@ -5129,9 +5118,9 @@ class TestComparison:
for x, y, err in [ for x, y, err in [
(self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False), (self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False),
(l, self.shared(r.astype(dtype)), True), (l, self.shared(r.astype(dtype)), True),
(tensor.constant(l), self.shared(r.astype(dtype)), False), (tt.constant(l), self.shared(r.astype(dtype)), False),
(self.shared(l.astype(dtype)), r, False), (self.shared(l.astype(dtype)), r, False),
(self.shared(l.astype(dtype)), tensor.constant(r), False), (self.shared(l.astype(dtype)), tt.constant(r), False),
]: ]:
try: try:
fn = self.inplace_func([], x <= y) fn = self.inplace_func([], x <= y)
...@@ -5147,9 +5136,9 @@ class TestComparison: ...@@ -5147,9 +5136,9 @@ class TestComparison:
for x, y, err in [ for x, y, err in [
(self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False), (self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False),
(l, self.shared(r.astype(dtype)), True), (l, self.shared(r.astype(dtype)), True),
(tensor.constant(l), self.shared(r.astype(dtype)), False), (tt.constant(l), self.shared(r.astype(dtype)), False),
(self.shared(l.astype(dtype)), r, False), (self.shared(l.astype(dtype)), r, False),
(self.shared(l.astype(dtype)), tensor.constant(r), False), (self.shared(l.astype(dtype)), tt.constant(r), False),
]: ]:
try: try:
fn = self.inplace_func([], x >= y) fn = self.inplace_func([], x >= y)
...@@ -5165,9 +5154,9 @@ class TestComparison: ...@@ -5165,9 +5154,9 @@ class TestComparison:
for x, y, err in [ for x, y, err in [
(self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False), (self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False),
(l, self.shared(r.astype(dtype)), True), (l, self.shared(r.astype(dtype)), True),
(tensor.constant(l), self.shared(r.astype(dtype)), False), (tt.constant(l), self.shared(r.astype(dtype)), False),
(self.shared(l.astype(dtype)), r, False), (self.shared(l.astype(dtype)), r, False),
(self.shared(l.astype(dtype)), tensor.constant(r), False), (self.shared(l.astype(dtype)), tt.constant(r), False),
]: ]:
try: try:
fn = self.inplace_func([], eq(x, y)) fn = self.inplace_func([], eq(x, y))
...@@ -5183,9 +5172,9 @@ class TestComparison: ...@@ -5183,9 +5172,9 @@ class TestComparison:
for x, y, err in [ for x, y, err in [
(self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False), (self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False),
(l, self.shared(r.astype(dtype)), True), (l, self.shared(r.astype(dtype)), True),
(tensor.constant(l), self.shared(r.astype(dtype)), False), (tt.constant(l), self.shared(r.astype(dtype)), False),
(self.shared(l.astype(dtype)), r, False), (self.shared(l.astype(dtype)), r, False),
(self.shared(l.astype(dtype)), tensor.constant(r), False), (self.shared(l.astype(dtype)), tt.constant(r), False),
]: ]:
try: try:
fn = self.inplace_func([], neq(x, y)) fn = self.inplace_func([], neq(x, y))
...@@ -5367,7 +5356,7 @@ class TestAdd: ...@@ -5367,7 +5356,7 @@ class TestAdd:
class TestCeil:
    def test_complex(self):
        """`ceil` is undefined for complex inputs and must reject them."""
        z = tt.zvector()
        with pytest.raises(TypeError):
            tt.ceil(z)
class TestExp: class TestExp:
...@@ -5442,24 +5431,24 @@ class TestDivimpl: ...@@ -5442,24 +5431,24 @@ class TestDivimpl:
class TestMean:
    def test_mean_single_element(self):
        # The mean of a single zero is exactly zero.
        assert tt.mean(np.zeros(1)).eval() == 0.0

    def test_mean_f16(self):
        # A float16 mean over a long vector must not lose precision by
        # accumulating in float16 (the sum of 1e5 ones overflows fp16).
        v = tt.vector(dtype="float16")
        mean_fn = theano.function([v], v.mean())
        utt.assert_allclose(mean_fn(np.ones((100000,), dtype="float16")), 1.0)

    def test_basic(self):
        # The symbolic mean agrees with numpy's on random data.
        v = tt.vector()
        mean_fn = theano.function([v], tt.mean(v))
        sample = rand(50)
        assert np.allclose(mean_fn(sample), np.mean(sample))

    def test_list(self):
        # `mean` accepts a plain Python list of shared variables.
        shared_vals = [theano.shared(0.0), theano.shared(2.0)]
        assert tt.mean(shared_vals).eval() == 1
class TestMatinv: class TestMatinv:
...@@ -5540,7 +5529,7 @@ class TestDot: ...@@ -5540,7 +5529,7 @@ class TestDot:
def test_Op_dims(self): def test_Op_dims(self):
# _dot is a Dot op instance # _dot is a Dot op instance
_dot = theano.tensor.basic._dot _dot = tt.basic._dot
d0 = scalar() d0 = scalar()
d1 = vector() d1 = vector()
d2 = matrix() d2 = matrix()
...@@ -5881,7 +5870,7 @@ class TestGrad: ...@@ -5881,7 +5870,7 @@ class TestGrad:
# grad: Test passing a single variable param # grad: Test passing a single variable param
o = TestGrad.Obj1() o = TestGrad.Obj1()
a1 = o.make_node() a1 = o.make_node()
assert o.gval0 is tensor.grad(a1.outputs[0], a1.inputs[0]) assert o.gval0 is tt.grad(a1.outputs[0], a1.inputs[0])
def test_Nparam(self): def test_Nparam(self):
# grad: Test passing multiple variable params # grad: Test passing multiple variable params
...@@ -5900,14 +5889,14 @@ class TestGrad: ...@@ -5900,14 +5889,14 @@ class TestGrad:
# requires changing this test or making it fail you are almost certainly # requires changing this test or making it fail you are almost certainly
# making a common mistake, NOT fixing something. # making a common mistake, NOT fixing something.
X = tensor.matrix() X = tt.matrix()
y = X.sum() y = X.sum()
G = tensor.grad(y, [X]) G = tt.grad(y, [X])
assert isinstance(G, list) assert isinstance(G, list)
G = tensor.grad(y, X) G = tt.grad(y, X)
assert not isinstance(G, list) assert not isinstance(G, list)
...@@ -5970,7 +5959,7 @@ class TestOpCache: ...@@ -5970,7 +5959,7 @@ class TestOpCache:
class TestReshape(utt.InferShapeTester, utt.OptimizationTestMixin): class TestReshape(utt.InferShapeTester, utt.OptimizationTestMixin):
def setup_method(self): def setup_method(self):
self.shared = tensor._shared self.shared = tt._shared
self.op = Reshape self.op = Reshape
# The tag canonicalize is needed for the shape test in FAST_COMPILE # The tag canonicalize is needed for the shape test in FAST_COMPILE
self.mode = None self.mode = None
...@@ -5979,7 +5968,7 @@ class TestReshape(utt.InferShapeTester, utt.OptimizationTestMixin): ...@@ -5979,7 +5968,7 @@ class TestReshape(utt.InferShapeTester, utt.OptimizationTestMixin):
opt.MakeVector, opt.MakeVector,
opt.Shape_i, opt.Shape_i,
DimShuffle, DimShuffle,
theano.tensor.Elemwise, tt.Elemwise,
) )
super().setup_method() super().setup_method()
...@@ -6148,7 +6137,7 @@ class TestReshape(utt.InferShapeTester, utt.OptimizationTestMixin): ...@@ -6148,7 +6137,7 @@ class TestReshape(utt.InferShapeTester, utt.OptimizationTestMixin):
assert f(np.ndarray((0,), dtype="float32")).shape == (0, 100) assert f(np.ndarray((0,), dtype="float32")).shape == (0, 100)
def test_empty_shp(self): def test_empty_shp(self):
const = theano.tensor.constant([1]).reshape(()) const = tt.constant([1]).reshape(())
f = function([], const) f = function([], const)
assert f().shape == () assert f().shape == ()
...@@ -6156,7 +6145,7 @@ class TestReshape(utt.InferShapeTester, utt.OptimizationTestMixin): ...@@ -6156,7 +6145,7 @@ class TestReshape(utt.InferShapeTester, utt.OptimizationTestMixin):
def test_make_column_matrix_broadcastable():
    """Check that a reshaped column gets a broadcastable second dimension.

    The reshape-then-dimshuffle below is the idiom for turning an (n, 1)
    matrix into a column whose second axis broadcasts.
    """
    mat = tt.dmatrix()
    col = mat.reshape((mat.shape[0],)).dimshuffle(0, "x")
    f = function([mat], col)
    # (3, 1) result + shape-(2,) vector only works if axis 1 broadcasts.
    assert (f(np.zeros((3, 1))) + np.ones(2) == np.ones((3, 2))).all()
...@@ -6270,26 +6259,26 @@ def test_is_flat(): ...@@ -6270,26 +6259,26 @@ def test_is_flat():
# given outdim # given outdim
# Constant variable # Constant variable
assert tensor.is_flat(tensor.as_tensor_variable(np.zeros((10)))) assert tt.is_flat(tt.as_tensor_variable(np.zeros((10))))
assert tensor.is_flat(tensor.as_tensor_variable(np.zeros((10, 10, 10))), ndim=3) assert tt.is_flat(tt.as_tensor_variable(np.zeros((10, 10, 10))), ndim=3)
assert not tensor.is_flat(tensor.as_tensor_variable(np.zeros((10, 10, 10)))) assert not tt.is_flat(tt.as_tensor_variable(np.zeros((10, 10, 10))))
# Symbolic variable # Symbolic variable
assert tensor.is_flat(tensor.vector()) assert tt.is_flat(tt.vector())
assert tensor.is_flat(tensor.tensor3(), ndim=3) assert tt.is_flat(tt.tensor3(), ndim=3)
assert not tensor.is_flat(tensor.tensor3()) assert not tt.is_flat(tt.tensor3())
# Reshape with constant shape # Reshape with constant shape
X = tensor.tensor4() X = tt.tensor4()
assert tensor.is_flat(X.reshape((-1,))) assert tt.is_flat(X.reshape((-1,)))
assert tensor.is_flat(X.reshape((10, 10, -1)), ndim=3) assert tt.is_flat(X.reshape((10, 10, -1)), ndim=3)
assert not tensor.is_flat(X.reshape((10, 10, -1))) assert not tt.is_flat(X.reshape((10, 10, -1)))
# Reshape with symbolic shape # Reshape with symbolic shape
X = tensor.tensor4() X = tt.tensor4()
assert tensor.is_flat(X.reshape((tensor.iscalar(),))) assert tt.is_flat(X.reshape((tt.iscalar(),)))
assert tensor.is_flat(X.reshape((tensor.iscalar(),) * 3), ndim=3) assert tt.is_flat(X.reshape((tt.iscalar(),) * 3), ndim=3)
assert not tensor.is_flat(X.reshape((tensor.iscalar(),) * 3)) assert not tt.is_flat(X.reshape((tt.iscalar(),) * 3))
def test_tile(): def test_tile():
...@@ -6330,7 +6319,7 @@ def test_tile(): ...@@ -6330,7 +6319,7 @@ def test_tile():
== np.tile(x_, (2, 3, 4, 6)) == np.tile(x_, (2, 3, 4, 6))
) )
# Test when reps is integer, tensor.scalar or tensor.vector. # Test when reps is integer, tt.scalar or tt.vector.
# Test 1,2,3,4-dimensional cases. # Test 1,2,3,4-dimensional cases.
# Test input x has the shape [2], [2, 4], [2, 4, 3], [2, 4, 3, 5]. # Test input x has the shape [2], [2, 4], [2, 4, 3], [2, 4, 3, 5].
test_shape = [2, 4, 3, 5] test_shape = [2, 4, 3, 5]
...@@ -6345,13 +6334,13 @@ def test_tile(): ...@@ -6345,13 +6334,13 @@ def test_tile():
f = function([x], tile(x, reps_)) f = function([x], tile(x, reps_))
assert np.all(f(x_) == np.tile(x_, reps_)) assert np.all(f(x_) == np.tile(x_, reps_))
# tensor.scalar: # tt.scalar:
reps = iscalar() reps = iscalar()
reps_ = 2 reps_ = 2
f = function([x, reps], tile(x, reps)) f = function([x, reps], tile(x, reps))
assert np.all(f(x_, reps_) == np.tile(x_, reps_)) assert np.all(f(x_, reps_) == np.tile(x_, reps_))
# tensor.vector: # tt.vector:
reps = ivector() reps = ivector()
reps_ = [2] if k == 1 or k == 2 else [2, 3] reps_ = [2] if k == 1 or k == 2 else [2, 3]
ndim_ = k ndim_ = k
...@@ -6363,7 +6352,7 @@ def test_tile(): ...@@ -6363,7 +6352,7 @@ def test_tile():
f = function([x], tile(x, reps_)) f = function([x], tile(x, reps_))
assert np.all(f(x_) == np.tile(x_, reps_)) assert np.all(f(x_) == np.tile(x_, reps_))
# list of integers and tensor.scalars: # list of integers and tt.scalars:
d = iscalar() d = iscalar()
reps = [2, d, 4] reps = [2, d, 4]
f = function([x, d], tile(x, reps)) f = function([x, d], tile(x, reps))
...@@ -6409,7 +6398,7 @@ def test_tile(): ...@@ -6409,7 +6398,7 @@ def test_tile():
tile(x, reps) tile(x, reps)
# error raising test: ndim is not None, ndim < x.ndim # error raising test: ndim is not None, ndim < x.ndim
# 3 cases below (reps is list/tensor.scalar/tensor.vector): # 3 cases below (reps is list/tt.scalar/tt.vector):
for reps in [[2, 3, 4], iscalar(), ivector()]: for reps in [[2, 3, 4], iscalar(), ivector()]:
if k > 1: if k > 1:
ndim = k - 1 ndim = k - 1
...@@ -6424,7 +6413,7 @@ def test_tile(): ...@@ -6424,7 +6413,7 @@ def test_tile():
tile(x, reps, ndim) tile(x, reps, ndim)
# error raising test: # error raising test:
# reps is tensor.vector and len(reps_value) > ndim, # reps is tt.vector and len(reps_value) > ndim,
# reps_value is the real value when excuting the function. # reps_value is the real value when excuting the function.
reps = ivector() reps = ivector()
r = [2, 3, 4, 5, 6, 7] r = [2, 3, 4, 5, 6, 7]
...@@ -7286,7 +7275,7 @@ def test_var(): ...@@ -7286,7 +7275,7 @@ def test_var():
assert np.allclose(v, f(a_val)) assert np.allclose(v, f(a_val))
# Test that we don't upcast float16 computation # Test that we don't upcast float16 computation
assert theano.tensor.vector(dtype="float16").var().dtype == "float16" assert tt.vector(dtype="float16").var().dtype == "float16"
class TestSum: class TestSum:
...@@ -7298,7 +7287,7 @@ class TestSum: ...@@ -7298,7 +7287,7 @@ class TestSum:
def test_list(self): def test_list(self):
ll = [theano.shared(0.0), theano.shared(2.0)] ll = [theano.shared(0.0), theano.shared(2.0)]
tensor.sum(ll).eval() == 2 tt.sum(ll).eval() == 2
@pytest.mark.skipif( @pytest.mark.skipif(
...@@ -7420,7 +7409,7 @@ def _test_autocast_numpy(): ...@@ -7420,7 +7409,7 @@ def _test_autocast_numpy():
# Go through some typical scalar values. # Go through some typical scalar values.
def ok(z): def ok(z):
assert tensor.constant(z).dtype == np.asarray(z).dtype assert tt.constant(z).dtype == np.asarray(z).dtype
for x in ( for x in (
[2 ** i for i in range(63)] + [0, 0, 1, 2 ** 63 - 1] + [0.0, 1.0, 1.1, 1.5] [2 ** i for i in range(63)] + [0, 0, 1, 2 ** 63 - 1] + [0.0, 1.0, 1.1, 1.5]
...@@ -7442,9 +7431,9 @@ def _test_autocast_numpy_floatX(): ...@@ -7442,9 +7431,9 @@ def _test_autocast_numpy_floatX():
def ok(z, floatX): def ok(z, floatX):
if isinstance(z, float) and floatX == "float32" and not hasattr(z, "dtype"): if isinstance(z, float) and floatX == "float32" and not hasattr(z, "dtype"):
# Special case where we use 'float32' instead of 'float64'. # Special case where we use 'float32' instead of 'float64'.
assert tensor.constant(z).dtype == "float32" assert tt.constant(z).dtype == "float32"
else: else:
assert tensor.constant(z).dtype == np.asarray(z).dtype assert tt.constant(z).dtype == np.asarray(z).dtype
try: try:
# Test with various values of `config.floatX`. # Test with various values of `config.floatX`.
...@@ -7484,13 +7473,13 @@ class TestArithmeticCast: ...@@ -7484,13 +7473,13 @@ class TestArithmeticCast:
# array == 1d array # array == 1d array
# i_scalar == scalar type used internally by Theano # i_scalar == scalar type used internally by Theano
def theano_scalar(dtype): def theano_scalar(dtype):
return tensor.scalar(dtype=str(dtype)) return tt.scalar(dtype=str(dtype))
def numpy_scalar(dtype): def numpy_scalar(dtype):
return np.array(1, dtype=dtype) return np.array(1, dtype=dtype)
def theano_array(dtype): def theano_array(dtype):
return tensor.vector(dtype=str(dtype)) return tt.vector(dtype=str(dtype))
def numpy_array(dtype): def numpy_array(dtype):
return np.array([1], dtype=dtype) return np.array([1], dtype=dtype)
...@@ -7524,8 +7513,8 @@ class TestArithmeticCast: ...@@ -7524,8 +7513,8 @@ class TestArithmeticCast:
# special way (depending on `config.int_division`). # special way (depending on `config.int_division`).
is_int_division = ( is_int_division = (
op is operator_div op is operator_div
and a_type in tensor.discrete_dtypes and a_type in tt.discrete_dtypes
and b_type in tensor.discrete_dtypes and b_type in tt.discrete_dtypes
) )
# We will test all meaningful combinations of # We will test all meaningful combinations of
# scalar and array operations. # scalar and array operations.
...@@ -7655,7 +7644,7 @@ class TestLongTensor: ...@@ -7655,7 +7644,7 @@ class TestLongTensor:
val = 2 ** exponent - 1 val = 2 ** exponent - 1
scalar_ct = constant(val) scalar_ct = constant(val)
assert scalar_ct.dtype in tensor.int_dtypes, ( assert scalar_ct.dtype in tt.int_dtypes, (
exponent, exponent,
val, val,
scalar_ct.dtype, scalar_ct.dtype,
...@@ -7745,7 +7734,7 @@ class TestBroadcast: ...@@ -7745,7 +7734,7 @@ class TestBroadcast:
def test_patternbroadcast(self): def test_patternbroadcast(self):
# Test that patternbroadcast with an empty broadcasting pattern works # Test that patternbroadcast with an empty broadcasting pattern works
x = scalar("x") x = scalar("x")
m = tensor.matrix("m") m = tt.matrix("m")
s = patternbroadcast(m, x.broadcastable) s = patternbroadcast(m, x.broadcastable)
assert s is m assert s is m
x2 = patternbroadcast(x, x.broadcastable) x2 = patternbroadcast(x, x.broadcastable)
...@@ -7786,7 +7775,7 @@ class TestBroadcast: ...@@ -7786,7 +7775,7 @@ class TestBroadcast:
def test_len():
    """`len()` must raise on symbolic tensors of every rank.

    A tensor's length is symbolic, so Python's `len` protocol (which needs
    a concrete int) cannot be supported.
    """
    for dims in [(5,), (3, 4), (7, 4, 6)]:
        sym = tt.tensor(dtype="floatX", broadcastable=(False,) * len(dims))
        with pytest.raises(TypeError):
            len(sym)
...@@ -7853,9 +7842,9 @@ def test_mod_compile(): ...@@ -7853,9 +7842,9 @@ def test_mod_compile():
# #
# The c_code generated is not compiling as of 30 June 2010. I fix the # The c_code generated is not compiling as of 30 June 2010. I fix the
# compilation in the same commit. # compilation in the same commit.
x = tensor.vector() x = tt.vector()
y = tensor.vector() y = tt.vector()
out = tensor.switch(tensor.eq(3 % x.shape[0], 0), y, y[:-1]) out = tt.switch(tt.eq(3 % x.shape[0], 0), y, y[:-1])
theano.function([x, y], out) theano.function([x, y], out)
...@@ -7874,7 +7863,7 @@ def test_unalign(): ...@@ -7874,7 +7863,7 @@ def test_unalign():
b[:] = rand(len(b)) b[:] = rand(len(b))
out_numpy = 2 * a + 3 * b out_numpy = 2 * a + 3 * b
av, bv = tensor.vectors("ab") av, bv = tt.vectors("ab")
f = theano.function([av, bv], 2 * av + 3 * bv) f = theano.function([av, bv], 2 * av + 3 * bv)
f.maker.fgraph.toposort() f.maker.fgraph.toposort()
...@@ -7893,7 +7882,7 @@ def test_unalign(): ...@@ -7893,7 +7882,7 @@ def test_unalign():
assert not b.flags.aligned assert not b.flags.aligned
out_numpy = 2 * a + 3 * b out_numpy = 2 * a + 3 * b
av, bv = tensor.scalars("ab") av, bv = tt.scalars("ab")
f = theano.function([av, bv], 2 * av + 3 * bv) f = theano.function([av, bv], 2 * av + 3 * bv)
f.maker.fgraph.toposort() f.maker.fgraph.toposort()
try: try:
...@@ -7907,12 +7896,12 @@ def test_unalign(): ...@@ -7907,12 +7896,12 @@ def test_unalign():
def test_dimshuffle_duplicate(): def test_dimshuffle_duplicate():
x = tensor.vector() x = tt.vector()
success = False success = False
try: try:
tensor.DimShuffle((False,), (0, 0))(x) tt.DimShuffle((False,), (0, 0))(x)
except ValueError as e: except ValueError as e:
assert str(e).find("may not appear twice") != -1 assert str(e).find("may not appear twice") != -1
success = True success = True
...@@ -7922,32 +7911,32 @@ def test_dimshuffle_duplicate(): ...@@ -7922,32 +7911,32 @@ def test_dimshuffle_duplicate():
class TestGetScalarConstantValue: class TestGetScalarConstantValue:
def test_get_scalar_constant_value(self): def test_get_scalar_constant_value(self):
a = tensor.stack([1, 2, 3]) a = tt.stack([1, 2, 3])
assert get_scalar_constant_value(a[0]) == 1 assert get_scalar_constant_value(a[0]) == 1
assert get_scalar_constant_value(a[1]) == 2 assert get_scalar_constant_value(a[1]) == 2
assert get_scalar_constant_value(a[2]) == 3 assert get_scalar_constant_value(a[2]) == 3
b = tensor.iscalar() b = tt.iscalar()
a = tensor.stack([b, 2, 3]) a = tt.stack([b, 2, 3])
with pytest.raises(tensor.basic.NotScalarConstantError): with pytest.raises(tt.basic.NotScalarConstantError):
get_scalar_constant_value(a[0]) get_scalar_constant_value(a[0])
assert get_scalar_constant_value(a[1]) == 2 assert get_scalar_constant_value(a[1]) == 2
assert get_scalar_constant_value(a[2]) == 3 assert get_scalar_constant_value(a[2]) == 3
# For now get_scalar_constant_value goes through only MakeVector and Join of # For now get_scalar_constant_value goes through only MakeVector and Join of
# scalars. # scalars.
v = tensor.ivector() v = tt.ivector()
a = tensor.stack([v, [2], [3]]) a = tt.stack([v, [2], [3]])
with pytest.raises(tensor.NotScalarConstantError): with pytest.raises(tt.NotScalarConstantError):
get_scalar_constant_value(a[0]) get_scalar_constant_value(a[0])
with pytest.raises(tensor.NotScalarConstantError): with pytest.raises(tt.NotScalarConstantError):
get_scalar_constant_value(a[1]) get_scalar_constant_value(a[1])
with pytest.raises(tensor.NotScalarConstantError): with pytest.raises(tt.NotScalarConstantError):
get_scalar_constant_value(a[2]) get_scalar_constant_value(a[2])
# Test the case SubTensor(Shape(v)) when the dimensions # Test the case SubTensor(Shape(v)) when the dimensions
# is broadcastable. # is broadcastable.
v = tensor.row() v = tt.row()
assert get_scalar_constant_value(v.shape[0]) == 1 assert get_scalar_constant_value(v.shape[0]) == 1
def test_subtensor_of_constant(self): def test_subtensor_of_constant(self):
...@@ -7962,14 +7951,14 @@ class TestGetScalarConstantValue: ...@@ -7962,14 +7951,14 @@ class TestGetScalarConstantValue:
def test_numpy_array(self): def test_numpy_array(self):
# Regression test for crash when called on a numpy array. # Regression test for crash when called on a numpy array.
assert get_scalar_constant_value(np.array(3)) == 3 assert get_scalar_constant_value(np.array(3)) == 3
with pytest.raises(tensor.NotScalarConstantError): with pytest.raises(tt.NotScalarConstantError):
get_scalar_constant_value(np.array([0, 1])) get_scalar_constant_value(np.array([0, 1]))
with pytest.raises(tensor.EmptyConstantError): with pytest.raises(tt.EmptyConstantError):
get_scalar_constant_value(np.array([])) get_scalar_constant_value(np.array([]))
def test_make_vector(self): def test_make_vector(self):
mv = opt.make_vector(1, 2, 3) mv = opt.make_vector(1, 2, 3)
with pytest.raises(tensor.NotScalarConstantError): with pytest.raises(tt.NotScalarConstantError):
get_scalar_constant_value(mv) get_scalar_constant_value(mv)
assert get_scalar_constant_value(mv[0]) == 1 assert get_scalar_constant_value(mv[0]) == 1
assert get_scalar_constant_value(mv[1]) == 2 assert get_scalar_constant_value(mv[1]) == 2
...@@ -7978,23 +7967,23 @@ class TestGetScalarConstantValue: ...@@ -7978,23 +7967,23 @@ class TestGetScalarConstantValue:
assert get_scalar_constant_value(mv[np.int64(1)]) == 2 assert get_scalar_constant_value(mv[np.int64(1)]) == 2
assert get_scalar_constant_value(mv[np.uint(2)]) == 3 assert get_scalar_constant_value(mv[np.uint(2)]) == 3
t = theano.scalar.Scalar("int64") t = theano.scalar.Scalar("int64")
with pytest.raises(tensor.NotScalarConstantError): with pytest.raises(tt.NotScalarConstantError):
get_scalar_constant_value(mv[t()]) get_scalar_constant_value(mv[t()])
def test_shape_i(self): def test_shape_i(self):
c = theano.tensor.constant(np.random.rand(3, 4)) c = tt.constant(np.random.rand(3, 4))
s = opt.Shape_i(0)(c) s = opt.Shape_i(0)(c)
assert get_scalar_constant_value(s) == 3 assert get_scalar_constant_value(s) == 3
s = opt.Shape_i(1)(c) s = opt.Shape_i(1)(c)
assert get_scalar_constant_value(s) == 4 assert get_scalar_constant_value(s) == 4
d = theano.shared(np.random.randn(1, 1), broadcastable=(True, True)) d = theano.shared(np.random.randn(1, 1), broadcastable=(True, True))
f = theano.tensor.basic.ScalarFromTensor()(opt.Shape_i(0)(d)) f = tt.ScalarFromTensor()(opt.Shape_i(0)(d))
assert get_scalar_constant_value(f) == 1 assert get_scalar_constant_value(f) == 1
def test_elemwise(self): def test_elemwise(self):
# We test only for a few elemwise, the list of all supported # We test only for a few elemwise, the list of all supported
# elemwise are in the fct. # elemwise are in the fct.
c = theano.tensor.constant(np.random.rand()) c = tt.constant(np.random.rand())
s = c + 1 s = c + 1
assert np.allclose(get_scalar_constant_value(s), c.data + 1) assert np.allclose(get_scalar_constant_value(s), c.data + 1)
s = c - 1 s = c - 1
...@@ -8003,14 +7992,14 @@ class TestGetScalarConstantValue: ...@@ -8003,14 +7992,14 @@ class TestGetScalarConstantValue:
assert np.allclose(get_scalar_constant_value(s), c.data * 1.2) assert np.allclose(get_scalar_constant_value(s), c.data * 1.2)
s = c < 0.5 s = c < 0.5
assert np.allclose(get_scalar_constant_value(s), int(c.data < 0.5)) assert np.allclose(get_scalar_constant_value(s), int(c.data < 0.5))
s = tensor.second(c, 0.4) s = tt.second(c, 0.4)
assert np.allclose(get_scalar_constant_value(s), 0.4) assert np.allclose(get_scalar_constant_value(s), 0.4)
def test_assert(self): def test_assert(self):
# Make sure we still get the constant value if it is wrapped in # Make sure we still get the constant value if it is wrapped in
# an Assert. # an Assert.
c = theano.tensor.constant(2) c = tt.constant(2)
x = theano.tensor.scalar() x = tt.scalar()
# condition is always True # condition is always True
a = opt.Assert()(c, c > 1) a = opt.Assert()(c, c > 1)
...@@ -8019,25 +8008,25 @@ class TestGetScalarConstantValue: ...@@ -8019,25 +8008,25 @@ class TestGetScalarConstantValue:
with change_flags(compute_test_value="off"): with change_flags(compute_test_value="off"):
# condition is always False # condition is always False
a = opt.Assert()(c, c > 2) a = opt.Assert()(c, c > 2)
with pytest.raises(tensor.NotScalarConstantError): with pytest.raises(tt.NotScalarConstantError):
get_scalar_constant_value(a) get_scalar_constant_value(a)
# condition is not constant # condition is not constant
a = opt.Assert()(c, c > x) a = opt.Assert()(c, c > x)
with pytest.raises(tensor.NotScalarConstantError): with pytest.raises(tt.NotScalarConstantError):
get_scalar_constant_value(a) get_scalar_constant_value(a)
def test_second(self): def test_second(self):
# Second should apply when the value is constant but not the shape # Second should apply when the value is constant but not the shape
c = theano.tensor.constant(np.random.rand()) c = tt.constant(np.random.rand())
shp = theano.tensor.vector() shp = tt.vector()
s = theano.tensor.second(shp, c) s = tt.second(shp, c)
assert get_scalar_constant_value(s) == c.data assert get_scalar_constant_value(s) == c.data
def test_copy(self): def test_copy(self):
# Make sure we do not return the internal storage of a constant, # Make sure we do not return the internal storage of a constant,
# so we cannot change the value of a constant by mistake. # so we cannot change the value of a constant by mistake.
c = theano.tensor.constant(3) c = tt.constant(3)
d = extract_constant(c) d = extract_constant(c)
d += 1 d += 1
e = extract_constant(c) e = extract_constant(c)
...@@ -8058,17 +8047,17 @@ class TestComplexMod: ...@@ -8058,17 +8047,17 @@ class TestComplexMod:
class TestSize: class TestSize:
# Ensure the `size` attribute of tensors behaves as in numpy. # Ensure the `size` attribute of tensors behaves as in numpy.
def test_matrix(self): def test_matrix(self):
x = tensor.matrix() x = tt.matrix()
y = np.zeros((5, 7), dtype=config.floatX) y = np.zeros((5, 7), dtype=config.floatX)
assert y.size == function([x], x.size)(y) assert y.size == function([x], x.size)(y)
def test_vector(self): def test_vector(self):
x = tensor.vector() x = tt.vector()
y = np.zeros(7, dtype=config.floatX) y = np.zeros(7, dtype=config.floatX)
assert y.size == function([x], x.size)(y) assert y.size == function([x], x.size)(y)
def test_scalar(self): def test_scalar(self):
x = tensor.scalar() x = tt.scalar()
y = np.array(7, dtype=config.floatX) y = np.array(7, dtype=config.floatX)
assert y.size == function([x], x.size)(y) assert y.size == function([x], x.size)(y)
...@@ -8080,7 +8069,7 @@ class TestSize: ...@@ -8080,7 +8069,7 @@ class TestSize:
class TestDiag: class TestDiag:
# Test that tensor.diag has the same behavior as np.diag. # Test that tt.diag has the same behavior as np.diag.
# np.diag has two behaviors: # np.diag has two behaviors:
# #
# (1) when given a vector, it returns a matrix with that vector as the # (1) when given a vector, it returns a matrix with that vector as the
...@@ -8095,15 +8084,15 @@ class TestDiag: ...@@ -8095,15 +8084,15 @@ class TestDiag:
# the right op based on the dimension of the input. # the right op based on the dimension of the input.
def setup_method(self): def setup_method(self):
self.mode = None self.mode = None
self.shared = tensor._shared self.shared = tt._shared
self.floatX = config.floatX self.floatX = config.floatX
self.type = tensor.TensorType self.type = tt.TensorType
def test_diag(self): def test_diag(self):
rng = np.random.RandomState(utt.fetch_seed()) rng = np.random.RandomState(utt.fetch_seed())
# test vector input # test vector input
x = theano.tensor.vector() x = tt.vector()
g = diag(x) g = diag(x)
assert isinstance(g.owner.op, AllocDiag) assert isinstance(g.owner.op, AllocDiag)
f = theano.function([x], g) f = theano.function([x], g)
...@@ -8128,14 +8117,14 @@ class TestDiag: ...@@ -8128,14 +8117,14 @@ class TestDiag:
assert (r == v).all() assert (r == v).all()
# Test scalar input # Test scalar input
xx = theano.tensor.scalar() xx = tt.scalar()
with pytest.raises(ValueError): with pytest.raises(ValueError):
diag(xx) diag(xx)
def test_infer_shape(self): def test_infer_shape(self):
rng = np.random.RandomState(utt.fetch_seed()) rng = np.random.RandomState(utt.fetch_seed())
x = theano.tensor.vector() x = tt.vector()
g = diag(x) g = diag(x)
f = theano.function([x], g.shape) f = theano.function([x], g.shape)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
...@@ -8145,7 +8134,7 @@ class TestDiag: ...@@ -8145,7 +8134,7 @@ class TestDiag:
m = rng.rand(shp).astype(self.floatX) m = rng.rand(shp).astype(self.floatX)
assert (f(m) == np.diag(m).shape).all() assert (f(m) == np.diag(m).shape).all()
x = theano.tensor.matrix() x = tt.matrix()
g = diag(x) g = diag(x)
f = theano.function([x], g.shape) f = theano.function([x], g.shape)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
...@@ -8158,9 +8147,9 @@ class TestDiag: ...@@ -8158,9 +8147,9 @@ class TestDiag:
def test_diag_grad(self): def test_diag_grad(self):
rng = np.random.RandomState(utt.fetch_seed()) rng = np.random.RandomState(utt.fetch_seed())
x = rng.rand(5) x = rng.rand(5)
tensor.verify_grad(diag, [x], rng=rng) tt.verify_grad(diag, [x], rng=rng)
x = rng.rand(5, 3) x = rng.rand(5, 3)
tensor.verify_grad(diag, [x], rng=rng) tt.verify_grad(diag, [x], rng=rng)
class TestAllocDiag: class TestAllocDiag:
...@@ -8223,9 +8212,9 @@ class TestAllocDiag: ...@@ -8223,9 +8212,9 @@ class TestAllocDiag:
assert np.all(rediag_shape == test_val.shape) assert np.all(rediag_shape == test_val.shape)
diag_x = adiag_op(x) diag_x = adiag_op(x)
sum_diag_x = tensor.sum(diag_x) sum_diag_x = tt.sum(diag_x)
grad_x = tensor.grad(sum_diag_x, x) grad_x = tt.grad(sum_diag_x, x)
grad_diag_x = tensor.grad(sum_diag_x, diag_x) grad_diag_x = tt.grad(sum_diag_x, diag_x)
f_grad_x = theano.function([x], grad_x, mode=self.mode) f_grad_x = theano.function([x], grad_x, mode=self.mode)
f_grad_diag_x = theano.function([x], grad_diag_x, mode=self.mode) f_grad_diag_x = theano.function([x], grad_diag_x, mode=self.mode)
grad_input = f_grad_x(test_val) grad_input = f_grad_x(test_val)
...@@ -8262,9 +8251,9 @@ class TestNumpyAssumptions: ...@@ -8262,9 +8251,9 @@ class TestNumpyAssumptions:
def test_transpose(): def test_transpose():
x1 = tensor.dvector("x1") x1 = tt.dvector("x1")
x2 = tensor.dmatrix("x2") x2 = tt.dmatrix("x2")
x3 = tensor.dtensor3("x3") x3 = tt.dtensor3("x3")
x1v = np.arange(24) x1v = np.arange(24)
x2v = np.arange(24).reshape(2, 12) x2v = np.arange(24).reshape(2, 12)
...@@ -8273,16 +8262,16 @@ def test_transpose(): ...@@ -8273,16 +8262,16 @@ def test_transpose():
f = theano.function( f = theano.function(
[x1, x2, x3], [x1, x2, x3],
[ [
tensor.transpose(x1), tt.transpose(x1),
tensor.transpose(x2), tt.transpose(x2),
tensor.transpose(x3), tt.transpose(x3),
x1.transpose(), x1.transpose(),
x2.transpose(), x2.transpose(),
x3.transpose(), x3.transpose(),
x2.transpose(0, 1), x2.transpose(0, 1),
x3.transpose((0, 2, 1)), x3.transpose((0, 2, 1)),
tensor.transpose(x2, [0, 1]), tt.transpose(x2, [0, 1]),
tensor.transpose(x3, [0, 2, 1]), tt.transpose(x3, [0, 2, 1]),
], ],
) )
...@@ -8306,10 +8295,10 @@ def test_transpose(): ...@@ -8306,10 +8295,10 @@ def test_transpose():
assert np.all(t3d == np.transpose(x3v, [0, 2, 1])) assert np.all(t3d == np.transpose(x3v, [0, 2, 1]))
# Check that we create a name. # Check that we create a name.
assert tensor.transpose(x1).name == "x1.T" assert tt.transpose(x1).name == "x1.T"
assert tensor.transpose(x2).name == "x2.T" assert tt.transpose(x2).name == "x2.T"
assert tensor.transpose(x3).name == "x3.T" assert tt.transpose(x3).name == "x3.T"
assert tensor.transpose(tensor.dmatrix()).name is None assert tt.transpose(tt.dmatrix()).name is None
def test_stacklists(): def test_stacklists():
...@@ -8536,7 +8525,7 @@ class TestInferShape(utt.InferShapeTester): ...@@ -8536,7 +8525,7 @@ class TestInferShape(utt.InferShapeTester):
[advec, bdvec], [advec, bdvec],
[Dot()(advec, bdvec)], [Dot()(advec, bdvec)],
[advec_val, bdvec_val], [advec_val, bdvec_val],
(Dot, tensor.blas.Dot22, tensor.blas.Gemv, tensor.blas_c.CGemv), (Dot, tt.blas.Dot22, tt.blas.Gemv, tt.blas_c.CGemv),
) )
# mat/mat # mat/mat
...@@ -8548,7 +8537,7 @@ class TestInferShape(utt.InferShapeTester): ...@@ -8548,7 +8537,7 @@ class TestInferShape(utt.InferShapeTester):
[admat, bdmat], [admat, bdmat],
[Dot()(admat, bdmat)], [Dot()(admat, bdmat)],
[admat_val, bdmat_val], [admat_val, bdmat_val],
(Dot, tensor.blas.Dot22), (Dot, tt.blas.Dot22),
) )
# vec/mat # vec/mat
...@@ -8557,7 +8546,7 @@ class TestInferShape(utt.InferShapeTester): ...@@ -8557,7 +8546,7 @@ class TestInferShape(utt.InferShapeTester):
[advec, bdmat], [advec, bdmat],
[Dot()(advec, bdmat)], [Dot()(advec, bdmat)],
[advec_val, bdmat_val], [advec_val, bdmat_val],
(Dot, tensor.blas.Dot22, tensor.blas.Gemv, tensor.blas_c.CGemv), (Dot, tt.blas.Dot22, tt.blas.Gemv, tt.blas_c.CGemv),
) )
# mat/vec # mat/vec
...@@ -8566,7 +8555,7 @@ class TestInferShape(utt.InferShapeTester): ...@@ -8566,7 +8555,7 @@ class TestInferShape(utt.InferShapeTester):
[admat, bdvec], [admat, bdvec],
[Dot()(admat, bdvec)], [Dot()(admat, bdvec)],
[admat_val, bdvec_val], [admat_val, bdvec_val],
(Dot, tensor.blas.Dot22, tensor.blas.Gemv, tensor.blas_c.CGemv), (Dot, tt.blas.Dot22, tt.blas.Gemv, tt.blas_c.CGemv),
) )
# Split # Split
...@@ -9034,7 +9023,7 @@ class TestTensorInstanceMethods: ...@@ -9034,7 +9023,7 @@ class TestTensorInstanceMethods:
def test_norm(): def test_norm():
x = theano.tensor.vector("x") x = tt.vector("x")
n = x.norm(2) n = x.norm(2)
f = theano.function([x], n) f = theano.function([x], n)
assert np.allclose(f([1, 1]), np.sqrt(2)) assert np.allclose(f([1, 1]), np.sqrt(2))
...@@ -9042,8 +9031,8 @@ def test_norm(): ...@@ -9042,8 +9031,8 @@ def test_norm():
class TestCov: class TestCov:
def test_core(self): def test_core(self):
x = theano.tensor.matrix("x") x = tt.matrix("x")
c = theano.tensor.cov(x) c = tt.cov(x)
f = theano.function([x], c) f = theano.function([x], c)
# basic cov function # basic cov function
...@@ -9064,8 +9053,8 @@ class TestCov: ...@@ -9064,8 +9053,8 @@ class TestCov:
def test_rowvar(self): def test_rowvar(self):
for rowvar in [True, False]: for rowvar in [True, False]:
x = theano.tensor.matrix("x") x = tt.matrix("x")
c = theano.tensor.cov(x, rowvar=rowvar) c = tt.cov(x, rowvar=rowvar)
f = theano.function([x], c) f = theano.function([x], c)
data = np.asarray(np.random.rand(3, 5), dtype=config.floatX) data = np.asarray(np.random.rand(3, 5), dtype=config.floatX)
...@@ -9081,17 +9070,17 @@ class TestCov: ...@@ -9081,17 +9070,17 @@ class TestCov:
assert np.allclose(f(data), np.cov(data, rowvar=rowvar)) assert np.allclose(f(data), np.cov(data, rowvar=rowvar))
# check when variables are along the first axis # check when variables are along the first axis
x = theano.tensor.matrix("x") x = tt.matrix("x")
c = theano.tensor.cov(x, rowvar=False) c = tt.cov(x, rowvar=False)
f = theano.function([x], c) f = theano.function([x], c)
data = np.asarray(np.random.rand(2, 1), dtype=config.floatX) data = np.asarray(np.random.rand(2, 1), dtype=config.floatX)
assert np.allclose(f(data), np.cov(data, rowvar=False)) assert np.allclose(f(data), np.cov(data, rowvar=False))
def test_y(self): def test_y(self):
# test y # test y
x = theano.tensor.matrix("x") x = tt.matrix("x")
y = theano.tensor.matrix("y") y = tt.matrix("y")
c = theano.tensor.cov(x, y=y) c = tt.cov(x, y=y)
f = theano.function([x, y], c) f = theano.function([x, y], c)
data = np.asarray(np.random.rand(3, 5), dtype=config.floatX) data = np.asarray(np.random.rand(3, 5), dtype=config.floatX)
...@@ -9113,8 +9102,8 @@ class TestCov: ...@@ -9113,8 +9102,8 @@ class TestCov:
def test_ddof(self): def test_ddof(self):
for ddof in range(0, 5): for ddof in range(0, 5):
x = theano.tensor.matrix("x") x = tt.matrix("x")
c = theano.tensor.cov(x, ddof=ddof) c = tt.cov(x, ddof=ddof)
f = theano.function([x], c) f = theano.function([x], c)
data = np.asarray(np.random.rand(3, 5), dtype=config.floatX) data = np.asarray(np.random.rand(3, 5), dtype=config.floatX)
...@@ -9123,8 +9112,8 @@ class TestCov: ...@@ -9123,8 +9112,8 @@ class TestCov:
def test_bias(self): def test_bias(self):
for bias in [True, False]: for bias in [True, False]:
x = theano.tensor.matrix("x") x = tt.matrix("x")
c = theano.tensor.cov(x, bias=bias) c = tt.cov(x, bias=bias)
f = theano.function([x], c) f = theano.function([x], c)
data = np.asarray(np.random.rand(3, 5), dtype=config.floatX) data = np.asarray(np.random.rand(3, 5), dtype=config.floatX)
...@@ -9132,8 +9121,8 @@ class TestCov: ...@@ -9132,8 +9121,8 @@ class TestCov:
for ddof in range(0, 5): for ddof in range(0, 5):
for bias in [True, False]: for bias in [True, False]:
x = theano.tensor.matrix("x") x = tt.matrix("x")
c = theano.tensor.cov(x, ddof=ddof, bias=bias) c = tt.cov(x, ddof=ddof, bias=bias)
f = theano.function([x], c) f = theano.function([x], c)
data = np.asarray(np.random.rand(3, 5), dtype=config.floatX) data = np.asarray(np.random.rand(3, 5), dtype=config.floatX)
...@@ -9243,12 +9232,12 @@ class TestSwapaxes: ...@@ -9243,12 +9232,12 @@ class TestSwapaxes:
assert np.array_equal(testMatrix, f(f(testMatrix))) assert np.array_equal(testMatrix, f(f(testMatrix)))
def test_interface(self): def test_interface(self):
x = theano.tensor.matrix() x = tt.matrix()
x.swapaxes(0, 1) x.swapaxes(0, 1)
def test_numpy_compare(self): def test_numpy_compare(self):
rng = np.random.RandomState(utt.fetch_seed()) rng = np.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX) A = tt.matrix("A", dtype=theano.config.floatX)
Q = swapaxes(A, 0, 1) Q = swapaxes(A, 0, 1)
fn = function([A], [Q]) fn = function([A], [Q])
a = rng.rand(4, 4).astype(theano.config.floatX) a = rng.rand(4, 4).astype(theano.config.floatX)
...@@ -9261,7 +9250,7 @@ class TestSwapaxes: ...@@ -9261,7 +9250,7 @@ class TestSwapaxes:
class TestPower: class TestPower:
def test_numpy_compare(self): def test_numpy_compare(self):
rng = np.random.RandomState(utt.fetch_seed()) rng = np.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX) A = tt.matrix("A", dtype=theano.config.floatX)
Q = power(A, 3) Q = power(A, 3)
fn = function([A], [Q]) fn = function([A], [Q])
a = rng.rand(4, 4).astype(theano.config.floatX) a = rng.rand(4, 4).astype(theano.config.floatX)
...@@ -9271,14 +9260,14 @@ class TestPower: ...@@ -9271,14 +9260,14 @@ class TestPower:
assert np.allclose(n_p, t_p) assert np.allclose(n_p, t_p)
def test_multiple_power(self): def test_multiple_power(self):
x = tensor.vector() x = tt.vector()
y = [1, 2, 3] y = [1, 2, 3]
z = power(x, y) z = power(x, y)
f = function([x], z) f = function([x], z)
assert np.allclose(f([1, 2, 3]), [1, 4, 27]) assert np.allclose(f([1, 2, 3]), [1, 4, 27])
def test_wrong_shape(self): def test_wrong_shape(self):
x = tensor.vector() x = tt.vector()
y = [1, 2, 3] y = [1, 2, 3]
z = power(x, y) z = power(x, y)
f = function([x], z) f = function([x], z)
...@@ -9293,8 +9282,8 @@ class TestChoose(utt.InferShapeTester): ...@@ -9293,8 +9282,8 @@ class TestChoose(utt.InferShapeTester):
def test_numpy_compare(self): def test_numpy_compare(self):
a = tensor.vector(dtype="int32") a = tt.vector(dtype="int32")
b = tensor.matrix(dtype="float32") b = tt.matrix(dtype="float32")
A = np.random.randint(0, 4, 4).astype("int32") A = np.random.randint(0, 4, 4).astype("int32")
B = np.asarray(np.random.rand(4, 4), dtype="float32") B = np.asarray(np.random.rand(4, 4), dtype="float32")
...@@ -9306,8 +9295,8 @@ class TestChoose(utt.InferShapeTester): ...@@ -9306,8 +9295,8 @@ class TestChoose(utt.InferShapeTester):
assert np.allclose(t_c, n_c) assert np.allclose(t_c, n_c)
def test_method(self): def test_method(self):
a = tensor.vector(dtype="int32") a = tt.vector(dtype="int32")
b = tensor.matrix(dtype="float32") b = tt.matrix(dtype="float32")
A = np.random.randint(0, 4, 4).astype("int32") A = np.random.randint(0, 4, 4).astype("int32")
B = np.asarray(np.random.rand(4, 4), dtype="float32") B = np.asarray(np.random.rand(4, 4), dtype="float32")
...@@ -9319,8 +9308,8 @@ class TestChoose(utt.InferShapeTester): ...@@ -9319,8 +9308,8 @@ class TestChoose(utt.InferShapeTester):
assert np.allclose(t_c, n_c) assert np.allclose(t_c, n_c)
def test_broadcasted(self): def test_broadcasted(self):
a = tensor.scalar(dtype="int32") a = tt.scalar(dtype="int32")
b = tensor.matrix(dtype="float32") b = tt.matrix(dtype="float32")
# Test when a is broadcastable # Test when a is broadcastable
A = 3 A = 3
...@@ -9333,7 +9322,7 @@ class TestChoose(utt.InferShapeTester): ...@@ -9333,7 +9322,7 @@ class TestChoose(utt.InferShapeTester):
assert np.allclose(t_c, n_c) assert np.allclose(t_c, n_c)
# Test when the result should be broadcastable # Test when the result should be broadcastable
b = theano.tensor.col(dtype="float32") b = tt.col(dtype="float32")
B = np.asarray(np.random.rand(4, 1), dtype="float32") B = np.asarray(np.random.rand(4, 1), dtype="float32")
for m in self.modes: for m in self.modes:
f = function([a, b], choose(a, b, mode=m)) f = function([a, b], choose(a, b, mode=m))
...@@ -9343,17 +9332,17 @@ class TestChoose(utt.InferShapeTester): ...@@ -9343,17 +9332,17 @@ class TestChoose(utt.InferShapeTester):
assert np.allclose(t_c, n_c) assert np.allclose(t_c, n_c)
def test_dtype_error(self): def test_dtype_error(self):
a = tensor.scalar(dtype="float32") a = tt.scalar(dtype="float32")
b = tensor.matrix(dtype="float32") b = tt.matrix(dtype="float32")
with pytest.raises(TypeError): with pytest.raises(TypeError):
choose(a, b) choose(a, b)
def test_numpy_compare_tuple(self): def test_numpy_compare_tuple(self):
a = tensor.tensor3(dtype="int32") a = tt.tensor3(dtype="int32")
b = tensor.tensor3(dtype="float32") b = tt.tensor3(dtype="float32")
c = tensor.tensor3(dtype="float32") c = tt.tensor3(dtype="float32")
A = np.random.randint(0, 2, (2, 1, 1)).astype("int32") A = np.random.randint(0, 2, (2, 1, 1)).astype("int32")
B = np.asarray(np.random.rand(1, 6, 1), dtype="float32") B = np.asarray(np.random.rand(1, 6, 1), dtype="float32")
...@@ -9385,8 +9374,8 @@ class TestChoose(utt.InferShapeTester): ...@@ -9385,8 +9374,8 @@ class TestChoose(utt.InferShapeTester):
((4,), (1,)), ((4,), (1,)),
((1,), (1,)), ((1,), (1,)),
]: ]:
a = tensor.tensor(dtype="int32", broadcastable=[n == 1 for n in shp1]) a = tt.tensor(dtype="int32", broadcastable=[n == 1 for n in shp1])
c = tensor.tensor(dtype="float32", broadcastable=[n == 1 for n in shp2]) c = tt.tensor(dtype="float32", broadcastable=[n == 1 for n in shp2])
A = np.asarray(np.random.rand(*shp1) * shp2[0], dtype="int32") A = np.asarray(np.random.rand(*shp1) * shp2[0], dtype="int32")
C = np.asarray(np.random.rand(*shp2) * shp2[0], dtype="float32") C = np.asarray(np.random.rand(*shp2) * shp2[0], dtype="float32")
self._compile_and_check( self._compile_and_check(
...@@ -9402,9 +9391,9 @@ class TestChoose(utt.InferShapeTester): ...@@ -9402,9 +9391,9 @@ class TestChoose(utt.InferShapeTester):
@pytest.mark.skip(reason="Not implemented") @pytest.mark.skip(reason="Not implemented")
def test_infer_shape_tuple(self): def test_infer_shape_tuple(self):
a = tensor.tensor3(dtype="int32") a = tt.tensor3(dtype="int32")
b = tensor.tensor3(dtype="int32") b = tt.tensor3(dtype="int32")
c = tensor.tensor3(dtype="int32") c = tt.tensor3(dtype="int32")
A = np.asarray([1, 0], dtype="int32").reshape((2, 1, 1)) A = np.asarray([1, 0], dtype="int32").reshape((2, 1, 1))
B = np.asarray(np.random.rand(1, 4, 1), dtype="int32") B = np.asarray(np.random.rand(1, 4, 1), dtype="int32")
...@@ -9436,7 +9425,7 @@ def test_allocempty(): ...@@ -9436,7 +9425,7 @@ def test_allocempty():
def test_symbolic_slice(): def test_symbolic_slice():
x = theano.tensor.tensor4("x") x = tt.tensor4("x")
a, b = x.shape[:2] a, b = x.shape[:2]
output = a.eval({x: np.zeros((5, 4, 3, 2), dtype=theano.config.floatX)}) output = a.eval({x: np.zeros((5, 4, 3, 2), dtype=theano.config.floatX)})
assert output == np.array(5) assert output == np.array(5)
...@@ -3,7 +3,7 @@ import pytest ...@@ -3,7 +3,7 @@ import pytest
import numpy as np import numpy as np
import theano import theano
import theano.tensor as T import theano.tensor as tt
import theano.tensor.blas_scipy import theano.tensor.blas_scipy
from copy import copy from copy import copy
...@@ -25,7 +25,7 @@ from numpy import ( ...@@ -25,7 +25,7 @@ from numpy import (
) )
from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_almost_equal
from theano import tensor, In, shared, config from theano import In, shared, config
from theano.tensor.blas import ( from theano.tensor.blas import (
_dot22, _dot22,
_dot22scalar, _dot22scalar,
...@@ -42,7 +42,17 @@ from theano.tensor.blas import ( ...@@ -42,7 +42,17 @@ from theano.tensor.blas import (
Ger, Ger,
ger, ger,
ger_destructive, ger_destructive,
Dot22,
Dot22Scalar,
gemm,
local_dot22_to_dot22scalar,
gemv_no_inplace,
gemv,
gemv_inplace,
local_gemm_to_ger,
) )
from theano.tensor.nnet import sigmoid
from theano.tensor.opt import in2out
from tests import unittest_tools from tests import unittest_tools
from tests.tensor.test_basic import as_tensor_variable, inplace_func, compile, inplace from tests.tensor.test_basic import as_tensor_variable, inplace_func, compile, inplace
...@@ -60,7 +70,7 @@ mode_blas_opt = mode_blas_opt.excluding("c_blas") ...@@ -60,7 +70,7 @@ mode_blas_opt = mode_blas_opt.excluding("c_blas")
def test_dot_eq(): def test_dot_eq():
assert T.Dot() == T.Dot() assert tt.Dot() == tt.Dot()
def sharedX(x, name): def sharedX(x, name):
...@@ -174,18 +184,18 @@ class TestGemm: ...@@ -174,18 +184,18 @@ class TestGemm:
self.cmp(self.rand(0, 0), -1.0, self.rand(0, 0), self.rand(0, 0), -1.0) self.cmp(self.rand(0, 0), -1.0, self.rand(0, 0), self.rand(0, 0), -1.0)
def test_factorised_scalar(self): def test_factorised_scalar(self):
a = T.matrix() a = tt.matrix()
b = T.matrix() b = tt.matrix()
s = theano.shared(np.zeros((5, 5)).astype(config.floatX)) s = theano.shared(np.zeros((5, 5)).astype(config.floatX))
lr1 = T.constant(0.01).astype(config.floatX) lr1 = tt.constant(0.01).astype(config.floatX)
lr2 = T.constant(2).astype(config.floatX) lr2 = tt.constant(2).astype(config.floatX)
l2_reg = T.constant(0.0001).astype(config.floatX) l2_reg = tt.constant(0.0001).astype(config.floatX)
# test constant merge with gemm # test constant merge with gemm
f = theano.function( f = theano.function(
[a, b], [a, b],
updates=[(s, lr1 * T.dot(a, b) + l2_reg * lr2 * s)], updates=[(s, lr1 * tt.dot(a, b) + l2_reg * lr2 * s)],
mode=mode_not_fast_compile, mode=mode_not_fast_compile,
).maker.fgraph.toposort() ).maker.fgraph.toposort()
# [Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, # [Gemm{inplace}(<TensorType(float64, matrix)>, 0.01,
...@@ -197,7 +207,7 @@ class TestGemm: ...@@ -197,7 +207,7 @@ class TestGemm:
# test factored scalar with merge # test factored scalar with merge
f = theano.function( f = theano.function(
[a, b], [a, b],
updates=[(s, lr1 * (T.dot(a, b) - l2_reg * s))], updates=[(s, lr1 * (tt.dot(a, b) - l2_reg * s))],
mode=mode_not_fast_compile, mode=mode_not_fast_compile,
).maker.fgraph.toposort() ).maker.fgraph.toposort()
# [Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, # [Gemm{inplace}(<TensorType(float64, matrix)>, 0.01,
...@@ -209,7 +219,7 @@ class TestGemm: ...@@ -209,7 +219,7 @@ class TestGemm:
# test factored scalar with merge and neg # test factored scalar with merge and neg
f = theano.function( f = theano.function(
[a, b], [a, b],
updates=[(s, s - lr1 * (s * 0.0002 + T.dot(a, b)))], updates=[(s, s - lr1 * (s * 0.0002 + tt.dot(a, b)))],
mode=mode_not_fast_compile, mode=mode_not_fast_compile,
).maker.fgraph.toposort() ).maker.fgraph.toposort()
# [Gemm{inplace}(<TensorType(float64, matrix)>, -0.01, # [Gemm{inplace}(<TensorType(float64, matrix)>, -0.01,
...@@ -249,7 +259,7 @@ class TestGemm: ...@@ -249,7 +259,7 @@ class TestGemm:
# test that dot args can be aliased # test that dot args can be aliased
Z = shared(self.rand(2, 2), name="Z") Z = shared(self.rand(2, 2), name="Z")
A = shared(self.rand(2, 2), name="A") A = shared(self.rand(2, 2), name="A")
one = T.constant(1.0).astype(Z.dtype) one = tt.constant(1.0).astype(Z.dtype)
f = inplace_func([], gemm_inplace(Z, one, A, A, one)) f = inplace_func([], gemm_inplace(Z, one, A, A, one))
f() f()
f = inplace_func([], gemm_inplace(Z, one, A, A.T, one)) f = inplace_func([], gemm_inplace(Z, one, A, A.T, one))
...@@ -349,7 +359,7 @@ class TestGemm: ...@@ -349,7 +359,7 @@ class TestGemm:
g_i = theano.function( g_i = theano.function(
[], [],
tz_i, tz_i,
updates=[(tz, T.set_subtensor(tz[:, :, i], tz_i))], updates=[(tz, tt.set_subtensor(tz[:, :, i], tz_i))],
mode=compile.Mode(optimizer=None, linker=l), mode=compile.Mode(optimizer=None, linker=l),
) )
for j in range(3): for j in range(3):
...@@ -400,11 +410,11 @@ class TestGemmNoFlags(object): ...@@ -400,11 +410,11 @@ class TestGemmNoFlags(object):
slice_B=False, slice_B=False,
slice_C=False, slice_C=False,
): ):
alpha = theano.tensor.scalar(dtype=dtype, name="alpha") alpha = tt.scalar(dtype=dtype, name="alpha")
beta = theano.tensor.scalar(dtype=dtype, name="beta") beta = tt.scalar(dtype=dtype, name="beta")
A = theano.tensor.matrix(dtype=dtype, name="A") A = tt.matrix(dtype=dtype, name="A")
B = theano.tensor.matrix(dtype=dtype, name="B") B = tt.matrix(dtype=dtype, name="B")
C = theano.tensor.matrix(dtype=dtype, name="C") C = tt.matrix(dtype=dtype, name="C")
A1 = self.get_variable(A, transpose_A, slice_A) A1 = self.get_variable(A, transpose_A, slice_A)
B1 = self.get_variable(B, transpose_B, slice_B) B1 = self.get_variable(B, transpose_B, slice_B)
...@@ -522,9 +532,9 @@ class TestGemmNoFlags(object): ...@@ -522,9 +532,9 @@ class TestGemmNoFlags(object):
def test_res_is_a(): def test_res_is_a():
X, Y, Z, a, b = XYZab() X, Y, Z, a, b = XYZab()
assert not res_is_a(a, T.sqrt) assert not res_is_a(a, tt.sqrt)
assert not res_is_a(a + a, T.sqrt) assert not res_is_a(a + a, tt.sqrt)
assert res_is_a(T.sqrt(a + a), T.sqrt) assert res_is_a(tt.sqrt(a + a), tt.sqrt)
# leave the maxclients stuff untested because it requires being in an fgraph. # leave the maxclients stuff untested because it requires being in an fgraph.
...@@ -532,13 +542,13 @@ def test_res_is_a(): ...@@ -532,13 +542,13 @@ def test_res_is_a():
class TestAsScalar: class TestAsScalar:
def test_basic(self): def test_basic(self):
# Test that it works on scalar constants # Test that it works on scalar constants
a = T.constant(2.5) a = tt.constant(2.5)
b = T.constant(np.asarray([[[0.5]]])) b = tt.constant(np.asarray([[[0.5]]]))
b2 = b.dimshuffle() b2 = b.dimshuffle()
assert b2.ndim == 0 assert b2.ndim == 0
d_a = T.DimShuffle([], [])(a) d_a = tt.DimShuffle([], [])(a)
d_b = T.DimShuffle([True, True, True], [0, 2, 1])(b) d_b = tt.DimShuffle([True, True, True], [0, 2, 1])(b)
d_a2 = T.DimShuffle([], ["x", "x", "x"])(a) d_a2 = tt.DimShuffle([], ["x", "x", "x"])(a)
assert _as_scalar(a) == a assert _as_scalar(a) == a
assert _as_scalar(b) != b assert _as_scalar(b) != b
...@@ -548,15 +558,15 @@ class TestAsScalar: ...@@ -548,15 +558,15 @@ class TestAsScalar:
def test_basic_1(self): def test_basic_1(self):
# Test that it fails on nonscalar constants # Test that it fails on nonscalar constants
a = T.constant(np.ones(5)) a = tt.constant(np.ones(5))
assert _as_scalar(a) is None assert _as_scalar(a) is None
assert _as_scalar(T.DimShuffle([False], [0, "x"])(a)) is None assert _as_scalar(tt.DimShuffle([False], [0, "x"])(a)) is None
def test_basic_2(self): def test_basic_2(self):
# Test that it works on scalar variables # Test that it works on scalar variables
a = T.dscalar() a = tt.dscalar()
d_a = T.DimShuffle([], [])(a) d_a = tt.DimShuffle([], [])(a)
d_a2 = T.DimShuffle([], ["x", "x"])(a) d_a2 = tt.DimShuffle([], ["x", "x"])(a)
assert _as_scalar(a) is a assert _as_scalar(a) is a
assert _as_scalar(d_a) is a assert _as_scalar(d_a) is a
...@@ -564,15 +574,15 @@ class TestAsScalar: ...@@ -564,15 +574,15 @@ class TestAsScalar:
def test_basic_3(self): def test_basic_3(self):
# Test that it fails on nonscalar variables # Test that it fails on nonscalar variables
a = T.matrix() a = tt.matrix()
assert _as_scalar(a) is None assert _as_scalar(a) is None
assert _as_scalar(T.DimShuffle([False, False], [0, "x", 1])(a)) is None assert _as_scalar(tt.DimShuffle([False, False], [0, "x", 1])(a)) is None
class TestRealMatrix: class TestRealMatrix:
def test_basic(self): def test_basic(self):
assert _is_real_matrix(T.DimShuffle([False, False], [1, 0])(T.matrix())) assert _is_real_matrix(tt.DimShuffle([False, False], [1, 0])(tt.matrix()))
assert not _is_real_matrix(T.DimShuffle([False], ["x", 0])(T.dvector())) assert not _is_real_matrix(tt.DimShuffle([False], ["x", 0])(tt.dvector()))
def fail(msg): def fail(msg):
...@@ -587,7 +597,7 @@ that the resulting functions compute the same things as the originals. ...@@ -587,7 +597,7 @@ that the resulting functions compute the same things as the originals.
def XYZab(): def XYZab():
return T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar() return tt.matrix(), tt.matrix(), tt.matrix(), tt.scalar(), tt.scalar()
def just_gemm( def just_gemm(
...@@ -602,7 +612,7 @@ def just_gemm( ...@@ -602,7 +612,7 @@ def just_gemm(
nb_gemm = 0 nb_gemm = 0
for node in f.maker.fgraph.apply_nodes: for node in f.maker.fgraph.apply_nodes:
assert not isinstance( assert not isinstance(
node.op, T.Dot node.op, tt.Dot
), "_dot22 not changed to gemm_inplace in graph" ), "_dot22 not changed to gemm_inplace in graph"
assert node.op != _dot22 assert node.op != _dot22
if node.op == gemm_inplace: if node.op == gemm_inplace:
...@@ -640,42 +650,42 @@ def test_gemm_opt0(): ...@@ -640,42 +650,42 @@ def test_gemm_opt0():
# Many subgraphs whose dots can be eliminated # Many subgraphs whose dots can be eliminated
X, Y, Z, a, b = XYZab() X, Y, Z, a, b = XYZab()
just_gemm([X, Y, Z, a, b], [T.dot(X, Y) * a + Z * b]) just_gemm([X, Y, Z, a, b], [tt.dot(X, Y) * a + Z * b])
just_gemm([X, Y, Z, a, b], [a * T.dot(X, Y) + b * Z]) just_gemm([X, Y, Z, a, b], [a * tt.dot(X, Y) + b * Z])
just_gemm([X, Y, Z, a, b], [b * Z + a * T.dot(X, Y)]) just_gemm([X, Y, Z, a, b], [b * Z + a * tt.dot(X, Y)])
just_gemm([X, Y, Z, a, b], [T.dot(X, Y) * a - Z * b]) just_gemm([X, Y, Z, a, b], [tt.dot(X, Y) * a - Z * b])
just_gemm([X, Y, Z, a, b], [a * T.dot(X, Y) - b * Z]) just_gemm([X, Y, Z, a, b], [a * tt.dot(X, Y) - b * Z])
just_gemm([X, Y, Z, a, b], [b * Z - a * T.dot(X, Y)]) just_gemm([X, Y, Z, a, b], [b * Z - a * tt.dot(X, Y)])
# with transposes (transposes should be pushed through dot in canonicalize) # with transposes (transposes should be pushed through dot in canonicalize)
just_gemm([X, Y, Z, a, b], [b * Z.T - a * T.dot(Y.T, X.T)]) just_gemm([X, Y, Z, a, b], [b * Z.T - a * tt.dot(Y.T, X.T)])
just_gemm([X, Y, Z, a, b], [b * Z.T + a * b * T.dot(X, Y).T]) just_gemm([X, Y, Z, a, b], [b * Z.T + a * b * tt.dot(X, Y).T])
just_gemm( just_gemm(
[X, Y, Z, a, b], [X, Y, Z, a, b],
[b * Z + a * T.dot(X, Y).T], [b * Z + a * tt.dot(X, Y).T],
ishapes=[(5, 3), (3, 4), (4, 5), (), ()], ishapes=[(5, 3), (3, 4), (4, 5), (), ()],
) )
# with N multiplications instead of just one # with N multiplications instead of just one
just_gemm([X, Y, Z, a, b], [(b * b) * Z * a + (a * a) * T.dot(X, Y) * b]) just_gemm([X, Y, Z, a, b], [(b * b) * Z * a + (a * a) * tt.dot(X, Y) * b])
just_gemm([X, Y, Z, a, b], [Z + T.dot(X, Y)]) just_gemm([X, Y, Z, a, b], [Z + tt.dot(X, Y)])
just_gemm([X, Y, Z, a, b], [Z * b + T.dot(X, Y)]) just_gemm([X, Y, Z, a, b], [Z * b + tt.dot(X, Y)])
just_gemm([X, Y, Z, a, b], [Z + a * b * a * T.dot(X, Y)]) just_gemm([X, Y, Z, a, b], [Z + a * b * a * tt.dot(X, Y)])
just_gemm([X, Y, Z, a, b], [(b * b) * Z * a - (a * a) * T.dot(X, Y) * b]) just_gemm([X, Y, Z, a, b], [(b * b) * Z * a - (a * a) * tt.dot(X, Y) * b])
just_gemm([X, Y, Z, a, b], [Z - T.dot(X, Y)]) just_gemm([X, Y, Z, a, b], [Z - tt.dot(X, Y)])
just_gemm([X, Y, Z, a, b], [Z * b - T.dot(X, Y)]) just_gemm([X, Y, Z, a, b], [Z * b - tt.dot(X, Y)])
just_gemm([X, Y, Z, a, b], [Z - a * b * a * T.dot(X, Y)]) just_gemm([X, Y, Z, a, b], [Z - a * b * a * tt.dot(X, Y)])
@unittest_tools.assertFailure_fast @unittest_tools.assertFailure_fast
def test_gemm_opt_double_gemm(): def test_gemm_opt_double_gemm():
# This is the pattern that shows up in the autoencoder # This is the pattern that shows up in the autoencoder
X, Y, Z, a, b = T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar() X, Y, Z, a, b = tt.matrix(), tt.matrix(), tt.matrix(), tt.scalar(), tt.scalar()
R, S, c = T.matrix(), T.matrix(), T.scalar() R, S, c = tt.matrix(), tt.matrix(), tt.scalar()
just_gemm( just_gemm(
[X, Y, Z, a, b, R, S, c], [X, Y, Z, a, b, R, S, c],
[Z * c + a * T.dot(X, Y) + b * T.dot(R, S).T], [Z * c + a * tt.dot(X, Y) + b * tt.dot(R, S).T],
ishapes=[(4, 3), (3, 5), (4, 5), (), (), (5, 9), (9, 4), ()], ishapes=[(4, 3), (3, 5), (4, 5), (), (), (5, 9), (9, 4), ()],
expected_nb_gemm=2, expected_nb_gemm=2,
) )
...@@ -684,8 +694,8 @@ def test_gemm_opt_double_gemm(): ...@@ -684,8 +694,8 @@ def test_gemm_opt_double_gemm():
i = [X, Y, Z, a, b, R, S, c] i = [X, Y, Z, a, b, R, S, c]
o = [ o = [
( (
a * T.dot(X, Y) a * tt.dot(X, Y)
+ gemm_inplace(Z, b, S.T, R.T, T.constant(1.0).astype(config.floatX)) + gemm_inplace(Z, b, S.T, R.T, tt.constant(1.0).astype(config.floatX))
) )
] ]
f = inplace_func( f = inplace_func(
...@@ -695,7 +705,7 @@ def test_gemm_opt_double_gemm(): ...@@ -695,7 +705,7 @@ def test_gemm_opt_double_gemm():
on_unused_input="ignore", on_unused_input="ignore",
) )
for node in f.maker.fgraph.apply_nodes: for node in f.maker.fgraph.apply_nodes:
assert not isinstance(node.op, T.Dot) assert not isinstance(node.op, tt.Dot)
assert node.op != _dot22 assert node.op != _dot22
g = inplace_func( g = inplace_func(
i, i,
...@@ -717,16 +727,16 @@ def test_gemm_opt_double_gemm(): ...@@ -717,16 +727,16 @@ def test_gemm_opt_double_gemm():
def test_gemm_canonicalize(): def test_gemm_canonicalize():
X, Y, Z, a, b = ( X, Y, Z, a, b = (
T.matrix("X"), tt.matrix("X"),
T.matrix("Y"), tt.matrix("Y"),
T.matrix("Z"), tt.matrix("Z"),
T.scalar("a"), tt.scalar("a"),
T.scalar("b"), tt.scalar("b"),
) )
c, d = T.scalar("c"), T.scalar("d") c, d = tt.scalar("c"), tt.scalar("d")
u = T.row("u") u = tt.row("u")
v = T.vector("v") v = tt.vector("v")
w = T.col("w") w = tt.col("w")
can = [] can = []
_gemm_canonicalize(X + Y + Z, 1.0, can, 0) _gemm_canonicalize(X + Y + Z, 1.0, can, 0)
...@@ -744,7 +754,7 @@ def test_gemm_canonicalize(): ...@@ -744,7 +754,7 @@ def test_gemm_canonicalize():
assert len(can[2]) == 2 assert len(can[2]) == 2
assert can[2][0] == 1.0 assert can[2][0] == 1.0
assert can[2][1].owner assert can[2][1].owner
assert isinstance(can[2][1].owner.op, T.DimShuffle) assert isinstance(can[2][1].owner.op, tt.DimShuffle)
assert can[2][1].owner.inputs == [v] assert can[2][1].owner.inputs == [v]
can = [] can = []
...@@ -755,26 +765,26 @@ def test_gemm_canonicalize(): ...@@ -755,26 +765,26 @@ def test_gemm_canonicalize():
_gemm_canonicalize(a * X + Y - b * Z * c, 1.0, can, 0) _gemm_canonicalize(a * X + Y - b * Z * c, 1.0, can, 0)
assert can[0] == (a, X) assert can[0] == (a, X)
assert can[1] == (1.0, Y) assert can[1] == (1.0, Y)
assert can[2][0].owner.op == T.mul assert can[2][0].owner.op == tt.mul
assert can[2][0].owner.inputs[0].owner.op == T.neg assert can[2][0].owner.inputs[0].owner.op == tt.neg
assert can[2][0].owner.inputs[0].owner.inputs[0] == c assert can[2][0].owner.inputs[0].owner.inputs[0] == c
assert can[2][0].owner.inputs[1] == b assert can[2][0].owner.inputs[1] == b
can = [] can = []
_gemm_canonicalize((-d) * X - (a * X + Y - b * Z * c), 1.0, can, 0) _gemm_canonicalize((-d) * X - (a * X + Y - b * Z * c), 1.0, can, 0)
# print can # print can
assert can[0][0].owner.op == T.neg assert can[0][0].owner.op == tt.neg
assert can[0][0].owner.inputs[0] == d assert can[0][0].owner.inputs[0] == d
assert can[0][1] == X assert can[0][1] == X
assert can[1][0].owner.op == T.neg assert can[1][0].owner.op == tt.neg
assert can[1][0].owner.inputs[0] == a assert can[1][0].owner.inputs[0] == a
assert can[2] == (-1.0, Y) assert can[2] == (-1.0, Y)
assert can[3][0].owner.op == T.mul assert can[3][0].owner.op == tt.mul
assert can[3][0].owner.inputs == [c, b] assert can[3][0].owner.inputs == [c, b]
def test_gemm_factor(): def test_gemm_factor():
X, Y = T.matrix("X"), T.matrix("Y") X, Y = tt.matrix("X"), tt.matrix("Y")
assert [(1.0, X), (1.0, Y)] == _factor_canonicalized([(1.0, X), (1.0, Y)]) assert [(1.0, X), (1.0, Y)] == _factor_canonicalized([(1.0, X), (1.0, Y)])
assert [(2.0, X)] == _factor_canonicalized([(1.0, X), (1.0, X)]) assert [(2.0, X)] == _factor_canonicalized([(1.0, X), (1.0, X)])
...@@ -783,27 +793,27 @@ def test_gemm_factor(): ...@@ -783,27 +793,27 @@ def test_gemm_factor():
def test_upcasting_scalar_nogemm(): def test_upcasting_scalar_nogemm():
# Test that the optimization does not crash when the scale has an incorrect # Test that the optimization does not crash when the scale has an incorrect
# dtype, and forces upcasting of the result # dtype, and forces upcasting of the result
v = T.fmatrix("v") v = tt.fmatrix("v")
w = T.fmatrix("w") w = tt.fmatrix("w")
t = T.fmatrix("t") t = tt.fmatrix("t")
alpha = T.dscalar("a") alpha = tt.dscalar("a")
rval = T.dot(w, v) * alpha + t rval = tt.dot(w, v) * alpha + t
f = theano.function([w, v, t, alpha], rval) f = theano.function([w, v, t, alpha], rval)
t = f.maker.fgraph.toposort() t = f.maker.fgraph.toposort()
assert np.sum([isinstance(n.op, Gemm) for n in t]) == 0 assert np.sum([isinstance(n.op, Gemm) for n in t]) == 0
# theano.printing.debugprint(f, print_type=True) # theano.printing.debugprint(f, print_type=True)
v = T.fmatrix("v") v = tt.fmatrix("v")
w = T.fmatrix("w") w = tt.fmatrix("w")
t = T.fmatrix("t") t = tt.fmatrix("t")
alpha = T.cscalar("a") alpha = tt.cscalar("a")
on_opt_error = config.on_opt_error on_opt_error = config.on_opt_error
try: try:
config.on_opt_error = "raise" config.on_opt_error = "raise"
rval = T.dot(w, v) * alpha + t rval = tt.dot(w, v) * alpha + t
f = theano.function([w, v, t, alpha], rval) f = theano.function([w, v, t, alpha], rval)
finally: finally:
config.on_opt_error = on_opt_error config.on_opt_error = on_opt_error
...@@ -815,51 +825,51 @@ def test_upcasting_scalar_nogemm(): ...@@ -815,51 +825,51 @@ def test_upcasting_scalar_nogemm():
def test_gemm_nested(): def test_gemm_nested():
X, Y, Z, a, b = ( X, Y, Z, a, b = (
T.matrix("X"), tt.matrix("X"),
T.matrix("Y"), tt.matrix("Y"),
T.matrix("Z"), tt.matrix("Z"),
T.scalar("a"), tt.scalar("a"),
T.scalar("b"), tt.scalar("b"),
) )
R, S, U, c, d = ( R, S, U, c, d = (
T.matrix("R"), tt.matrix("R"),
T.matrix("S"), tt.matrix("S"),
T.matrix("U"), tt.matrix("U"),
T.scalar("c"), tt.scalar("c"),
T.scalar("d"), tt.scalar("d"),
) )
just_gemm( just_gemm(
[X, Y, Z, R, S, U, a, b, c, d], [X, Y, Z, R, S, U, a, b, c, d],
[a * Z - b * (c * T.dot(X, Y) + d * Z)], [a * Z - b * (c * tt.dot(X, Y) + d * Z)],
ishapes=[(2, 3), (3, 4), (2, 4), (2, 3), (3, 4), (2, 4), (), (), (), ()], ishapes=[(2, 3), (3, 4), (2, 4), (2, 3), (3, 4), (2, 4), (), (), (), ()],
max_graphlen=1, max_graphlen=1,
) )
# print "---------------------" # print "---------------------"
just_gemm( just_gemm(
[X, Y, Z, R, S, U, a, b, c, d], [X, Y, Z, R, S, U, a, b, c, d],
[a * Z - b * (c * T.dot(X, Y) + d * Z + c * Z)], [a * Z - b * (c * tt.dot(X, Y) + d * Z + c * Z)],
ishapes=[(2, 3), (3, 4), (2, 4), (2, 3), (3, 4), (2, 4), (), (), (), ()], ishapes=[(2, 3), (3, 4), (2, 4), (2, 3), (3, 4), (2, 4), (), (), (), ()],
max_graphlen=1, max_graphlen=1,
) )
# print "---------------------" # print "---------------------"
just_gemm( just_gemm(
[X, Y, Z, R, S, U, a, b, c, d], [X, Y, Z, R, S, U, a, b, c, d],
[a * Z - b * (c * T.dot(X, Y) + d * Z + c * U)], [a * Z - b * (c * tt.dot(X, Y) + d * Z + c * U)],
ishapes=[(2, 3), (3, 4), (2, 4), (2, 3), (3, 4), (2, 4), (), (), (), ()], ishapes=[(2, 3), (3, 4), (2, 4), (2, 3), (3, 4), (2, 4), (), (), (), ()],
max_graphlen=3, max_graphlen=3,
) )
def test_gemm_opt_wishlist(): def test_gemm_opt_wishlist():
X, Y, Z, a, b = T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar() X, Y, Z, a, b = tt.matrix(), tt.matrix(), tt.matrix(), tt.scalar(), tt.scalar()
# with >2 additions of the same T.dot(X,Y term # with >2 additions of the same T.dot(X,Y term
just_gemm( just_gemm(
[X, Y, Z, a, b], [(b * b) * Z * a + (a * a) * T.dot(X, Y) + b * T.dot(X, Y)] [X, Y, Z, a, b], [(b * b) * Z * a + (a * a) * tt.dot(X, Y) + b * tt.dot(X, Y)]
) )
just_gemm([X, Y, Z, a, b], [Z + T.dot(X, Y) + T.dot(X, Y)]) just_gemm([X, Y, Z, a, b], [Z + tt.dot(X, Y) + tt.dot(X, Y)])
def test_gemm_with_vector(): def test_gemm_with_vector():
...@@ -868,39 +878,39 @@ def test_gemm_with_vector(): ...@@ -868,39 +878,39 @@ def test_gemm_with_vector():
# bug. # bug.
X, Y, Z, a, b = XYZab() X, Y, Z, a, b = XYZab()
v = T.vector() v = tt.vector()
def my_just_gemm(o): def my_just_gemm(o):
i = [X, Y, Z, a, b, v] i = [X, Y, Z, a, b, v]
ishapes = [(4, 3), (3, 5), (4, 5), (), (), (5,)] ishapes = [(4, 3), (3, 5), (4, 5), (), (), (5,)]
just_gemm(i, o, ishapes=ishapes) just_gemm(i, o, ishapes=ishapes)
my_just_gemm([v + T.dot(X, Y) * a + Z * b]) my_just_gemm([v + tt.dot(X, Y) * a + Z * b])
my_just_gemm([v + a * T.dot(X, Y) + b * Z]) my_just_gemm([v + a * tt.dot(X, Y) + b * Z])
my_just_gemm([v + b * Z + a * T.dot(X, Y)]) my_just_gemm([v + b * Z + a * tt.dot(X, Y)])
my_just_gemm([v + T.dot(X, Y) * a - Z * b]) my_just_gemm([v + tt.dot(X, Y) * a - Z * b])
my_just_gemm([v + a * T.dot(X, Y) - b * Z]) my_just_gemm([v + a * tt.dot(X, Y) - b * Z])
my_just_gemm([v + b * Z - a * T.dot(X, Y)]) my_just_gemm([v + b * Z - a * tt.dot(X, Y)])
# with N multiplications instead of just one # with N multiplications instead of just one
my_just_gemm([v + (b * b) * Z * a + (a * a) * T.dot(X, Y) * b]) my_just_gemm([v + (b * b) * Z * a + (a * a) * tt.dot(X, Y) * b])
my_just_gemm([v + Z + T.dot(X, Y)]) my_just_gemm([v + Z + tt.dot(X, Y)])
my_just_gemm([v + Z * b + T.dot(X, Y)]) my_just_gemm([v + Z * b + tt.dot(X, Y)])
my_just_gemm([v + Z + a * b * a * T.dot(X, Y)]) my_just_gemm([v + Z + a * b * a * tt.dot(X, Y)])
my_just_gemm([v + (b * b) * Z * a - (a * a) * T.dot(X, Y) * b]) my_just_gemm([v + (b * b) * Z * a - (a * a) * tt.dot(X, Y) * b])
my_just_gemm([Z - T.dot(X, Y) + v]) my_just_gemm([Z - tt.dot(X, Y) + v])
my_just_gemm([Z * b - T.dot(X, Y) + v]) my_just_gemm([Z * b - tt.dot(X, Y) + v])
my_just_gemm([Z - a * b * a * T.dot(X, Y) + v]) my_just_gemm([Z - a * b * a * tt.dot(X, Y) + v])
def test_gemm_opt_vector_stuff(): def test_gemm_opt_vector_stuff():
X, Y, a = T.matrix(), T.matrix(), T.scalar() X, Y, a = tt.matrix(), tt.matrix(), tt.scalar()
u, v = T.vector(), T.vector() u, v = tt.vector(), tt.vector()
f = inplace_func([a, u, v], a + T.dot(u, v), mode="FAST_RUN") f = inplace_func([a, u, v], a + tt.dot(u, v), mode="FAST_RUN")
assert gemm_inplace not in [n.op for n in f.maker.fgraph.apply_nodes] assert gemm_inplace not in [n.op for n in f.maker.fgraph.apply_nodes]
f = inplace_func([a, u, X, Y], a * u + T.dot(X, Y), mode="FAST_RUN") f = inplace_func([a, u, X, Y], a * u + tt.dot(X, Y), mode="FAST_RUN")
assert gemm_inplace not in [n.op for n in f.maker.fgraph.apply_nodes] assert gemm_inplace not in [n.op for n in f.maker.fgraph.apply_nodes]
...@@ -925,10 +935,10 @@ def test_gemm_unrolled(): ...@@ -925,10 +935,10 @@ def test_gemm_unrolled():
cur_H = H cur_H = H
def update_V(cur_H): def update_V(cur_H):
return T.nnet.sigmoid(T.dot(cur_H, W.T)) return sigmoid(tt.dot(cur_H, W.T))
def update_H(cur_V): def update_H(cur_V):
return T.nnet.sigmoid(T.dot(cur_V, W) + T.dot(G, W.T)) return sigmoid(tt.dot(cur_V, W) + tt.dot(G, W.T))
for i in range(num_rounds): for i in range(num_rounds):
cur_V = update_V(cur_H) cur_V = update_V(cur_H)
...@@ -944,9 +954,9 @@ def test_gemm_unrolled(): ...@@ -944,9 +954,9 @@ def test_gemm_unrolled():
if isinstance( if isinstance(
node.op, node.op,
( (
theano.tensor.Dot, tt.Dot,
theano.tensor.blas.Dot22, Dot22,
theano.tensor.blas.Gemm, Gemm,
), ),
) )
] ]
...@@ -962,22 +972,22 @@ def test_inplace0(): ...@@ -962,22 +972,22 @@ def test_inplace0():
# should fail to insert gemm_inplace because gemm_inplace would # should fail to insert gemm_inplace because gemm_inplace would
# create cycles # create cycles
X, Y, Z, a, b = ( X, Y, Z, a, b = (
T.matrix("X"), tt.matrix("X"),
T.matrix("Y"), tt.matrix("Y"),
T.matrix("Z"), tt.matrix("Z"),
T.scalar("a"), tt.scalar("a"),
T.scalar("b"), tt.scalar("b"),
) )
R, S, c = T.matrix("R"), T.matrix("S"), T.scalar("c") R, S, c = tt.matrix("R"), tt.matrix("S"), tt.scalar("c")
f = inplace_func([Z, b, R, S], [Z * (Z + b * T.dot(R, S).T)], mode="FAST_RUN") f = inplace_func([Z, b, R, S], [Z * (Z + b * tt.dot(R, S).T)], mode="FAST_RUN")
assert gemm_inplace not in [n.op for n in f.maker.fgraph.apply_nodes] assert gemm_inplace not in [n.op for n in f.maker.fgraph.apply_nodes]
assert gemm_no_inplace in [n.op for n in f.maker.fgraph.apply_nodes] assert gemm_no_inplace in [n.op for n in f.maker.fgraph.apply_nodes]
# gemm_inplace should be inserted here, to work in-place on Z*c # gemm_inplace should be inserted here, to work in-place on Z*c
f = inplace_func( f = inplace_func(
[X, Y, Z, a, b, R, S, c], [X, Y, Z, a, b, R, S, c],
[Z * (c * Z + a * T.dot(X, Y) + b * T.dot(R, S).T)], [Z * (c * Z + a * tt.dot(X, Y) + b * tt.dot(R, S).T)],
mode="FAST_RUN", mode="FAST_RUN",
) )
assert gemm_inplace in [n.op for n in f.maker.fgraph.apply_nodes] assert gemm_inplace in [n.op for n in f.maker.fgraph.apply_nodes]
...@@ -986,7 +996,7 @@ def test_inplace0(): ...@@ -986,7 +996,7 @@ def test_inplace0():
def test_inplace1(): def test_inplace1():
X, Y, Z, a, b = XYZab() X, Y, Z, a, b = XYZab()
# with > 2 terms in the overall addition # with > 2 terms in the overall addition
f = inplace_func([X, Y, Z], [Z + Z + T.dot(X, Y)], mode="FAST_RUN") f = inplace_func([X, Y, Z], [Z + Z + tt.dot(X, Y)], mode="FAST_RUN")
# theano.printing.debugprint(f) # theano.printing.debugprint(f)
# it doesn't work inplace because we didn't mark Z as mutable input # it doesn't work inplace because we didn't mark Z as mutable input
assert [n.op for n in f.maker.fgraph.apply_nodes] == [gemm_no_inplace] assert [n.op for n in f.maker.fgraph.apply_nodes] == [gemm_no_inplace]
...@@ -994,15 +1004,15 @@ def test_inplace1(): ...@@ -994,15 +1004,15 @@ def test_inplace1():
def test_dot22(): def test_dot22():
for dtype1 in ["float32", "float64", "complex64", "complex128"]: for dtype1 in ["float32", "float64", "complex64", "complex128"]:
a = T.matrix(dtype=dtype1) a = tt.matrix(dtype=dtype1)
for dtype2 in ["float32", "float64", "complex64", "complex128"]: for dtype2 in ["float32", "float64", "complex64", "complex128"]:
b = T.matrix(dtype=dtype2) b = tt.matrix(dtype=dtype2)
f = theano.function([a, b], T.dot(a, b), mode=mode_blas_opt) f = theano.function([a, b], tt.dot(a, b), mode=mode_blas_opt)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
if dtype1 == dtype2: if dtype1 == dtype2:
assert _dot22 in [x.op for x in topo], (dtype1, dtype2) assert _dot22 in [x.op for x in topo], (dtype1, dtype2)
else: else:
check = [isinstance(x.op, T.Dot) for x in topo] check = [isinstance(x.op, tt.Dot) for x in topo]
assert any(check), (dtype1, dtype2) assert any(check), (dtype1, dtype2)
rng = np.random.RandomState(unittest_tools.fetch_seed()) rng = np.random.RandomState(unittest_tools.fetch_seed())
...@@ -1029,14 +1039,14 @@ def test_dot22scalar(): ...@@ -1029,14 +1039,14 @@ def test_dot22scalar():
# m = theano.compile.get_default_mode().including('BlasOpt', 'specialize') # m = theano.compile.get_default_mode().including('BlasOpt', 'specialize')
rng = np.random.RandomState(unittest_tools.fetch_seed()) rng = np.random.RandomState(unittest_tools.fetch_seed())
for dtype1 in ["complex64", "complex128"]: for dtype1 in ["complex64", "complex128"]:
a = T.matrix("a", dtype=dtype1) a = tt.matrix("a", dtype=dtype1)
for dtype2 in ["complex64", "complex128"]: for dtype2 in ["complex64", "complex128"]:
b = T.matrix("b", dtype=dtype2) b = tt.matrix("b", dtype=dtype2)
for dtype3 in ["complex64", "complex128"]: for dtype3 in ["complex64", "complex128"]:
c = T.matrix("c", dtype=dtype3) c = tt.matrix("c", dtype=dtype3)
for dtype4 in ["complex64", "complex128"]: for dtype4 in ["complex64", "complex128"]:
cst = theano.tensor.basic.constant(0.2, dtype=dtype4) cst = tt.constant(0.2, dtype=dtype4)
cst2 = theano.tensor.basic.constant(0.1, dtype=dtype4) cst2 = tt.constant(0.1, dtype=dtype4)
def check_dot22scalar(func, len_topo_scalar=-1): def check_dot22scalar(func, len_topo_scalar=-1):
topo = func.maker.fgraph.toposort() topo = func.maker.fgraph.toposort()
...@@ -1072,7 +1082,7 @@ def test_dot22scalar(): ...@@ -1072,7 +1082,7 @@ def test_dot22scalar():
elif dtype1 == dtype2: elif dtype1 == dtype2:
assert _dot22 in ops, (dtype1, dtype2, dtype3, dtype4) assert _dot22 in ops, (dtype1, dtype2, dtype3, dtype4)
else: else:
check = [isinstance(o, T.Dot) for o in ops] check = [isinstance(o, tt.Dot) for o in ops]
assert any(check), (dtype1, dtype2, dtype3, dtype4) assert any(check), (dtype1, dtype2, dtype3, dtype4)
def cmp(a_shp, b_shp, c_shp, sqr_shp=(5, 5)): def cmp(a_shp, b_shp, c_shp, sqr_shp=(5, 5)):
...@@ -1083,7 +1093,7 @@ def test_dot22scalar(): ...@@ -1083,7 +1093,7 @@ def test_dot22scalar():
if False: if False:
f = theano.function( f = theano.function(
[a, b], cst * T.dot(a, b), mode=mode_blas_opt [a, b], cst * tt.dot(a, b), mode=mode_blas_opt
) )
f.maker.fgraph.toposort() f.maker.fgraph.toposort()
check_dot22scalar(f, 1) check_dot22scalar(f, 1)
...@@ -1092,7 +1102,7 @@ def test_dot22scalar(): ...@@ -1092,7 +1102,7 @@ def test_dot22scalar():
if True: if True:
f = theano.function( f = theano.function(
[a, b, c], cst * c * T.dot(a, b), mode=mode_blas_opt [a, b, c], cst * c * tt.dot(a, b), mode=mode_blas_opt
) )
f.maker.fgraph.toposort() f.maker.fgraph.toposort()
check_dot22scalar(f, 2) check_dot22scalar(f, 2)
...@@ -1100,7 +1110,7 @@ def test_dot22scalar(): ...@@ -1100,7 +1110,7 @@ def test_dot22scalar():
f(av, bv, cv) f(av, bv, cv)
f = theano.function( f = theano.function(
[a, b, c], c * cst * T.dot(a, b), mode=mode_blas_opt [a, b, c], c * cst * tt.dot(a, b), mode=mode_blas_opt
) )
f.maker.fgraph.toposort() f.maker.fgraph.toposort()
check_dot22scalar(f, 2) check_dot22scalar(f, 2)
...@@ -1110,7 +1120,7 @@ def test_dot22scalar(): ...@@ -1110,7 +1120,7 @@ def test_dot22scalar():
# TODO: add only the optimizations needed? # TODO: add only the optimizations needed?
m2 = mode_blas_opt.including("canonicalize") m2 = mode_blas_opt.including("canonicalize")
f = theano.function( f = theano.function(
[a, b, c], cst2 * c * cst * T.dot(a, b), mode=m2 [a, b, c], cst2 * c * cst * tt.dot(a, b), mode=m2
) )
f.maker.fgraph.toposort() f.maker.fgraph.toposort()
check_dot22scalar(f, 2) check_dot22scalar(f, 2)
...@@ -1118,14 +1128,16 @@ def test_dot22scalar(): ...@@ -1118,14 +1128,16 @@ def test_dot22scalar():
if dtype1 == dtype2 == dtype3: if dtype1 == dtype2 == dtype3:
f = theano.function( f = theano.function(
[a, b, c], c * cst * a * T.dot(a, b), mode=m2 [a, b, c], c * cst * a * tt.dot(a, b), mode=m2
) )
f.maker.fgraph.toposort() f.maker.fgraph.toposort()
check_dot22scalar(f, 2) check_dot22scalar(f, 2)
f(sv, sv, sv) f(sv, sv, sv)
f = theano.function( f = theano.function(
[a, b, c], cst * c * a * T.dot(a, b), mode=mode_blas_opt [a, b, c],
cst * c * a * tt.dot(a, b),
mode=mode_blas_opt,
) )
f.maker.fgraph.toposort() f.maker.fgraph.toposort()
# currently the canonizer don't always # currently the canonizer don't always
...@@ -1141,7 +1153,7 @@ def test_dot22scalar(): ...@@ -1141,7 +1153,7 @@ def test_dot22scalar():
f(sv, sv, sv) f(sv, sv, sv)
f = theano.function( f = theano.function(
[a, b, c], c * a * cst * T.dot(a, b), mode=m2 [a, b, c], c * a * cst * tt.dot(a, b), mode=m2
) )
f.maker.fgraph.toposort() f.maker.fgraph.toposort()
check_dot22scalar(f, 2) check_dot22scalar(f, 2)
...@@ -1158,15 +1170,15 @@ def test_dot22scalar(): ...@@ -1158,15 +1170,15 @@ def test_dot22scalar():
def test_dot22scalar_cast(): def test_dot22scalar_cast():
# Test that in `dot22_to_dot22scalar` we properly cast integers to floats. # Test that in `dot22_to_dot22scalar` we properly cast integers to floats.
# Note that this test was failing before d5ff6904. # Note that this test was failing before d5ff6904.
A = T.dmatrix() A = tt.dmatrix()
for scalar_int_type in T.int_dtypes: for scalar_int_type in tt.int_dtypes:
y = T.scalar(dtype=scalar_int_type) y = tt.scalar(dtype=scalar_int_type)
f = theano.function([A, y], T.dot(A, A) * y, mode=mode_blas_opt) f = theano.function([A, y], tt.dot(A, A) * y, mode=mode_blas_opt)
assert _dot22scalar in [x.op for x in f.maker.fgraph.toposort()] assert _dot22scalar in [x.op for x in f.maker.fgraph.toposort()]
A = T.fmatrix() A = tt.fmatrix()
for scalar_int_type in T.int_dtypes: for scalar_int_type in tt.int_dtypes:
y = T.scalar(dtype=scalar_int_type) y = tt.scalar(dtype=scalar_int_type)
f = theano.function([A, y], T.dot(A, A) * y, mode=mode_blas_opt) f = theano.function([A, y], tt.dot(A, A) * y, mode=mode_blas_opt)
if scalar_int_type in ["int32", "int64"]: if scalar_int_type in ["int32", "int64"]:
assert _dot22 in [x.op for x in f.maker.fgraph.toposort()] assert _dot22 in [x.op for x in f.maker.fgraph.toposort()]
else: else:
...@@ -1175,46 +1187,46 @@ def test_dot22scalar_cast(): ...@@ -1175,46 +1187,46 @@ def test_dot22scalar_cast():
def test_local_dot22_to_dot22scalar(): def test_local_dot22_to_dot22scalar():
# This test that the bug in gh-1507 is really fixed # This test that the bug in gh-1507 is really fixed
A = T.dmatrix() A = tt.dmatrix()
mode = theano.compile.mode.get_default_mode() mode = theano.compile.mode.get_default_mode()
opt = theano.tensor.opt.in2out(theano.tensor.blas.local_dot22_to_dot22scalar) opt = in2out(local_dot22_to_dot22scalar)
mode = mode.__class__(optimizer=opt) mode = mode.__class__(optimizer=opt)
x = T.dscalar() x = tt.dscalar()
y = T.dscalar() y = tt.dscalar()
z = T.dscalar() z = tt.dscalar()
# make sure to don't have dimshuffle as we don't opt those cases # make sure to don't have dimshuffle as we don't opt those cases
m = T.dmatrix() m = tt.dmatrix()
r = T.drow() r = tt.drow()
for idx, node in enumerate( for idx, node in enumerate(
[ [
# Old working cases # Old working cases
T.mul(_dot22(A, A), x), tt.mul(_dot22(A, A), x),
T.mul(_dot22(A, A), x, y), tt.mul(_dot22(A, A), x, y),
T.mul(_dot22(A, A), x, r), tt.mul(_dot22(A, A), x, r),
T.mul(_dot22(A, A), m, x), tt.mul(_dot22(A, A), m, x),
T.mul(_dot22(A, A), x, m), tt.mul(_dot22(A, A), x, m),
T.mul(_dot22(A, A), x, (m * y)), tt.mul(_dot22(A, A), x, (m * y)),
T.mul(_dot22(A, A), (m * y), x), tt.mul(_dot22(A, A), (m * y), x),
T.mul(_dot22(A, A), x, (r * y)), tt.mul(_dot22(A, A), x, (r * y)),
T.mul(_dot22(A, A), (r * y), x), tt.mul(_dot22(A, A), (r * y), x),
T.mul(_dot22(A, A), (x * y), (m * x)), tt.mul(_dot22(A, A), (x * y), (m * x)),
T.mul(_dot22(A, A), (r * y), (y * x)), tt.mul(_dot22(A, A), (r * y), (y * x)),
# Case that was raising an assert that is fixed in gh-1507 # Case that was raising an assert that is fixed in gh-1507
T.mul(_dot22(A, A), (m * y), m), tt.mul(_dot22(A, A), (m * y), m),
T.mul(_dot22(A, A), m, (m * y)), tt.mul(_dot22(A, A), m, (m * y)),
T.mul(_dot22(A, A), (r * y), (m * x)), tt.mul(_dot22(A, A), (r * y), (m * x)),
# assert fixed in gh-1507 and opt case added in gh-1515 # assert fixed in gh-1507 and opt case added in gh-1515
T.mul(_dot22(A, A), (m * y * z), m), tt.mul(_dot22(A, A), (m * y * z), m),
T.mul(_dot22(A, A), m, (m * y * z)), tt.mul(_dot22(A, A), m, (m * y * z)),
# Opt case added in gh-1515 # Opt case added in gh-1515
T.mul(_dot22(A, A), T.mul(m, y, z), m), tt.mul(_dot22(A, A), tt.mul(m, y, z), m),
T.mul(_dot22(A, A), m, T.mul(m, y, z)), tt.mul(_dot22(A, A), m, tt.mul(m, y, z)),
# Case that opt later in gh-1515 # Case that opt later in gh-1515
T.mul(_dot22(A, A), (r * m), (m * x)), tt.mul(_dot22(A, A), (r * m), (m * x)),
] ]
): ):
node2 = theano.tensor.blas.local_dot22_to_dot22scalar.transform(node.owner) node2 = local_dot22_to_dot22scalar.transform(node.owner)
assert node2 assert node2
f = theano.function( f = theano.function(
[x, y, z, m, r, A], node, mode=mode, on_unused_input="ignore" [x, y, z, m, r, A], node, mode=mode, on_unused_input="ignore"
...@@ -1228,11 +1240,11 @@ def test_dot_w_self(): ...@@ -1228,11 +1240,11 @@ def test_dot_w_self():
# one of the inputs. # one of the inputs.
A = shared(value=np.ones((2, 2))) A = shared(value=np.ones((2, 2)))
B = T.matrix() B = tt.matrix()
p = T.dot(A, A) * B p = tt.dot(A, A) * B
grad = T.grad(T.mean(p), A) grad = tt.grad(tt.mean(p), A)
f = theano.function([B], p, updates=[(A, A - grad)]) f = theano.function([B], p, updates=[(A, A - grad)])
# tests correctness in debugmode # tests correctness in debugmode
...@@ -1253,7 +1265,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin): ...@@ -1253,7 +1265,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
f = theano.function([], theano.dot(v, w), mode=mode_blas_opt) f = theano.function([], theano.dot(v, w), mode=mode_blas_opt)
# Assert that the dot was optimized somehow # Assert that the dot was optimized somehow
self.assertFunctionContains0(f, T.dot) self.assertFunctionContains0(f, tt.dot)
self.assertFunctionContains1(f, Gemv(True)) self.assertFunctionContains1(f, Gemv(True))
# Assert they produce the same output # Assert they produce the same output
...@@ -1267,7 +1279,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin): ...@@ -1267,7 +1279,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
f = theano.function([], theano.dot(v, m), mode=mode_blas_opt) f = theano.function([], theano.dot(v, m), mode=mode_blas_opt)
# Assert that the dot was optimized somehow # Assert that the dot was optimized somehow
self.assertFunctionContains0(f, T.dot) self.assertFunctionContains0(f, tt.dot)
self.assertFunctionContains1(f, Gemv(True)) self.assertFunctionContains1(f, Gemv(True))
# Assert they produce the same output # Assert they produce the same output
...@@ -1284,7 +1296,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin): ...@@ -1284,7 +1296,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
f = theano.function([], theano.dot(m, v), mode=mode_blas_opt) f = theano.function([], theano.dot(m, v), mode=mode_blas_opt)
# Assert that the dot was optimized somehow # Assert that the dot was optimized somehow
self.assertFunctionContains0(f, T.dot) self.assertFunctionContains0(f, tt.dot)
self.assertFunctionContains1(f, Gemv(True)) self.assertFunctionContains1(f, Gemv(True))
# Assert they produce the same output # Assert they produce the same output
...@@ -1402,7 +1414,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin): ...@@ -1402,7 +1414,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
assert sum(isinstance(node.op, Gemv) for node in topo) == 1 assert sum(isinstance(node.op, Gemv) for node in topo) == 1
# call gemv directly for mixed broadcast pattern. # call gemv directly for mixed broadcast pattern.
o = theano.tensor.blas.gemv_no_inplace(v2, 0.5, m, v1, 0.25) o = gemv_no_inplace(v2, 0.5, m, v1, 0.25)
f = theano.function([], o, mode=mode_blas_opt) f = theano.function([], o, mode=mode_blas_opt)
assert np.allclose( assert np.allclose(
f(), 0.5 * np.dot(m.get_value(), v1.get_value()) + 0.25 * v2.get_value() f(), 0.5 * np.dot(m.get_value(), v1.get_value()) + 0.25 * v2.get_value()
...@@ -1411,12 +1423,12 @@ class TestGemv(unittest_tools.OptimizationTestMixin): ...@@ -1411,12 +1423,12 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
assert sum(isinstance(node.op, Gemv) for node in topo) == 1 assert sum(isinstance(node.op, Gemv) for node in topo) == 1
def test_gemv_dimensions(self): def test_gemv_dimensions(self):
A = T.matrix("A") A = tt.matrix("A")
x, y = T.vectors("x", "y") x, y = tt.vectors("x", "y")
alpha = theano.shared(theano._asarray(1.0, dtype=config.floatX), name="alpha") alpha = theano.shared(theano._asarray(1.0, dtype=config.floatX), name="alpha")
beta = theano.shared(theano._asarray(1.0, dtype=config.floatX), name="beta") beta = theano.shared(theano._asarray(1.0, dtype=config.floatX), name="beta")
z = beta * y + alpha * T.dot(A, x) z = beta * y + alpha * tt.dot(A, x)
f = theano.function([A, x, y], z) f = theano.function([A, x, y], z)
# Matrix value # Matrix value
...@@ -1487,7 +1499,7 @@ class BaseGemv(object): ...@@ -1487,7 +1499,7 @@ class BaseGemv(object):
+ beta.get_value() * y.get_value() + beta.get_value() * y.get_value()
) )
oy = alpha * T.dot(a, x) + beta * y oy = alpha * tt.dot(a, x) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1507,7 +1519,7 @@ class BaseGemv(object): ...@@ -1507,7 +1519,7 @@ class BaseGemv(object):
desired_oy = matrixmultiply(a_v, x_v) desired_oy = matrixmultiply(a_v, x_v)
oy = T.dot(a, x) oy = tt.dot(a, x)
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1523,7 +1535,7 @@ class BaseGemv(object): ...@@ -1523,7 +1535,7 @@ class BaseGemv(object):
desired_oy = alpha_v * matrixmultiply(transpose(a_v), x_v) + beta_v * y_v desired_oy = alpha_v * matrixmultiply(transpose(a_v), x_v) + beta_v * y_v
oy = alpha * T.dot(a.T, x) + beta * y oy = alpha * tt.dot(a.T, x) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1539,7 +1551,7 @@ class BaseGemv(object): ...@@ -1539,7 +1551,7 @@ class BaseGemv(object):
desired_oy = alpha_v * matrixmultiply(a_v, x_v[::2]) + beta_v * y_v desired_oy = alpha_v * matrixmultiply(a_v, x_v[::2]) + beta_v * y_v
oy = alpha * T.dot(a, x[::2]) + beta * y oy = alpha * tt.dot(a, x[::2]) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1555,7 +1567,7 @@ class BaseGemv(object): ...@@ -1555,7 +1567,7 @@ class BaseGemv(object):
desired_oy = alpha_v * matrixmultiply(transpose(a_v), x_v[::2]) + beta_v * y_v desired_oy = alpha_v * matrixmultiply(transpose(a_v), x_v[::2]) + beta_v * y_v
oy = alpha * T.dot(a.T, x[::2]) + beta * y oy = alpha * tt.dot(a.T, x[::2]) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1571,7 +1583,7 @@ class BaseGemv(object): ...@@ -1571,7 +1583,7 @@ class BaseGemv(object):
desired_oy = alpha_v * matrixmultiply(a_v, x_v) + beta_v * y_v[::2] desired_oy = alpha_v * matrixmultiply(a_v, x_v) + beta_v * y_v[::2]
oy = alpha * T.dot(a, x) + beta * y[::2] oy = alpha * tt.dot(a, x) + beta * y[::2]
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1587,7 +1599,7 @@ class BaseGemv(object): ...@@ -1587,7 +1599,7 @@ class BaseGemv(object):
desired_oy = alpha_v * matrixmultiply(transpose(a_v), x_v) + beta_v * y_v[::2] desired_oy = alpha_v * matrixmultiply(transpose(a_v), x_v) + beta_v * y_v[::2]
oy = alpha * T.dot(a.T, x) + beta * y[::2] oy = alpha * tt.dot(a.T, x) + beta * y[::2]
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1607,7 +1619,7 @@ class BaseGemv(object): ...@@ -1607,7 +1619,7 @@ class BaseGemv(object):
desired_oy = alpha_v * matrixmultiply(a_v, x_v) + beta_v * y_v desired_oy = alpha_v * matrixmultiply(a_v, x_v) + beta_v * y_v
oy = alpha * T.dot(a, x) + beta * y oy = alpha * tt.dot(a, x) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1627,7 +1639,7 @@ class BaseGemv(object): ...@@ -1627,7 +1639,7 @@ class BaseGemv(object):
desired_oy = alpha_v * matrixmultiply(transpose(a_v), x_v) + beta_v * y_v desired_oy = alpha_v * matrixmultiply(transpose(a_v), x_v) + beta_v * y_v
oy = alpha * T.dot(a.T, x) + beta * y oy = alpha * tt.dot(a.T, x) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1647,12 +1659,12 @@ class BaseGemv(object): ...@@ -1647,12 +1659,12 @@ class BaseGemv(object):
x_v = x_v.astype("float32") x_v = x_v.astype("float32")
y_v = y_v.astype("float32") y_v = y_v.astype("float32")
alpha = T.dscalar("alpha") alpha = tt.dscalar("alpha")
a = self.shared(a_v) a = self.shared(a_v)
x = self.shared(x_v) x = self.shared(x_v)
y = self.shared(y_v) y = self.shared(y_v)
rval = T.dot(a, x) * alpha + y rval = tt.dot(a, x) * alpha + y
f = theano.function([alpha], rval, mode=self.mode) f = theano.function([alpha], rval, mode=self.mode)
# this function is currently optimized so that the gemv is # this function is currently optimized so that the gemv is
...@@ -1671,14 +1683,14 @@ class BaseGemv(object): ...@@ -1671,14 +1683,14 @@ class BaseGemv(object):
class TestSgemv(BaseGemv, unittest_tools.OptimizationTestMixin): class TestSgemv(BaseGemv, unittest_tools.OptimizationTestMixin):
dtype = float32 dtype = float32
gemv = theano.tensor.blas.gemv_no_inplace gemv = gemv_no_inplace
gemv_inplace = theano.tensor.blas.gemv_inplace gemv_inplace = gemv_inplace
class TestDgemv(BaseGemv, unittest_tools.OptimizationTestMixin): class TestDgemv(BaseGemv, unittest_tools.OptimizationTestMixin):
dtype = float64 dtype = float64
gemv = theano.tensor.blas.gemv_no_inplace gemv = gemv_no_inplace
gemv_inplace = theano.tensor.blas.gemv_inplace gemv_inplace = gemv_inplace
# The optimization to put Gemv don't work for complex type for now. # The optimization to put Gemv don't work for complex type for now.
...@@ -1696,30 +1708,30 @@ class TestDgemv(BaseGemv, unittest_tools.OptimizationTestMixin): ...@@ -1696,30 +1708,30 @@ class TestDgemv(BaseGemv, unittest_tools.OptimizationTestMixin):
class TestGerMakeNode: class TestGerMakeNode:
def setup_method(self): def setup_method(self):
self.iv = T.tensor(dtype="int32", broadcastable=(False,)) self.iv = tt.tensor(dtype="int32", broadcastable=(False,))
self.fv = T.tensor(dtype="float32", broadcastable=(False,)) self.fv = tt.tensor(dtype="float32", broadcastable=(False,))
self.fv1 = T.tensor(dtype="float32", broadcastable=(True,)) self.fv1 = tt.tensor(dtype="float32", broadcastable=(True,))
self.dv = T.tensor(dtype="float64", broadcastable=(False,)) self.dv = tt.tensor(dtype="float64", broadcastable=(False,))
self.dv1 = T.tensor(dtype="float64", broadcastable=(True,)) self.dv1 = tt.tensor(dtype="float64", broadcastable=(True,))
self.cv = T.tensor(dtype="complex64", broadcastable=(False,)) self.cv = tt.tensor(dtype="complex64", broadcastable=(False,))
self.zv = T.tensor(dtype="complex128", broadcastable=(False,)) self.zv = tt.tensor(dtype="complex128", broadcastable=(False,))
self.fv_2 = T.tensor(dtype="float32", broadcastable=(False,)) self.fv_2 = tt.tensor(dtype="float32", broadcastable=(False,))
self.fv1_2 = T.tensor(dtype="float32", broadcastable=(True,)) self.fv1_2 = tt.tensor(dtype="float32", broadcastable=(True,))
self.dv_2 = T.tensor(dtype="float64", broadcastable=(False,)) self.dv_2 = tt.tensor(dtype="float64", broadcastable=(False,))
self.dv1_2 = T.tensor(dtype="float64", broadcastable=(True,)) self.dv1_2 = tt.tensor(dtype="float64", broadcastable=(True,))
self.cv_2 = T.tensor(dtype="complex64", broadcastable=(False,)) self.cv_2 = tt.tensor(dtype="complex64", broadcastable=(False,))
self.zv_2 = T.tensor(dtype="complex128", broadcastable=(False,)) self.zv_2 = tt.tensor(dtype="complex128", broadcastable=(False,))
self.fm = T.fmatrix() self.fm = tt.fmatrix()
self.dm = T.dmatrix() self.dm = tt.dmatrix()
self.cm = T.cmatrix() self.cm = tt.cmatrix()
self.zm = T.zmatrix() self.zm = tt.zmatrix()
self.fa = T.fscalar() self.fa = tt.fscalar()
self.da = T.dscalar() self.da = tt.dscalar()
self.ca = T.cscalar() self.ca = tt.cscalar()
self.za = T.zscalar() self.za = tt.zscalar()
def test_works_on_all_valid_dtypes(self): def test_works_on_all_valid_dtypes(self):
assert self.fm.type == ger(self.fm, self.fa, self.fv, self.fv_2).type assert self.fm.type == ger(self.fm, self.fa, self.fv, self.fv_2).type
...@@ -1729,7 +1741,7 @@ class TestGerMakeNode: ...@@ -1729,7 +1741,7 @@ class TestGerMakeNode:
def test_fails_on_invalid_dtypes(self): def test_fails_on_invalid_dtypes(self):
with pytest.raises(TypeError): with pytest.raises(TypeError):
ger(T.imatrix(), T.iscalar(), T.ivector(), T.ivector()) ger(tt.imatrix(), tt.iscalar(), tt.ivector(), tt.ivector())
def test_fails_for_nonscalar_alpha(self): def test_fails_for_nonscalar_alpha(self):
with pytest.raises(TypeError): with pytest.raises(TypeError):
...@@ -1783,10 +1795,10 @@ class TestGer(unittest_tools.OptimizationTestMixin): ...@@ -1783,10 +1795,10 @@ class TestGer(unittest_tools.OptimizationTestMixin):
self.mode = theano.compile.get_default_mode().including("fast_run") self.mode = theano.compile.get_default_mode().including("fast_run")
self.mode = self.mode.excluding("c_blas", "scipy_blas") self.mode = self.mode.excluding("c_blas", "scipy_blas")
dtype = self.dtype = "float64" # optimization isn't dtype-dependent dtype = self.dtype = "float64" # optimization isn't dtype-dependent
self.A = T.tensor(dtype=dtype, broadcastable=(False, False)) self.A = tt.tensor(dtype=dtype, broadcastable=(False, False))
self.a = T.tensor(dtype=dtype, broadcastable=()) self.a = tt.tensor(dtype=dtype, broadcastable=())
self.x = T.tensor(dtype=dtype, broadcastable=(False,)) self.x = tt.tensor(dtype=dtype, broadcastable=(False,))
self.y = T.tensor(dtype=dtype, broadcastable=(False,)) self.y = tt.tensor(dtype=dtype, broadcastable=(False,))
self.ger = ger self.ger = ger
self.ger_destructive = ger_destructive self.ger_destructive = ger_destructive
self.gemm = gemm_no_inplace self.gemm = gemm_no_inplace
...@@ -1797,11 +1809,11 @@ class TestGer(unittest_tools.OptimizationTestMixin): ...@@ -1797,11 +1809,11 @@ class TestGer(unittest_tools.OptimizationTestMixin):
return theano.function(inputs, outputs, self.mode, updates=updates) return theano.function(inputs, outputs, self.mode, updates=updates)
def b(self, bval): def b(self, bval):
return T.as_tensor_variable(np.asarray(bval, dtype=self.dtype)) return tt.as_tensor_variable(np.asarray(bval, dtype=self.dtype))
def test_b_0_triggers_ger(self): def test_b_0_triggers_ger(self):
# test local_gemm_to_ger opt # test local_gemm_to_ger opt
assert T.blas.local_gemm_to_ger.transform( assert local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.A,
self.a, self.a,
...@@ -1813,7 +1825,7 @@ class TestGer(unittest_tools.OptimizationTestMixin): ...@@ -1813,7 +1825,7 @@ class TestGer(unittest_tools.OptimizationTestMixin):
def test_b_1_triggers_ger(self): def test_b_1_triggers_ger(self):
# test local_gemm_to_ger opt # test local_gemm_to_ger opt
assert T.blas.local_gemm_to_ger.transform( assert local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.A,
self.a, self.a,
...@@ -1825,7 +1837,7 @@ class TestGer(unittest_tools.OptimizationTestMixin): ...@@ -1825,7 +1837,7 @@ class TestGer(unittest_tools.OptimizationTestMixin):
def test_b_other_does_not_triggers_ger(self): def test_b_other_does_not_triggers_ger(self):
# test local_gemm_to_ger opt # test local_gemm_to_ger opt
assert not T.blas.local_gemm_to_ger.transform( assert not local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.A,
self.a, self.a,
...@@ -1837,7 +1849,7 @@ class TestGer(unittest_tools.OptimizationTestMixin): ...@@ -1837,7 +1849,7 @@ class TestGer(unittest_tools.OptimizationTestMixin):
def test_b_nonconst_does_not_triggers_ger(self): def test_b_nonconst_does_not_triggers_ger(self):
# test local_gemm_to_ger opt # test local_gemm_to_ger opt
assert not T.blas.local_gemm_to_ger.transform( assert not local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.A,
self.a, self.a,
...@@ -1848,12 +1860,12 @@ class TestGer(unittest_tools.OptimizationTestMixin): ...@@ -1848,12 +1860,12 @@ class TestGer(unittest_tools.OptimizationTestMixin):
) )
def test_outer(self): def test_outer(self):
f = self.function([self.x, self.y], T.outer(self.x, self.y)) f = self.function([self.x, self.y], tt.outer(self.x, self.y))
self.assertFunctionContains(f, self.ger_destructive) self.assertFunctionContains(f, self.ger_destructive)
f(np.random.rand(5).astype(self.dtype), np.random.rand(4).astype(self.dtype)) f(np.random.rand(5).astype(self.dtype), np.random.rand(4).astype(self.dtype))
def test_A_plus_outer(self): def test_A_plus_outer(self):
f = self.function([self.A, self.x, self.y], self.A + T.outer(self.x, self.y)) f = self.function([self.A, self.x, self.y], self.A + tt.outer(self.x, self.y))
self.assertFunctionContains(f, self.ger) self.assertFunctionContains(f, self.ger)
f( f(
np.random.rand(5, 4).astype(self.dtype), np.random.rand(5, 4).astype(self.dtype),
...@@ -1868,7 +1880,7 @@ class TestGer(unittest_tools.OptimizationTestMixin): ...@@ -1868,7 +1880,7 @@ class TestGer(unittest_tools.OptimizationTestMixin):
def test_A_plus_scaled_outer(self): def test_A_plus_scaled_outer(self):
f = self.function( f = self.function(
[self.A, self.x, self.y], self.A + 0.1 * T.outer(self.x, self.y) [self.A, self.x, self.y], self.A + 0.1 * tt.outer(self.x, self.y)
) )
self.assertFunctionContains(f, self.ger) self.assertFunctionContains(f, self.ger)
f( f(
...@@ -1886,7 +1898,7 @@ class TestGer(unittest_tools.OptimizationTestMixin): ...@@ -1886,7 +1898,7 @@ class TestGer(unittest_tools.OptimizationTestMixin):
f = self.function( f = self.function(
[self.A, self.x, self.y], [self.A, self.x, self.y],
np.asarray(0.2, self.dtype) * self.A np.asarray(0.2, self.dtype) * self.A
+ np.asarray(0.1, self.dtype) * T.outer(self.x, self.y), + np.asarray(0.1, self.dtype) * tt.outer(self.x, self.y),
) )
# Why gemm? This make the graph simpler did we test that it # Why gemm? This make the graph simpler did we test that it
# make it faster? # make it faster?
...@@ -1906,7 +1918,7 @@ class TestGer(unittest_tools.OptimizationTestMixin): ...@@ -1906,7 +1918,7 @@ class TestGer(unittest_tools.OptimizationTestMixin):
# test corner case shape and dtype # test corner case shape and dtype
f = self.function( f = self.function(
[self.A, self.x, self.y], self.A + 0.1 * T.outer(self.x, self.y) [self.A, self.x, self.y], self.A + 0.1 * tt.outer(self.x, self.y)
) )
self.assertFunctionContains(f, self.ger) self.assertFunctionContains(f, self.ger)
f( f(
...@@ -1956,7 +1968,7 @@ class TestGer(unittest_tools.OptimizationTestMixin): ...@@ -1956,7 +1968,7 @@ class TestGer(unittest_tools.OptimizationTestMixin):
[self.x, self.y], [self.x, self.y],
[], [],
updates=[ updates=[
(A, A + T.constant(0.1, dtype=self.dtype) * T.outer(self.x, self.y)) (A, A + tt.constant(0.1, dtype=self.dtype) * tt.outer(self.x, self.y))
], ],
) )
self.assertFunctionContains(f, self.ger_destructive) self.assertFunctionContains(f, self.ger_destructive)
...@@ -1970,7 +1982,7 @@ class TestGer(unittest_tools.OptimizationTestMixin): ...@@ -1970,7 +1982,7 @@ class TestGer(unittest_tools.OptimizationTestMixin):
class TestBlasStrides: class TestBlasStrides:
dtype = "float64" dtype = "float64"
shared = staticmethod(tensor._shared) shared = staticmethod(tt._shared)
mode = theano.compile.get_default_mode() mode = theano.compile.get_default_mode()
mode = mode.including("fast_run").excluding("gpu", "c_blas", "scipy_blas") mode = mode.including("fast_run").excluding("gpu", "c_blas", "scipy_blas")
rng = np.random.RandomState(seed=unittest_tools.fetch_seed()) rng = np.random.RandomState(seed=unittest_tools.fetch_seed())
...@@ -1995,17 +2007,13 @@ class TestBlasStrides: ...@@ -1995,17 +2007,13 @@ class TestBlasStrides:
bt_dev = b_t.get_value(borrow=False, return_internal_type=True) bt_dev = b_t.get_value(borrow=False, return_internal_type=True)
ct_dev = c_t.get_value(borrow=False, return_internal_type=True) ct_dev = c_t.get_value(borrow=False, return_internal_type=True)
f_nn = theano.function([], [], updates=[(a, tensor.dot(b, c))], mode=self.mode) f_nn = theano.function([], [], updates=[(a, tt.dot(b, c))], mode=self.mode)
# print 'class name:', self.__class__.__name__ # print 'class name:', self.__class__.__name__
# theano.printing.debugprint(f_nn) # theano.printing.debugprint(f_nn)
f_nt = theano.function( f_nt = theano.function([], [], updates=[(a, tt.dot(b, c_t.T))], mode=self.mode)
[], [], updates=[(a, tensor.dot(b, c_t.T))], mode=self.mode f_tn = theano.function([], [], updates=[(a, tt.dot(b_t.T, c))], mode=self.mode)
)
f_tn = theano.function(
[], [], updates=[(a, tensor.dot(b_t.T, c))], mode=self.mode
)
f_tt = theano.function( f_tt = theano.function(
[], [], updates=[(a, tensor.dot(b_t.T, c_t.T))], mode=self.mode [], [], updates=[(a, tt.dot(b_t.T, c_t.T))], mode=self.mode
) )
# Try with all stride patterns, and all transposed pattern # Try with all stride patterns, and all transposed pattern
...@@ -2066,17 +2074,15 @@ class TestBlasStrides: ...@@ -2066,17 +2074,15 @@ class TestBlasStrides:
bt_dev = b_t.get_value(borrow=False, return_internal_type=True) bt_dev = b_t.get_value(borrow=False, return_internal_type=True)
ct_dev = c_t.get_value(borrow=False, return_internal_type=True) ct_dev = c_t.get_value(borrow=False, return_internal_type=True)
f_nn = theano.function( f_nn = theano.function([], [], updates=[(a, l * tt.dot(b, c))], mode=self.mode)
[], [], updates=[(a, l * tensor.dot(b, c))], mode=self.mode
)
f_nt = theano.function( f_nt = theano.function(
[], [], updates=[(a, l * tensor.dot(b, c_t.T))], mode=self.mode [], [], updates=[(a, l * tt.dot(b, c_t.T))], mode=self.mode
) )
f_tn = theano.function( f_tn = theano.function(
[], [], updates=[(a, l * tensor.dot(b_t.T, c))], mode=self.mode [], [], updates=[(a, l * tt.dot(b_t.T, c))], mode=self.mode
) )
f_tt = theano.function( f_tt = theano.function(
[], [], updates=[(a, l * tensor.dot(b_t.T, c_t.T))], mode=self.mode [], [], updates=[(a, l * tt.dot(b_t.T, c_t.T))], mode=self.mode
) )
# Try with all stride patterns, and all transposed pattern # Try with all stride patterns, and all transposed pattern
...@@ -2140,30 +2146,30 @@ class TestBlasStrides: ...@@ -2140,30 +2146,30 @@ class TestBlasStrides:
ct_dev = c_t.get_value(borrow=False, return_internal_type=True) ct_dev = c_t.get_value(borrow=False, return_internal_type=True)
f_nnn = theano.function( f_nnn = theano.function(
[], [], updates=[(a, (l * a + tensor.dot(b, c)))], mode=self.mode [], [], updates=[(a, (l * a + tt.dot(b, c)))], mode=self.mode
) )
f_nnt = theano.function( f_nnt = theano.function(
[], [], updates=[(a, (l * a + tensor.dot(b, c_t.T)))], mode=self.mode [], [], updates=[(a, (l * a + tt.dot(b, c_t.T)))], mode=self.mode
) )
f_ntn = theano.function( f_ntn = theano.function(
[], [], updates=[(a, (l * a + tensor.dot(b_t.T, c)))], mode=self.mode [], [], updates=[(a, (l * a + tt.dot(b_t.T, c)))], mode=self.mode
) )
f_ntt = theano.function( f_ntt = theano.function(
[], [], updates=[(a, (l * a + tensor.dot(b_t.T, c_t.T)))], mode=self.mode [], [], updates=[(a, (l * a + tt.dot(b_t.T, c_t.T)))], mode=self.mode
) )
f_tnn = theano.function( f_tnn = theano.function(
[], [], updates=[(a_t, (l * a_t + tensor.dot(b, c).T))], mode=self.mode [], [], updates=[(a_t, (l * a_t + tt.dot(b, c).T))], mode=self.mode
) )
f_tnt = theano.function( f_tnt = theano.function(
[], [], updates=[(a_t, (l * a_t + tensor.dot(b, c_t.T).T))], mode=self.mode [], [], updates=[(a_t, (l * a_t + tt.dot(b, c_t.T).T))], mode=self.mode
) )
f_ttn = theano.function( f_ttn = theano.function(
[], [], updates=[(a_t, (l * a_t + tensor.dot(b_t.T, c).T))], mode=self.mode [], [], updates=[(a_t, (l * a_t + tt.dot(b_t.T, c).T))], mode=self.mode
) )
f_ttt = theano.function( f_ttt = theano.function(
[], [],
[], [],
updates=[(a_t, (l * a_t + tensor.dot(b_t.T, c_t.T).T))], updates=[(a_t, (l * a_t + tt.dot(b_t.T, c_t.T).T))],
mode=self.mode, mode=self.mode,
) )
...@@ -2260,11 +2266,11 @@ class TestBlasStrides: ...@@ -2260,11 +2266,11 @@ class TestBlasStrides:
c_dev = c.get_value(borrow=False, return_internal_type=True) c_dev = c.get_value(borrow=False, return_internal_type=True)
f_n = theano.function( f_n = theano.function(
[], [], updates=[(a, (a + l * tensor.dot(b, c)))], mode=self.mode [], [], updates=[(a, (a + l * tt.dot(b, c)))], mode=self.mode
) )
f_t = theano.function( f_t = theano.function(
[], [], updates=[(a, (a + l * tensor.dot(b_t.T, c)))], mode=self.mode [], [], updates=[(a, (a + l * tt.dot(b_t.T, c)))], mode=self.mode
) )
# Try with all stride patterns, and all transposed pattern # Try with all stride patterns, and all transposed pattern
...@@ -2313,11 +2319,11 @@ class TestBlasStrides: ...@@ -2313,11 +2319,11 @@ class TestBlasStrides:
c_dev = c.get_value(borrow=False, return_internal_type=True) c_dev = c.get_value(borrow=False, return_internal_type=True)
f_n = theano.function( f_n = theano.function(
[], [], updates=[(a, (a + l * tensor.outer(b, c)))], mode=self.mode [], [], updates=[(a, (a + l * tt.outer(b, c)))], mode=self.mode
) )
f_t = theano.function( f_t = theano.function(
[], [], updates=[(a_t, (a_t + l * tensor.outer(b, c).T))], mode=self.mode [], [], updates=[(a_t, (a_t + l * tt.outer(b, c).T))], mode=self.mode
) )
# Try with all stride patterns, and all transposed patterns # Try with all stride patterns, and all transposed patterns
...@@ -2365,8 +2371,8 @@ class TestBlasStrides: ...@@ -2365,8 +2371,8 @@ class TestBlasStrides:
b = theano.shared(bval[:, :5], borrow=True) b = theano.shared(bval[:, :5], borrow=True)
c = theano.shared(cval[:3, :5], borrow=True) c = theano.shared(cval[:3, :5], borrow=True)
s = theano.tensor.scalar() s = tt.scalar()
upd_c = s * c + theano.tensor.dot(a, b) upd_c = s * c + tt.dot(a, b)
f = theano.function([s], [], updates={c: upd_c}) f = theano.function([s], [], updates={c: upd_c})
f(0) f(0)
...@@ -2376,38 +2382,38 @@ class TestBlasStrides: ...@@ -2376,38 +2382,38 @@ class TestBlasStrides:
class TestInferShape(unittest_tools.InferShapeTester): class TestInferShape(unittest_tools.InferShapeTester):
def test_dot22(self): def test_dot22(self):
x, y = T.matrices("xy") x, y = tt.matrices("xy")
self._compile_and_check( self._compile_and_check(
[x, y], [x, y],
[T.blas._dot22(x, y)], [_dot22(x, y)],
[ [
np.random.random((2, 3)).astype(config.floatX), np.random.random((2, 3)).astype(config.floatX),
np.random.random((3, 4)).astype(config.floatX), np.random.random((3, 4)).astype(config.floatX),
], ],
T.blas.Dot22, Dot22,
) )
def test_dot22scalar(self): def test_dot22scalar(self):
x, y = T.matrices("xy") x, y = tt.matrices("xy")
a = T.scalar("a") a = tt.scalar("a")
self._compile_and_check( self._compile_and_check(
[x, y, a], [x, y, a],
[T.blas._dot22scalar(x, y, a)], [_dot22scalar(x, y, a)],
[ [
np.random.random((2, 3)).astype(config.floatX), np.random.random((2, 3)).astype(config.floatX),
np.random.random((3, 4)).astype(config.floatX), np.random.random((3, 4)).astype(config.floatX),
np.asarray(0.5, dtype=config.floatX), np.asarray(0.5, dtype=config.floatX),
], ],
T.blas.Dot22Scalar, Dot22Scalar,
) )
def test_gemm(self): def test_gemm(self):
x, y, z = T.matrices("xyz") x, y, z = tt.matrices("xyz")
a = T.scalar("a") a = tt.scalar("a")
b = T.scalar("b") b = tt.scalar("b")
self._compile_and_check( self._compile_and_check(
[x, y, a, z, b], [x, y, a, z, b],
[T.blas.gemm(z, a, x, y, b)], [gemm(z, a, x, y, b)],
[ [
np.random.random((2, 3)).astype(config.floatX), np.random.random((2, 3)).astype(config.floatX),
np.random.random((3, 4)).astype(config.floatX), np.random.random((3, 4)).astype(config.floatX),
...@@ -2415,17 +2421,17 @@ class TestInferShape(unittest_tools.InferShapeTester): ...@@ -2415,17 +2421,17 @@ class TestInferShape(unittest_tools.InferShapeTester):
np.random.random((2, 4)).astype(config.floatX), np.random.random((2, 4)).astype(config.floatX),
np.asarray(0.5, dtype=config.floatX), np.asarray(0.5, dtype=config.floatX),
], ],
T.blas.Gemm, Gemm,
) )
def test_gemv(self): def test_gemv(self):
A = T.matrix("A") A = tt.matrix("A")
x, y = T.vectors("xy") x, y = tt.vectors("xy")
a = T.scalar("a") a = tt.scalar("a")
b = T.scalar("b") b = tt.scalar("b")
self._compile_and_check( self._compile_and_check(
[y, a, A, x, b], [y, a, A, x, b],
[T.blas.gemv(y, a, A, x, b)], [gemv(y, a, A, x, b)],
[ [
np.random.random((2,)).astype(config.floatX), np.random.random((2,)).astype(config.floatX),
np.asarray(0.5, dtype=config.floatX), np.asarray(0.5, dtype=config.floatX),
...@@ -2433,21 +2439,21 @@ class TestInferShape(unittest_tools.InferShapeTester): ...@@ -2433,21 +2439,21 @@ class TestInferShape(unittest_tools.InferShapeTester):
np.random.random((3,)).astype(config.floatX), np.random.random((3,)).astype(config.floatX),
np.asarray(0.5, dtype=config.floatX), np.asarray(0.5, dtype=config.floatX),
], ],
T.blas.Gemv, Gemv,
) )
def test_ger(self): def test_ger(self):
A = T.matrix("A") A = tt.matrix("A")
x, y = T.vectors("xy") x, y = tt.vectors("xy")
a = T.scalar("a") a = tt.scalar("a")
self._compile_and_check( self._compile_and_check(
[A, a, x, y], [A, a, x, y],
[T.blas.ger(A, a, x, y)], [ger(A, a, x, y)],
[ [
np.random.random((2, 3)).astype(config.floatX), np.random.random((2, 3)).astype(config.floatX),
np.asarray(0.5, dtype=config.floatX), np.asarray(0.5, dtype=config.floatX),
np.random.random((2,)).astype(config.floatX), np.random.random((2,)).astype(config.floatX),
np.random.random((3,)).astype(config.floatX), np.random.random((3,)).astype(config.floatX),
], ],
T.blas.Ger, Ger,
) )
...@@ -7,15 +7,14 @@ import pytest ...@@ -7,15 +7,14 @@ import pytest
import numpy as np import numpy as np
import theano import theano
import theano.tensor as tt
import tests.unittest_tools as utt import tests.unittest_tools as utt
from copy import copy from copy import copy
from theano import gof, scalar, config from theano import gof, scalar, config
from theano import tensor
from theano.tensor import TensorType, as_tensor_variable from theano.tensor import TensorType, as_tensor_variable
from theano.compile.mode import get_default_mode, Mode from theano.compile.mode import get_default_mode, Mode
from theano.tensor.elemwise import ( from theano.tensor.elemwise import (
...@@ -24,7 +23,10 @@ from theano.tensor.elemwise import ( ...@@ -24,7 +23,10 @@ from theano.tensor.elemwise import (
DimShuffle, DimShuffle,
Prod, Prod,
ProdWithoutZeros, ProdWithoutZeros,
Sum,
) )
from theano.tensor.type import values_eq_approx_remove_nan
from theano.tensor.nnet import sigmoid
from tests import unittest_tools from tests import unittest_tools
...@@ -126,37 +128,37 @@ class TestReduceAxes: ...@@ -126,37 +128,37 @@ class TestReduceAxes:
def test_sum_axes(self): def test_sum_axes(self):
axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]] axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]]
for a in axes: for a in axes:
x = tensor.matrix() x = tt.matrix()
x.sum(a) x.sum(a)
def test_mean_axes(self): def test_mean_axes(self):
axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]] axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]]
for a in axes: for a in axes:
x = tensor.matrix() x = tt.matrix()
x.mean(a) x.mean(a)
def test_max_axes(self): def test_max_axes(self):
axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]] axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]]
for a in axes: for a in axes:
x = tensor.matrix() x = tt.matrix()
x.max(a) x.max(a)
def test_min_axes(self): def test_min_axes(self):
axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]] axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]]
for a in axes: for a in axes:
x = tensor.matrix() x = tt.matrix()
x.min(a) x.min(a)
def test_argmax_axes(self): def test_argmax_axes(self):
axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]] axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]]
for a in axes: for a in axes:
x = tensor.matrix() x = tt.matrix()
x.argmax(a) x.argmax(a)
def test_var_axes(self): def test_var_axes(self):
axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]] axes = [None, 0, 1, [0, 1], np.array(1), [np.array(0), np.array(1)]]
for a in axes: for a in axes:
x = tensor.matrix() x = tt.matrix()
x.var(a) x.var(a)
...@@ -302,7 +304,7 @@ class TestBroadcast: ...@@ -302,7 +304,7 @@ class TestBroadcast:
assert (xv == yv).all() assert (xv == yv).all()
def test_fill_var(self): def test_fill_var(self):
x = tensor.matrix() x = tt.matrix()
x.fill(3) x.fill(3)
def test_fill_grad(self): def test_fill_grad(self):
...@@ -310,7 +312,7 @@ class TestBroadcast: ...@@ -310,7 +312,7 @@ class TestBroadcast:
# https://groups.google.com/d/topic/theano-users/nQshB8gUA6k/discussion # https://groups.google.com/d/topic/theano-users/nQshB8gUA6k/discussion
x = TensorType(config.floatX, [0, 1, 0])("x") x = TensorType(config.floatX, [0, 1, 0])("x")
y = TensorType(config.floatX, [0, 1, 0])("y") y = TensorType(config.floatX, [0, 1, 0])("y")
e = tensor.second(x, y) e = tt.second(x, y)
theano.grad(e.sum(), y) theano.grad(e.sum(), y)
@pytest.mark.skipif( @pytest.mark.skipif(
...@@ -414,7 +416,7 @@ class TestCAReduce(unittest_tools.InferShapeTester): ...@@ -414,7 +416,7 @@ class TestCAReduce(unittest_tools.InferShapeTester):
f = theano.function([x], e, mode=mode) f = theano.function([x], e, mode=mode)
xv = np.asarray(np.random.rand(*xsh)) xv = np.asarray(np.random.rand(*xsh))
if dtype not in tensor.discrete_dtypes: if dtype not in tt.discrete_dtypes:
xv = np.asarray(xv, dtype=dtype) xv = np.asarray(xv, dtype=dtype)
else: else:
xv = np.asarray(xv < 0.5, dtype=dtype) xv = np.asarray(xv < 0.5, dtype=dtype)
...@@ -441,12 +443,12 @@ class TestCAReduce(unittest_tools.InferShapeTester): ...@@ -441,12 +443,12 @@ class TestCAReduce(unittest_tools.InferShapeTester):
axis2.append(a) axis2.append(a)
assert len(axis2) == len(tosum) assert len(axis2) == len(tosum)
tosum = tuple(axis2) tosum = tuple(axis2)
if tensor_op == tensor.all: if tensor_op == tt.all:
for axis in reversed(sorted(tosum)): for axis in reversed(sorted(tosum)):
zv = np.all(zv, axis) zv = np.all(zv, axis)
if len(tosum) == 0: if len(tosum) == 0:
zv = zv != 0 zv = zv != 0
elif tensor_op == tensor.any: elif tensor_op == tt.any:
for axis in reversed(sorted(tosum)): for axis in reversed(sorted(tosum)):
zv = np.any(zv, axis) zv = np.any(zv, axis)
if len(tosum) == 0: if len(tosum) == 0:
...@@ -537,11 +539,9 @@ class TestCAReduce(unittest_tools.InferShapeTester): ...@@ -537,11 +539,9 @@ class TestCAReduce(unittest_tools.InferShapeTester):
self.with_mode(Mode(linker="py"), scalar.maximum, dtype=dtype) self.with_mode(Mode(linker="py"), scalar.maximum, dtype=dtype)
self.with_mode(Mode(linker="py"), scalar.minimum, dtype=dtype) self.with_mode(Mode(linker="py"), scalar.minimum, dtype=dtype)
self.with_mode( self.with_mode(
Mode(linker="py"), scalar.and_, dtype=dtype, tensor_op=tensor.all Mode(linker="py"), scalar.and_, dtype=dtype, tensor_op=tt.all
)
self.with_mode(
Mode(linker="py"), scalar.or_, dtype=dtype, tensor_op=tensor.any
) )
self.with_mode(Mode(linker="py"), scalar.or_, dtype=dtype, tensor_op=tt.any)
for dtype in ["int8", "uint8"]: for dtype in ["int8", "uint8"]:
self.with_mode(Mode(linker="py"), scalar.or_, dtype=dtype) self.with_mode(Mode(linker="py"), scalar.or_, dtype=dtype)
self.with_mode(Mode(linker="py"), scalar.and_, dtype=dtype) self.with_mode(Mode(linker="py"), scalar.and_, dtype=dtype)
...@@ -562,14 +562,14 @@ class TestCAReduce(unittest_tools.InferShapeTester): ...@@ -562,14 +562,14 @@ class TestCAReduce(unittest_tools.InferShapeTester):
scalar.or_, scalar.or_,
dtype=dtype, dtype=dtype,
test_nan=True, test_nan=True,
tensor_op=tensor.any, tensor_op=tt.any,
) )
self.with_mode( self.with_mode(
Mode(linker="py"), Mode(linker="py"),
scalar.and_, scalar.and_,
dtype=dtype, dtype=dtype,
test_nan=True, test_nan=True,
tensor_op=tensor.all, tensor_op=tt.all,
) )
@pytest.mark.skipif( @pytest.mark.skipif(
...@@ -591,12 +591,8 @@ class TestCAReduce(unittest_tools.InferShapeTester): ...@@ -591,12 +591,8 @@ class TestCAReduce(unittest_tools.InferShapeTester):
for dtype in ["bool", "floatX", "int8", "uint8"]: for dtype in ["bool", "floatX", "int8", "uint8"]:
self.with_mode(Mode(linker="c"), scalar.minimum, dtype=dtype) self.with_mode(Mode(linker="c"), scalar.minimum, dtype=dtype)
self.with_mode(Mode(linker="c"), scalar.maximum, dtype=dtype) self.with_mode(Mode(linker="c"), scalar.maximum, dtype=dtype)
self.with_mode( self.with_mode(Mode(linker="c"), scalar.and_, dtype=dtype, tensor_op=tt.all)
Mode(linker="c"), scalar.and_, dtype=dtype, tensor_op=tensor.all self.with_mode(Mode(linker="c"), scalar.or_, dtype=dtype, tensor_op=tt.any)
)
self.with_mode(
Mode(linker="c"), scalar.or_, dtype=dtype, tensor_op=tensor.any
)
for dtype in ["bool", "int8", "uint8"]: for dtype in ["bool", "int8", "uint8"]:
self.with_mode(Mode(linker="c"), scalar.or_, dtype=dtype) self.with_mode(Mode(linker="c"), scalar.or_, dtype=dtype)
self.with_mode(Mode(linker="c"), scalar.and_, dtype=dtype) self.with_mode(Mode(linker="c"), scalar.and_, dtype=dtype)
...@@ -664,7 +660,7 @@ class TestProd: ...@@ -664,7 +660,7 @@ class TestProd:
# second time, with some added complexity # second time, with some added complexity
# verify_grad takes the sum of the matrices anyway # verify_grad takes the sum of the matrices anyway
def fn(x2): def fn(x2):
return theano.tensor.sqr(Prod(axis=1)(x2)) return tt.sqr(Prod(axis=1)(x2))
unittest_tools.verify_grad(fn, [x_val], mode=self.mode) unittest_tools.verify_grad(fn, [x_val], mode=self.mode)
...@@ -674,20 +670,20 @@ class TestProd: ...@@ -674,20 +670,20 @@ class TestProd:
x_val = np.asarray( x_val = np.asarray(
[[1.0, 2.0, 3.0], [0.0, 5.0, 6.0], [0.0, 0.0, 9.0]], dtype="float32" [[1.0, 2.0, 3.0], [0.0, 5.0, 6.0], [0.0, 0.0, 9.0]], dtype="float32"
) )
x = theano.tensor.dmatrix() x = tt.dmatrix()
# sanity check # sanity check
p = Prod(axis=1)(x) p = Prod(axis=1)(x)
# Uncomment this for debugging if needed # Uncomment this for debugging if needed
# x2 = theano.tensor.dmatrix() # x2 = tt.dmatrix()
# p2 = Prod(axis=1)(x2) # p2 = Prod(axis=1)(x2)
# fn = theano.function([x, x2], [p - p2], mode=self.mode) # fn = theano.function([x, x2], [p - p2], mode=self.mode)
# print("hand computed diff for each row") # print("hand computed diff for each row")
# x2_val = np.asarray([[1., 2., 3.003], [0.003, 5., 6], [ # x2_val = np.asarray([[1., 2., 3.003], [0.003, 5., 6], [
# 0., 0., 9.01]]) # 0., 0., 9.01]])
# print(fn(x_val, x2_val)) # print(fn(x_val, x2_val))
# fn2 = theano.function([x], [theano.tensor.grad(p.sum(), x)], # fn2 = theano.function([x], [tt.grad(p.sum(), x)],
# mode=self.mode) # mode=self.mode)
# print("real grad") # print("real grad")
# print(fn2(x_val)) # print(fn2(x_val))
...@@ -700,10 +696,10 @@ class TestProd: ...@@ -700,10 +696,10 @@ class TestProd:
# second time, with some added complexity # second time, with some added complexity
# verify_grad takes the sum of the matrices anyway # verify_grad takes the sum of the matrices anyway
# def fn5(x5): # def fn5(x5):
# return theano.tensor.sqr(Prod(axis=1)(x5)) # return tt.sqr(Prod(axis=1)(x5))
# x4 = theano.tensor.dmatrix() # x4 = tt.dmatrix()
# p4 = theano.tensor.sqr(Prod(axis=1)(x4)) # p4 = tt.sqr(Prod(axis=1)(x4))
# fn4 = theano.function([x4], p4) # fn4 = theano.function([x4], p4)
# print("with sqr") # print("with sqr")
# print(fn4(x_val)) # print(fn4(x_val))
...@@ -713,7 +709,7 @@ class TestProd: ...@@ -713,7 +709,7 @@ class TestProd:
@pytest.mark.slow @pytest.mark.slow
def test_prod_no_zeros_in_input(self): def test_prod_no_zeros_in_input(self):
x = theano.tensor.dmatrix() x = tt.dmatrix()
x_val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="float32") x_val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="float32")
pwz = Prod(axis=1, no_zeros_in_input=True)(x) pwz = Prod(axis=1, no_zeros_in_input=True)(x)
fn = theano.function([x], pwz, mode=self.mode) fn = theano.function([x], pwz, mode=self.mode)
...@@ -754,7 +750,7 @@ class TestProd: ...@@ -754,7 +750,7 @@ class TestProd:
unittest_tools.verify_grad(second_deriv, [x_val], mode=self.mode) unittest_tools.verify_grad(second_deriv, [x_val], mode=self.mode)
def test_prod_without_zeros(self): def test_prod_without_zeros(self):
x = theano.tensor.dmatrix() x = tt.dmatrix()
x_val = np.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]], dtype="float32") x_val = np.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]], dtype="float32")
pwz = ProdWithoutZeros(axis=1)(x) pwz = ProdWithoutZeros(axis=1)(x)
fn = theano.function([x], pwz, mode=self.mode) fn = theano.function([x], pwz, mode=self.mode)
...@@ -766,14 +762,14 @@ class TestProd: ...@@ -766,14 +762,14 @@ class TestProd:
@pytest.mark.xfail(raises=theano.gradient.NullTypeGradError) @pytest.mark.xfail(raises=theano.gradient.NullTypeGradError)
def test_prod_without_zeros_grad(self): def test_prod_without_zeros_grad(self):
x = theano.tensor.dmatrix() x = tt.dmatrix()
pwz_a1 = ProdWithoutZeros(axis=0)(x) pwz_a1 = ProdWithoutZeros(axis=0)(x)
pwz_grad = theano.grad(theano.tensor.sum(pwz_a1), x) pwz_grad = theano.grad(tt.sum(pwz_a1), x)
theano.function([x], pwz_grad, mode=self.mode) theano.function([x], pwz_grad, mode=self.mode)
@pytest.mark.slow @pytest.mark.slow
def test_other_grad_tests(self): def test_other_grad_tests(self):
x = theano.tensor.dmatrix() x = tt.dmatrix()
x_val1 = np.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]], dtype="float32") x_val1 = np.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]], dtype="float32")
x_val2 = np.array( x_val2 = np.array(
[[1, 2, 0], [0, 5, 6], [7, 8, 9], [9, 10, 0]], dtype="float32" [[1, 2, 0], [0, 5, 6], [7, 8, 9], [9, 10, 0]], dtype="float32"
...@@ -781,7 +777,7 @@ class TestProd: ...@@ -781,7 +777,7 @@ class TestProd:
rng = rng = np.random.RandomState(43) rng = rng = np.random.RandomState(43)
p = Prod(axis=1) p = Prod(axis=1)
grad_p = theano.tensor.grad(p(x).sum(), x) grad_p = tt.grad(p(x).sum(), x)
grad_fn = theano.function([x], grad_p, mode=self.mode) grad_fn = theano.function([x], grad_p, mode=self.mode)
assert np.allclose( assert np.allclose(
grad_fn(x_val1), [[6.0, 3.0, 2.0], [30.0, 0.0, 0.0], [0.0, 0.0, 0.0]] grad_fn(x_val1), [[6.0, 3.0, 2.0], [30.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
...@@ -792,7 +788,7 @@ class TestProd: ...@@ -792,7 +788,7 @@ class TestProd:
) )
p_axis0 = Prod(axis=0) p_axis0 = Prod(axis=0)
grad_p_axis0 = theano.tensor.grad(p_axis0(x).sum(), x) grad_p_axis0 = tt.grad(p_axis0(x).sum(), x)
grad_fn_axis0 = theano.function([x], grad_p_axis0, mode=self.mode) grad_fn_axis0 = theano.function([x], grad_p_axis0, mode=self.mode)
assert np.allclose( assert np.allclose(
grad_fn_axis0(x_val2), grad_fn_axis0(x_val2),
...@@ -804,12 +800,12 @@ class TestProd: ...@@ -804,12 +800,12 @@ class TestProd:
], ],
) )
tensor.verify_grad(p, [x_val1], rng=rng, mode=self.mode) tt.verify_grad(p, [x_val1], rng=rng, mode=self.mode)
def test_mul_without_zeros_zeros(self): def test_mul_without_zeros_zeros(self):
a = np.zeros((3, 3)) a = np.zeros((3, 3))
x = theano.tensor.dmatrix() x = tt.dmatrix()
mul1 = ProdWithoutZeros(axis=0)(x) mul1 = ProdWithoutZeros(axis=0)(x)
...@@ -838,35 +834,32 @@ class TestIsInfIsNan: ...@@ -838,35 +834,32 @@ class TestIsInfIsNan:
[np.nan, np.inf, -np.inf, 0, 1, -1], [np.nan, np.inf, -np.inf, 0, 1, -1],
] ]
] ]
self.scalar = tensor.scalar() self.scalar = tt.scalar()
self.vector = tensor.vector() self.vector = tt.vector()
self.mode = get_default_mode() self.mode = get_default_mode()
if isinstance(self.mode, theano.compile.debugmode.DebugMode): if isinstance(self.mode, theano.compile.debugmode.DebugMode):
# Disable the check preventing usage of NaN / Inf values. # Disable the check preventing usage of NaN / Inf values.
self.mode = copy(self.mode) self.mode = copy(self.mode)
self.mode.check_isfinite = False self.mode.check_isfinite = False
def run_isfunc(self, isfunc): def run_isfunc(self, tt_func, np_func):
for input in (self.scalar, self.vector): for args in (self.scalar, self.vector):
theano_isfunc = theano.function( theano_isfunc = theano.function([args], tt_func(args), mode=self.mode)
[input], getattr(tensor, isfunc)(input), mode=self.mode
)
numpy_isfunc = getattr(np, isfunc)
for x in self.test_vals: for x in self.test_vals:
if (x.ndim == 0 and input is not self.scalar) or ( if (x.ndim == 0 and args is not self.scalar) or (
x.ndim == 1 and input is not self.vector x.ndim == 1 and args is not self.vector
): ):
# We only test with the appropriate input type. # We only test with the appropriate input type.
continue continue
t_out = theano_isfunc(x) t_out = theano_isfunc(x)
n_out = numpy_isfunc(x) n_out = np_func(x)
assert (t_out == n_out).all(), (t_out, n_out) assert (t_out == n_out).all(), (t_out, n_out)
def test_isinf(self): def test_isinf(self):
return self.run_isfunc("isinf") self.run_isfunc(tt.isinf, np.isinf)
def test_isnan(self): def test_isnan(self):
return self.run_isfunc("isnan") self.run_isfunc(tt.isnan, np.isnan)
class TestReduceDtype: class TestReduceDtype:
...@@ -882,7 +875,7 @@ class TestReduceDtype: ...@@ -882,7 +875,7 @@ class TestReduceDtype:
for method in self.methods: for method in self.methods:
for idx, dtype in enumerate(self.dtypes): for idx, dtype in enumerate(self.dtypes):
axis = self.axes[idx % len(self.axes)] axis = self.axes[idx % len(self.axes)]
x = tensor.matrix(dtype=dtype) x = tt.matrix(dtype=dtype)
s = getattr(x, method)(axis=axis) s = getattr(x, method)(axis=axis)
assert ( assert (
s.dtype s.dtype
...@@ -910,7 +903,7 @@ class TestReduceDtype: ...@@ -910,7 +903,7 @@ class TestReduceDtype:
for method in self.methods: for method in self.methods:
for idx, dtype in enumerate(self.dtypes): for idx, dtype in enumerate(self.dtypes):
axis = self.axes[idx % len(self.axes)] axis = self.axes[idx % len(self.axes)]
x = tensor.matrix(dtype=dtype) x = tt.matrix(dtype=dtype)
s = getattr(x, method)(axis=axis) s = getattr(x, method)(axis=axis)
assert ( assert (
s.owner.op.acc_dtype s.owner.op.acc_dtype
...@@ -942,7 +935,7 @@ class TestReduceDtype: ...@@ -942,7 +935,7 @@ class TestReduceDtype:
idx = 0 idx = 0
for method in self.methods: for method in self.methods:
for input_dtype in self.dtypes: for input_dtype in self.dtypes:
x = tensor.matrix(dtype=input_dtype) x = tt.matrix(dtype=input_dtype)
for output_dtype in self.dtypes: for output_dtype in self.dtypes:
# Only tests case where both input and output are complex. # Only tests case where both input and output are complex.
icomplex = input_dtype.startswith("complex") icomplex = input_dtype.startswith("complex")
...@@ -977,7 +970,7 @@ class TestReduceDtype: ...@@ -977,7 +970,7 @@ class TestReduceDtype:
if "complex" in input_dtype: if "complex" in input_dtype:
continue continue
# Check that we can take the gradient # Check that we can take the gradient
tensor.grad(var.sum(), x, disconnected_inputs="ignore") tt.grad(var.sum(), x, disconnected_inputs="ignore")
idx += 1 idx += 1
def test_reduce_custom_acc_dtype(self): def test_reduce_custom_acc_dtype(self):
...@@ -987,7 +980,7 @@ class TestReduceDtype: ...@@ -987,7 +980,7 @@ class TestReduceDtype:
idx = 0 idx = 0
for method in self.methods: for method in self.methods:
for input_dtype in self.dtypes: for input_dtype in self.dtypes:
x = tensor.matrix(dtype=input_dtype) x = tt.matrix(dtype=input_dtype)
for acc_dtype in self.dtypes: for acc_dtype in self.dtypes:
# If the accumulator is a complex, the gradient of the reduce will # If the accumulator is a complex, the gradient of the reduce will
# cast the complex to the input dtype. We can't call the normal # cast the complex to the input dtype. We can't call the normal
...@@ -1002,8 +995,8 @@ class TestReduceDtype: ...@@ -1002,8 +995,8 @@ class TestReduceDtype:
# We always allow int/uint inputs with float/complex outputs. # We always allow int/uint inputs with float/complex outputs.
upcasted_dtype = scalar.upcast(input_dtype, acc_dtype) upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
if acc_dtype == upcasted_dtype or ( if acc_dtype == upcasted_dtype or (
input_dtype in tensor.discrete_dtypes input_dtype in tt.discrete_dtypes
and acc_dtype in tensor.continuous_dtypes and acc_dtype in tt.continuous_dtypes
): ):
var = getattr(x, method)(acc_dtype=acc_dtype, axis=axis) var = getattr(x, method)(acc_dtype=acc_dtype, axis=axis)
assert var.owner.op.acc_dtype == acc_dtype assert var.owner.op.acc_dtype == acc_dtype
...@@ -1011,7 +1004,7 @@ class TestReduceDtype: ...@@ -1011,7 +1004,7 @@ class TestReduceDtype:
if "complex" in input_dtype: if "complex" in input_dtype:
continue continue
# Check that we can take the gradient # Check that we can take the gradient
tensor.grad(var.sum(), x, disconnected_inputs="ignore") tt.grad(var.sum(), x, disconnected_inputs="ignore")
else: else:
with pytest.raises(TypeError): with pytest.raises(TypeError):
getattr(x(method), acc_dtype=acc_dtype, axis=axis) getattr(x(method), acc_dtype=acc_dtype, axis=axis)
...@@ -1040,9 +1033,9 @@ class TestMeanDtype: ...@@ -1040,9 +1033,9 @@ class TestMeanDtype:
axes = [None, 0, 1, [], [0], [1], [0, 1]] axes = [None, 0, 1, [], [0], [1], [0, 1]]
for idx, dtype in enumerate(map(str, theano.scalar.all_types)): for idx, dtype in enumerate(map(str, theano.scalar.all_types)):
axis = axes[idx % len(axes)] axis = axes[idx % len(axes)]
x = tensor.matrix(dtype=dtype) x = tt.matrix(dtype=dtype)
m = x.mean(axis=axis) m = x.mean(axis=axis)
if dtype in tensor.discrete_dtypes: if dtype in tt.discrete_dtypes:
assert m.dtype == "float64" assert m.dtype == "float64"
else: else:
assert m.dtype == dtype, (m, m.dtype, dtype) assert m.dtype == dtype, (m, m.dtype, dtype)
...@@ -1059,7 +1052,7 @@ class TestMeanDtype: ...@@ -1059,7 +1052,7 @@ class TestMeanDtype:
axes = [None, 0, 1, [], [0], [1], [0, 1]] axes = [None, 0, 1, [], [0], [1], [0, 1]]
idx = 0 idx = 0
for input_dtype in map(str, theano.scalar.all_types): for input_dtype in map(str, theano.scalar.all_types):
x = tensor.matrix(dtype=input_dtype) x = tt.matrix(dtype=input_dtype)
for sum_dtype in map(str, theano.scalar.all_types): for sum_dtype in map(str, theano.scalar.all_types):
axis = axes[idx % len(axes)] axis = axes[idx % len(axes)]
# If the inner sum cannot be created, it will raise a # If the inner sum cannot be created, it will raise a
...@@ -1070,7 +1063,7 @@ class TestMeanDtype: ...@@ -1070,7 +1063,7 @@ class TestMeanDtype:
pass pass
else: else:
# Executed if no TypeError was raised # Executed if no TypeError was raised
if sum_dtype in tensor.discrete_dtypes: if sum_dtype in tt.discrete_dtypes:
assert mean_var.dtype == "float64", (mean_var.dtype, sum_dtype) assert mean_var.dtype == "float64", (mean_var.dtype, sum_dtype)
else: else:
assert mean_var.dtype == sum_dtype, (mean_var.dtype, sum_dtype) assert mean_var.dtype == sum_dtype, (mean_var.dtype, sum_dtype)
...@@ -1086,11 +1079,11 @@ class TestMeanDtype: ...@@ -1086,11 +1079,11 @@ class TestMeanDtype:
if "complex" in mean_var.dtype: if "complex" in mean_var.dtype:
continue continue
try: try:
tensor.grad(mean_var.sum(), x, disconnected_inputs="ignore") tt.grad(mean_var.sum(), x, disconnected_inputs="ignore")
except NotImplementedError: except NotImplementedError:
# TrueDiv does not seem to have a gradient when # TrueDiv does not seem to have a gradient when
# the numerator is complex. # the numerator is complex.
if mean_var.dtype in tensor.complex_dtypes: if mean_var.dtype in tt.complex_dtypes:
pass pass
else: else:
raise raise
...@@ -1114,7 +1107,7 @@ class TestProdWithoutZerosDtype: ...@@ -1114,7 +1107,7 @@ class TestProdWithoutZerosDtype:
axes = [None, 0, 1, [], [0], [1], [0, 1]] axes = [None, 0, 1, [], [0], [1], [0, 1]]
for idx, dtype in enumerate(map(str, theano.scalar.all_types)): for idx, dtype in enumerate(map(str, theano.scalar.all_types)):
axis = axes[idx % len(axes)] axis = axes[idx % len(axes)]
x = ProdWithoutZeros(axis=axis)(tensor.matrix(dtype=dtype)) x = ProdWithoutZeros(axis=axis)(tt.matrix(dtype=dtype))
assert ( assert (
x.dtype x.dtype
== dict( == dict(
...@@ -1135,7 +1128,7 @@ class TestProdWithoutZerosDtype: ...@@ -1135,7 +1128,7 @@ class TestProdWithoutZerosDtype:
axes = [None, 0, 1, [], [0], [1], [0, 1]] axes = [None, 0, 1, [], [0], [1], [0, 1]]
for idx, dtype in enumerate(map(str, theano.scalar.all_types)): for idx, dtype in enumerate(map(str, theano.scalar.all_types)):
axis = axes[idx % len(axes)] axis = axes[idx % len(axes)]
x = tensor.matrix(dtype=dtype) x = tt.matrix(dtype=dtype)
p = ProdWithoutZeros(axis=axis)(x) p = ProdWithoutZeros(axis=axis)(x)
assert ( assert (
p.owner.op.acc_dtype p.owner.op.acc_dtype
...@@ -1168,7 +1161,7 @@ class TestProdWithoutZerosDtype: ...@@ -1168,7 +1161,7 @@ class TestProdWithoutZerosDtype:
axes = [None, 0, 1, [], [0], [1], [0, 1]] axes = [None, 0, 1, [], [0], [1], [0, 1]]
idx = 0 idx = 0
for input_dtype in map(str, theano.scalar.all_types): for input_dtype in map(str, theano.scalar.all_types):
x = tensor.matrix(dtype=input_dtype) x = tt.matrix(dtype=input_dtype)
for output_dtype in map(str, theano.scalar.all_types): for output_dtype in map(str, theano.scalar.all_types):
axis = axes[idx % len(axes)] axis = axes[idx % len(axes)]
prod_woz_var = ProdWithoutZeros(axis=axis, dtype=output_dtype)(x) prod_woz_var = ProdWithoutZeros(axis=axis, dtype=output_dtype)(x)
...@@ -1189,15 +1182,15 @@ class TestProdWithoutZerosDtype: ...@@ -1189,15 +1182,15 @@ class TestProdWithoutZerosDtype:
axes = [None, 0, 1, [], [0], [1], [0, 1]] axes = [None, 0, 1, [], [0], [1], [0, 1]]
idx = 0 idx = 0
for input_dtype in map(str, theano.scalar.all_types): for input_dtype in map(str, theano.scalar.all_types):
x = tensor.matrix(dtype=input_dtype) x = tt.matrix(dtype=input_dtype)
for acc_dtype in map(str, theano.scalar.all_types): for acc_dtype in map(str, theano.scalar.all_types):
axis = axes[idx % len(axes)] axis = axes[idx % len(axes)]
# If acc_dtype would force a downcast, we expect a TypeError # If acc_dtype would force a downcast, we expect a TypeError
# We always allow int/uint inputs with float/complex outputs. # We always allow int/uint inputs with float/complex outputs.
upcasted_dtype = scalar.upcast(input_dtype, acc_dtype) upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
if acc_dtype == upcasted_dtype or ( if acc_dtype == upcasted_dtype or (
input_dtype in tensor.discrete_dtypes input_dtype in tt.discrete_dtypes
and acc_dtype in tensor.continuous_dtypes and acc_dtype in tt.continuous_dtypes
): ):
prod_woz_var = ProdWithoutZeros(axis=axis, acc_dtype=acc_dtype)(x) prod_woz_var = ProdWithoutZeros(axis=axis, acc_dtype=acc_dtype)(x)
assert prod_woz_var.owner.op.acc_dtype == acc_dtype assert prod_woz_var.owner.op.acc_dtype == acc_dtype
...@@ -1220,7 +1213,7 @@ class TestBitOpReduceGrad: ...@@ -1220,7 +1213,7 @@ class TestBitOpReduceGrad:
self.rng = np.random.RandomState(unittest_tools.fetch_seed()) self.rng = np.random.RandomState(unittest_tools.fetch_seed())
def test_all_grad(self): def test_all_grad(self):
x = tensor.bmatrix("x") x = tt.bmatrix("x")
x_all = x.all() x_all = x.all()
gx = theano.grad(x_all, x) gx = theano.grad(x_all, x)
f = theano.function([x], gx) f = theano.function([x], gx)
...@@ -1231,7 +1224,7 @@ class TestBitOpReduceGrad: ...@@ -1231,7 +1224,7 @@ class TestBitOpReduceGrad:
assert np.all(gx_val == 0) assert np.all(gx_val == 0)
def test_any_grad(self): def test_any_grad(self):
x = tensor.bmatrix("x") x = tt.bmatrix("x")
x_all = x.any() x_all = x.any()
gx = theano.grad(x_all, x) gx = theano.grad(x_all, x)
f = theano.function([x], gx) f = theano.function([x], gx)
...@@ -1244,8 +1237,8 @@ class TestBitOpReduceGrad: ...@@ -1244,8 +1237,8 @@ class TestBitOpReduceGrad:
class TestElemwise(unittest_tools.InferShapeTester): class TestElemwise(unittest_tools.InferShapeTester):
def test_elemwise_grad_bool(self): def test_elemwise_grad_bool(self):
x = theano.tensor.scalar(dtype="bool") x = tt.scalar(dtype="bool")
y = theano.tensor.bscalar() y = tt.bscalar()
z = x * y z = x * y
dx, dy = theano.grad(z, [x, y]) dx, dy = theano.grad(z, [x, y])
...@@ -1279,7 +1272,7 @@ class TestElemwise(unittest_tools.InferShapeTester): ...@@ -1279,7 +1272,7 @@ class TestElemwise(unittest_tools.InferShapeTester):
# Elemwise.perform used to compute the product # Elemwise.perform used to compute the product
# of input shapes to check if there was a zero in them, # of input shapes to check if there was a zero in them,
# it overflowed in this case. # it overflowed in this case.
a, b, c, d, e, f = tensor.vectors("abcdef") a, b, c, d, e, f = tt.vectors("abcdef")
s = a + b + c + d + e + f s = a + b + c + d + e + f
g = theano.function( g = theano.function(
[a, b, c, d, e, f], s, mode=theano.compile.Mode(linker="py") [a, b, c, d, e, f], s, mode=theano.compile.Mode(linker="py")
...@@ -1315,7 +1308,7 @@ def test_clip_grad(): ...@@ -1315,7 +1308,7 @@ def test_clip_grad():
# test the gradient of clip # test the gradient of clip
def func(x, y, z): def func(x, y, z):
return theano.tensor.clip(x, y, z) return tt.clip(x, y, z)
# use an x value less than y, an x value between y and z, and an x value # use an x value less than y, an x value between y and z, and an x value
# greater than z # greater than z
...@@ -1323,35 +1316,38 @@ def test_clip_grad(): ...@@ -1323,35 +1316,38 @@ def test_clip_grad():
def test_grad_useless_sum(): def test_grad_useless_sum():
# Test absence of useless sum. """
# Test absence of useless sum.
# When an operation (such as T.mul) is done on a broadcastable vector and
# a matrix, the gradient in backward path is computed for the broadcasted When an operation (such as `theano.tensor.mul`) is done on a broadcastable
# vector. So a sum reverts the broadcasted vector to a vector. In the case vector and a matrix, the gradient in backward path is computed for the
# of operations on two broadcastable vectors, the sum should not be generated. broadcasted vector. So a sum reverts the broadcasted vector to a vector. In
# the case of operations on two broadcastable vectors, the sum should not be
# This test checks whether there is a useless sum in the gradient generated.
# computations.
This test checks whether there is a useless sum in the gradient
computations.
"""
mode = theano.compile.get_default_mode().including("canonicalize") mode = theano.compile.get_default_mode().including("canonicalize")
mode.check_isfinite = False mode.check_isfinite = False
x = TensorType(theano.config.floatX, (True,))("x") x = TensorType(theano.config.floatX, (True,))("x")
l = tensor.log(1.0 - tensor.nnet.sigmoid(x))[0] l = tt.log(1.0 - sigmoid(x))[0]
g = tensor.grad(l, x) g = tt.grad(l, x)
nodes = theano.gof.graph.ops([x], [g]) nodes = theano.gof.graph.ops([x], [g])
f = theano.function([x], g, mode=mode) f = theano.function([x], g, mode=mode)
test_values = [-100, -1, 0, 1, 100] test_values = [-100, -1, 0, 1, 100]
outputs = [] outputs = []
old_values_eq_approx = staticmethod(TensorType.values_eq_approx) old_values_eq_approx = staticmethod(TensorType.values_eq_approx)
TensorType.values_eq_approx = staticmethod(tensor.type.values_eq_approx_remove_nan) TensorType.values_eq_approx = staticmethod(values_eq_approx_remove_nan)
try: try:
for test_value in test_values: for test_value in test_values:
outputs.append(f(np.array([test_value]).astype("float32"))) outputs.append(f(np.array([test_value]).astype("float32")))
finally: finally:
TensorType.values_eq_approx = old_values_eq_approx TensorType.values_eq_approx = old_values_eq_approx
assert not any([isinstance(node.op, theano.tensor.elemwise.Sum) for node in nodes]) assert not any([isinstance(node.op, Sum) for node in nodes])
assert np.allclose( assert np.allclose(
outputs, [[-3.72007598e-44], [-0.26894142], [-0.5], [-0.73105858], [-1.0]] outputs, [[-3.72007598e-44], [-0.26894142], [-0.5], [-0.73105858], [-1.0]]
) )
...@@ -1360,22 +1356,21 @@ def test_grad_useless_sum(): ...@@ -1360,22 +1356,21 @@ def test_grad_useless_sum():
def test_elemwise_grad_broadcast(): def test_elemwise_grad_broadcast():
# This crashed in the past. # This crashed in the past.
x = tensor.tensor(dtype="float32", broadcastable=(True, False, False, False)) x = tt.tensor(dtype="float32", broadcastable=(True, False, False, False))
y = tensor.tensor(dtype="float32", broadcastable=(True, True, False, False)) y = tt.tensor(dtype="float32", broadcastable=(True, True, False, False))
theano.grad(theano.tensor.tanh(x).sum(), x) theano.grad(tt.tanh(x).sum(), x)
theano.grad(theano.tensor.tanh(x + y).sum(), y) theano.grad(tt.tanh(x + y).sum(), y)
theano.grad(theano.tensor.tanh(x + y).sum(), [x, y]) theano.grad(tt.tanh(x + y).sum(), [x, y])
def test_clip_grad_int(): def test_clip_grad_int():
# test that integers don't crash clip gradient # test that integers don't crash clip gradient
x = tensor.iscalar() x = tt.iscalar()
y = tensor.iscalar() y = tt.iscalar()
z = tensor.iscalar() z = tt.iscalar()
c = tensor.clip(x, y, z) c = tt.clip(x, y, z)
tensor.grad(c, [x, y, z]) tt.grad(c, [x, y, z])
def test_not_implemented_elemwise_grad(): def test_not_implemented_elemwise_grad():
...@@ -1394,20 +1389,10 @@ def test_not_implemented_elemwise_grad(): ...@@ -1394,20 +1389,10 @@ def test_not_implemented_elemwise_grad():
dy_dx = n dy_dx = n
return [theano.gradient.grad_not_implemented(self, 0, n), gz * dy_dx] return [theano.gradient.grad_not_implemented(self, 0, n), gz * dy_dx]
test_op = tensor.Elemwise(TestOp()) test_op = tt.Elemwise(TestOp())
x = tensor.scalar() x = tt.scalar()
# The call to `grad` used to crash. assert isinstance(tt.grad(test_op(2, x), x), gof.graph.Variable)
tensor.grad(test_op(2, x), x)
# Verify that trying to use the not implemented gradient fails.
try:
tensor.grad(test_op(x, 2), x)
assert False
except theano.gradient.NullTypeGradError:
pass
if __name__ == "__main__": # Verify that trying to use the not implemented gradient fails.
with pytest.raises(theano.gradient.NullTypeGradError):
t = TestElemwise("setUp") tt.grad(test_op(x, 2), x)
t.setup_method()
t.test_infer_shape()
import numpy as np import numpy as np
import pytest import pytest
import theano import theano
import theano.tensor as tt
from theano import tensor as T
from theano.tensor import fft from theano.tensor import fft
from tests import unittest_tools as utt from tests import unittest_tools as utt
...@@ -31,7 +32,7 @@ class TestFFT: ...@@ -31,7 +32,7 @@ class TestFFT:
def test_1Drfft(self): def test_1Drfft(self):
inputs_val = np.random.random((1, N)).astype(theano.config.floatX) inputs_val = np.random.random((1, N)).astype(theano.config.floatX)
x = T.matrix("x") x = tt.matrix("x")
rfft = fft.rfft(x) rfft = fft.rfft(x)
f_rfft = theano.function([x], rfft) f_rfft = theano.function([x], rfft)
res_rfft = f_rfft(inputs_val) res_rfft = f_rfft(inputs_val)
......
import numpy as np import time
import six.moves.cPickle as pickle import six.moves.cPickle as pickle
import numpy as np
import theano import theano
from theano import tensor as T import theano.tensor as tt
import time
def test_no_reuse(): def test_no_reuse():
x = T.lvector() x = tt.lvector()
y = T.lvector() y = tt.lvector()
f = theano.function([x, y], x + y) f = theano.function([x, y], x + y)
# provide both inputs in the first call # provide both inputs in the first call
...@@ -22,7 +24,7 @@ def test_no_reuse(): ...@@ -22,7 +24,7 @@ def test_no_reuse():
def test_gc_never_pickles_temporaries(): def test_gc_never_pickles_temporaries():
x = T.dvector() x = tt.dvector()
r = x r = x
for i in range(2): # TODO: 30 causes like LONG compilation due to MERGE for i in range(2): # TODO: 30 causes like LONG compilation due to MERGE
...@@ -105,7 +107,7 @@ def test_merge_opt_runtime(): ...@@ -105,7 +107,7 @@ def test_merge_opt_runtime():
# #
# Ironically, there is actually no merging to do in this graph. # Ironically, there is actually no merging to do in this graph.
x = T.dvector() x = tt.dvector()
r = x r = x
for i in range(50): for i in range(50):
r = r + r / 10 r = r + r / 10
......
import numpy as np import numpy as np
import theano.tensor.basic as tt
from theano.gof.type import Type from theano.gof.type import Type
from theano.gof.graph import Variable, Apply from theano.gof.graph import Variable, Apply
from theano.gof.op import Op from theano.gof.op import Op
from theano.gof.opt import MergeOptimizer from theano.gof.opt import MergeOptimizer
from theano.gof.fg import FunctionGraph as Env from theano.gof.fg import FunctionGraph
import theano.tensor.basic as T
def as_variable(x): def is_variable(x):
if not isinstance(x, Variable): if not isinstance(x, Variable):
raise TypeError("not a Variable", x) raise TypeError("not a Variable", x)
return x return x
...@@ -30,7 +31,7 @@ class MyOp(Op): ...@@ -30,7 +31,7 @@ class MyOp(Op):
self.x = x self.x = x
def make_node(self, *inputs): def make_node(self, *inputs):
inputs = list(map(as_variable, inputs)) inputs = list(map(is_variable, inputs))
for input in inputs: for input in inputs:
if not isinstance(input.type, MyType): if not isinstance(input.type, MyType):
raise Exception("Error 1") raise Exception("Error 1")
...@@ -65,9 +66,9 @@ def test_merge_with_weird_eq(): ...@@ -65,9 +66,9 @@ def test_merge_with_weird_eq():
# numpy arrays don't compare equal like other python objects # numpy arrays don't compare equal like other python objects
# SCALAR CASE # SCALAR CASE
x = T.constant(np.asarray(1), name="x") x = tt.constant(np.asarray(1), name="x")
y = T.constant(np.asarray(1), name="y") y = tt.constant(np.asarray(1), name="y")
g = Env([x, y], [x + y]) g = FunctionGraph([x, y], [x + y])
MergeOptimizer().optimize(g) MergeOptimizer().optimize(g)
assert len(g.apply_nodes) == 1 assert len(g.apply_nodes) == 1
...@@ -77,9 +78,9 @@ def test_merge_with_weird_eq(): ...@@ -77,9 +78,9 @@ def test_merge_with_weird_eq():
# NONSCALAR CASE # NONSCALAR CASE
# This was created to test TensorConstantSignature # This was created to test TensorConstantSignature
x = T.constant(np.ones(5), name="x") x = tt.constant(np.ones(5), name="x")
y = T.constant(np.ones(5), name="y") y = tt.constant(np.ones(5), name="y")
g = Env([x, y], [x + y]) g = FunctionGraph([x, y], [x + y])
MergeOptimizer().optimize(g) MergeOptimizer().optimize(g)
assert len(g.apply_nodes) == 1 assert len(g.apply_nodes) == 1
......
""" """
This is a minimized version of the mlp.py in the tutorial. We removed stuff that make this mlp don't work. This is a minimized version of the mlp.py in the tutorial. We removed stuff
But this test a bug that we saw. This bug made the Shape_i object not being lifted, that caused the CrossentropySoftmax... op not being inserted. that make this mlp don't work. But this test a bug that we saw. This bug made
the Shape_i object not being lifted, that caused the CrossentropySoftmax... op
not being inserted.
""" """
__docformat__ = "restructedtext en" __docformat__ = "restructedtext en"
from collections import OrderedDict from collections import OrderedDict
import numpy as np import numpy as np
import theano import theano
import theano.tensor as T import theano.tensor as tt
def gen_data(): def gen_data():
...@@ -49,7 +49,7 @@ def gen_data(): ...@@ -49,7 +49,7 @@ def gen_data():
# floats it doesn't make sense) therefore instead of returning # floats it doesn't make sense) therefore instead of returning
# ``shared_y`` we will have to cast it to int. This little hack # ``shared_y`` we will have to cast it to int. This little hack
# lets ous get around this issue # lets ous get around this issue
return shared_x, T.cast(shared_y, "int32") return shared_x, tt.cast(shared_y, "int32")
test_set_x, test_set_y = shared_dataset(test_set) test_set_x, test_set_y = shared_dataset(test_set)
valid_set_x, valid_set_y = shared_dataset(valid_set) valid_set_x, valid_set_y = shared_dataset(valid_set)
...@@ -96,11 +96,11 @@ class LogisticRegression(object): ...@@ -96,11 +96,11 @@ class LogisticRegression(object):
) )
# compute vector of class-membership probabilities in symbolic form # compute vector of class-membership probabilities in symbolic form
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)) self.p_y_given_x = tt.nnet.softmax(tt.dot(input, self.W))
# compute prediction as class whose probability is maximal in # compute prediction as class whose probability is maximal in
# symbolic form # symbolic form
self.y_pred = T.argmax(self.p_y_given_x, axis=1) self.y_pred = tt.argmax(self.p_y_given_x, axis=1)
# parameters of the model # parameters of the model
self.params = [self.W] self.params = [self.W]
...@@ -128,11 +128,11 @@ class LogisticRegression(object): ...@@ -128,11 +128,11 @@ class LogisticRegression(object):
# LP[T.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]] # LP[T.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]]
# and T.mean(LP[T.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v, # and T.mean(LP[T.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v,
# i.e., the mean log-likelihood across the minibatch. # i.e., the mean log-likelihood across the minibatch.
return T.log(self.p_y_given_x[T.arange(y.shape[0]), y]) return tt.log(self.p_y_given_x[tt.arange(y.shape[0]), y])
class HiddenLayer(object): class HiddenLayer(object):
def __init__(self, rng, input, n_in, n_out, activation=T.tanh, name_prefix=""): def __init__(self, rng, input, n_in, n_out, activation=tt.tanh, name_prefix=""):
""" """
Typical hidden layer of a MLP: units are fully-connected and have Typical hidden layer of a MLP: units are fully-connected and have
sigmoidal activation function. Weight matrix W is of shape (n_in,n_out) sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
...@@ -174,7 +174,7 @@ class HiddenLayer(object): ...@@ -174,7 +174,7 @@ class HiddenLayer(object):
) )
self.W = theano.shared(value=W_values, name=name_prefix + "W") self.W = theano.shared(value=W_values, name=name_prefix + "W")
self.output = T.dot(input, self.W) self.output = tt.dot(input, self.W)
# parameters of the model # parameters of the model
self.params = [self.W] self.params = [self.W]
...@@ -222,7 +222,7 @@ class MLP(object): ...@@ -222,7 +222,7 @@ class MLP(object):
input=input, input=input,
n_in=n_in, n_in=n_in,
n_out=n_hidden, n_out=n_hidden,
activation=T.tanh, activation=tt.tanh,
name_prefix="hid_", name_prefix="hid_",
) )
...@@ -284,9 +284,9 @@ def test_mlp(): ...@@ -284,9 +284,9 @@ def test_mlp():
# print '... building the model' # print '... building the model'
# allocate symbolic variables for the data # allocate symbolic variables for the data
index = T.lscalar() # index to a [mini]batch index = tt.lscalar() # index to a [mini]batch
x = T.matrix("x") # the data is presented as rasterized images x = tt.matrix("x") # the data is presented as rasterized images
y = T.ivector("y") # the labels are presented as 1D vector of y = tt.ivector("y") # the labels are presented as 1D vector of
# [int] labels # [int] labels
rng = np.random.RandomState(1234) rng = np.random.RandomState(1234)
...@@ -303,7 +303,7 @@ def test_mlp(): ...@@ -303,7 +303,7 @@ def test_mlp():
# the resulting gradients will be stored in a list gparams # the resulting gradients will be stored in a list gparams
gparams = [] gparams = []
for param in classifier.params: for param in classifier.params:
gparam = T.grad(cost, param) gparam = tt.grad(cost, param)
gparams.append(gparam) gparams.append(gparam)
# Some optimizations needed are tagged with 'fast_run' # Some optimizations needed are tagged with 'fast_run'
...@@ -312,7 +312,7 @@ def test_mlp(): ...@@ -312,7 +312,7 @@ def test_mlp():
updates2 = OrderedDict() updates2 = OrderedDict()
updates2[classifier.hiddenLayer.params[0]] = T.grad( updates2[classifier.hiddenLayer.params[0]] = tt.grad(
cost, classifier.hiddenLayer.params[0] cost, classifier.hiddenLayer.params[0]
) )
train_model = theano.function( train_model = theano.function(
...@@ -328,7 +328,7 @@ def test_mlp(): ...@@ -328,7 +328,7 @@ def test_mlp():
# theano.printing.debugprint(train_model, print_type=True) # theano.printing.debugprint(train_model, print_type=True)
assert any( assert any(
[ [
isinstance(i.op, T.nnet.CrossentropySoftmax1HotWithBiasDx) isinstance(i.op, tt.nnet.CrossentropySoftmax1HotWithBiasDx)
for i in train_model.maker.fgraph.toposort() for i in train_model.maker.fgraph.toposort()
] ]
) )
...@@ -348,11 +348,7 @@ def test_mlp(): ...@@ -348,11 +348,7 @@ def test_mlp():
# theano.printing.debugprint(train_model, print_type=True) # theano.printing.debugprint(train_model, print_type=True)
assert any( assert any(
[ [
isinstance(i.op, T.nnet.CrossentropySoftmax1HotWithBiasDx) isinstance(i.op, tt.nnet.CrossentropySoftmax1HotWithBiasDx)
for i in train_model.maker.fgraph.toposort() for i in train_model.maker.fgraph.toposort()
] ]
) )
if __name__ == "__main__":
test_mlp()
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -7,7 +7,7 @@ import numpy as np ...@@ -7,7 +7,7 @@ import numpy as np
import theano import theano
import theano.scalar as scal import theano.scalar as scal
import theano.tensor as tensor import theano.tensor as tt
from numpy.testing import assert_array_equal from numpy.testing import assert_array_equal
...@@ -74,7 +74,7 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -74,7 +74,7 @@ class TestSubtensor(utt.OptimizationTestMixin):
""" """
def setup_method(self): def setup_method(self):
self.shared = tensor._shared self.shared = _shared
self.dtype = theano.config.floatX self.dtype = theano.config.floatX
mode = theano.compile.mode.get_default_mode() mode = theano.compile.mode.get_default_mode()
self.mode = mode.including("local_useless_subtensor") self.mode = mode.including("local_useless_subtensor")
...@@ -312,7 +312,7 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -312,7 +312,7 @@ class TestSubtensor(utt.OptimizationTestMixin):
(lambda: n[: (2 ** 63)])() (lambda: n[: (2 ** 63)])()
def test_list_slice(self): def test_list_slice(self):
x = tensor.arange(100).reshape((5, 5, 4)) x = tt.arange(100).reshape((5, 5, 4))
res = x[[slice(1, -1)] * x.ndim].eval() res = x[[slice(1, -1)] * x.ndim].eval()
x = np.arange(100).reshape((5, 5, 4)) x = np.arange(100).reshape((5, 5, 4))
np.allclose(res, x[[slice(1, -1)] * x.ndim]) np.allclose(res, x[[slice(1, -1)] * x.ndim])
...@@ -567,7 +567,7 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -567,7 +567,7 @@ class TestSubtensor(utt.OptimizationTestMixin):
n = self.shared(data) n = self.shared(data)
z = scal.constant(subi).astype("int32") z = scal.constant(subi).astype("int32")
t = n[z:, z] t = n[z:, z]
gn = tensor.grad(tensor.sum(tensor.exp(t)), n) gn = tt.grad(tt.sum(tt.exp(t)), n)
f = inplace_func([], gn, mode=self.mode) f = inplace_func([], gn, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
...@@ -598,7 +598,7 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -598,7 +598,7 @@ class TestSubtensor(utt.OptimizationTestMixin):
mv = np.asarray(rand(*m_shape), dtype=self.dtype) mv = np.asarray(rand(*m_shape), dtype=self.dtype)
t = op(n[:z, :z], m) t = op(n[:z, :z], m)
gn, gm = tensor.grad(tensor.sum(t), [n, m]) gn, gm = tt.grad(tt.sum(t), [n, m])
utt.verify_grad(lambda m: op(n[:z, :z], m), [mv], mode=self.mode) utt.verify_grad(lambda m: op(n[:z, :z], m), [mv], mode=self.mode)
utt.verify_grad(lambda nn: op(nn[:z, :z], mv), [data], mode=self.mode) utt.verify_grad(lambda nn: op(nn[:z, :z], mv), [data], mode=self.mode)
...@@ -606,7 +606,7 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -606,7 +606,7 @@ class TestSubtensor(utt.OptimizationTestMixin):
data = np.asarray(rand(2, 3), dtype=self.dtype) data = np.asarray(rand(2, 3), dtype=self.dtype)
n = self.shared(data) n = self.shared(data)
t = n[1, 0] t = n[1, 0]
gn = tensor.grad(tensor.sum(tensor.exp(t)), n) gn = tt.grad(tt.sum(tt.exp(t)), n)
f = self.function([], gn) f = self.function([], gn)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op, DeepCopyOp)] topo_ = [node for node in topo if not isinstance(node.op, DeepCopyOp)]
...@@ -632,7 +632,7 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -632,7 +632,7 @@ class TestSubtensor(utt.OptimizationTestMixin):
# optimized for that case. # optimized for that case.
(rand(4, 4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0, -1, -2, -3, -4]), (rand(4, 4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0, -1, -2, -3, -4]),
# Test with TensorConstant index. # Test with TensorConstant index.
(rand(4, 2, 3), tensor.constant([3, 3, 1, 1, 2, 2, 0, 0])), (rand(4, 2, 3), tt.constant([3, 3, 1, 1, 2, 2, 0, 0])),
]: ]:
data = np.asarray(data, dtype=self.dtype) data = np.asarray(data, dtype=self.dtype)
n = self.shared(data) n = self.shared(data)
...@@ -717,7 +717,7 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -717,7 +717,7 @@ class TestSubtensor(utt.OptimizationTestMixin):
def test_adv_sub1_broadcast(self): def test_adv_sub1_broadcast(self):
v = np.arange(3, dtype=self.dtype).reshape((1, 3)) v = np.arange(3, dtype=self.dtype).reshape((1, 3))
n = self.shared(v * 5, broadcastable=(True, False)) n = self.shared(v * 5, broadcastable=(True, False))
idx = tensor.lvector() idx = lvector()
t = n[idx] t = n[idx]
assert isinstance(t.owner.op, AdvancedSubtensor1) assert isinstance(t.owner.op, AdvancedSubtensor1)
...@@ -779,13 +779,13 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -779,13 +779,13 @@ class TestSubtensor(utt.OptimizationTestMixin):
# test set_subtensor broadcast # test set_subtensor broadcast
self.dtype = "float32" self.dtype = "float32"
x = tensor.tensor4("x", dtype=self.dtype) x = tt.tensor4("x", dtype=self.dtype)
indexes = theano.shared(np.int32([1, 2, 3, 4])) indexes = theano.shared(np.int32([1, 2, 3, 4]))
W = self.shared(np.random.random((10, 10, 3, 3)).astype(self.dtype)) W = self.shared(np.random.random((10, 10, 3, 3)).astype(self.dtype))
h = x + W h = x + W
h = tensor.set_subtensor(h[indexes], h[indexes]) h = tt.set_subtensor(h[indexes], h[indexes])
g = tensor.grad(h.sum(), W) g = tt.grad(h.sum(), W)
N = 2 N = 2
if ( if (
theano.config.mode == "FAST_COMPILE" theano.config.mode == "FAST_COMPILE"
...@@ -800,7 +800,7 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -800,7 +800,7 @@ class TestSubtensor(utt.OptimizationTestMixin):
# The idx can be a broadcastable vector. # The idx can be a broadcastable vector.
ones = np.ones((4, 3), dtype=self.dtype) ones = np.ones((4, 3), dtype=self.dtype)
n = self.shared(ones * 5) n = self.shared(ones * 5)
idx = tensor.TensorType(dtype="int64", broadcastable=(True,))() idx = tt.TensorType(dtype="int64", broadcastable=(True,))()
assert idx.type.broadcastable == (True,) assert idx.type.broadcastable == (True,)
t = n[idx] t = n[idx]
assert isinstance(t.owner.op, AdvancedSubtensor1) assert isinstance(t.owner.op, AdvancedSubtensor1)
...@@ -849,9 +849,9 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -849,9 +849,9 @@ class TestSubtensor(utt.OptimizationTestMixin):
v_data = np.array(np.arange(5), dtype=self.dtype) v_data = np.array(np.arange(5), dtype=self.dtype)
t_data = self.shared(v_data) t_data = self.shared(v_data)
start = tensor.iscalar("b") start = iscalar("b")
stop = tensor.iscalar("e") stop = iscalar("e")
step = tensor.iscalar("s") step = iscalar("s")
f = self.function( f = self.function(
[start, stop, step], [start, stop, step],
t_data[start:stop:step].shape, t_data[start:stop:step].shape,
...@@ -866,18 +866,18 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -866,18 +866,18 @@ class TestSubtensor(utt.OptimizationTestMixin):
assert np.all(f(start, stop, step) == v_data[start:stop:step].shape) assert np.all(f(start, stop, step) == v_data[start:stop:step].shape)
def test_slice_canonical_form_0(self): def test_slice_canonical_form_0(self):
start = tensor.iscalar("b") start = iscalar("b")
stop = tensor.iscalar("e") stop = iscalar("e")
step = tensor.iscalar("s") step = iscalar("s")
length = tensor.iscalar("l") length = iscalar("l")
cnf = get_canonical_form_slice(slice(start, stop, step), length) cnf = get_canonical_form_slice(slice(start, stop, step), length)
f = self.function( f = self.function(
[start, stop, step, length], [start, stop, step, length],
[ [
tensor.as_tensor_variable(cnf[0].start), tt.as_tensor_variable(cnf[0].start),
tensor.as_tensor_variable(cnf[0].stop), tt.as_tensor_variable(cnf[0].stop),
tensor.as_tensor_variable(cnf[0].step), tt.as_tensor_variable(cnf[0].step),
tensor.as_tensor_variable(cnf[1]), tt.as_tensor_variable(cnf[1]),
], ],
N=0, N=0,
op=subtensor_ops, op=subtensor_ops,
...@@ -895,17 +895,17 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -895,17 +895,17 @@ class TestSubtensor(utt.OptimizationTestMixin):
assert np.all(t_out.shape == v_out.shape) assert np.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_1(self): def test_slice_canonical_form_1(self):
stop = tensor.iscalar("e") stop = iscalar("e")
step = tensor.iscalar("s") step = iscalar("s")
length = tensor.iscalar("l") length = iscalar("l")
cnf = get_canonical_form_slice(slice(None, stop, step), length) cnf = get_canonical_form_slice(slice(None, stop, step), length)
f = self.function( f = self.function(
[stop, step, length], [stop, step, length],
[ [
tensor.as_tensor_variable(cnf[0].start), tt.as_tensor_variable(cnf[0].start),
tensor.as_tensor_variable(cnf[0].stop), tt.as_tensor_variable(cnf[0].stop),
tensor.as_tensor_variable(cnf[0].step), tt.as_tensor_variable(cnf[0].step),
tensor.as_tensor_variable(cnf[1]), tt.as_tensor_variable(cnf[1]),
], ],
N=0, N=0,
op=subtensor_ops, op=subtensor_ops,
...@@ -922,17 +922,17 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -922,17 +922,17 @@ class TestSubtensor(utt.OptimizationTestMixin):
assert np.all(t_out.shape == v_out.shape) assert np.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_2(self): def test_slice_canonical_form_2(self):
start = tensor.iscalar("b") start = iscalar("b")
step = tensor.iscalar("s") step = iscalar("s")
length = tensor.iscalar("l") length = iscalar("l")
cnf = get_canonical_form_slice(slice(start, None, step), length) cnf = get_canonical_form_slice(slice(start, None, step), length)
f = self.function( f = self.function(
[start, step, length], [start, step, length],
[ [
tensor.as_tensor_variable(cnf[0].start), tt.as_tensor_variable(cnf[0].start),
tensor.as_tensor_variable(cnf[0].stop), tt.as_tensor_variable(cnf[0].stop),
tensor.as_tensor_variable(cnf[0].step), tt.as_tensor_variable(cnf[0].step),
tensor.as_tensor_variable(cnf[1]), tt.as_tensor_variable(cnf[1]),
], ],
N=0, N=0,
op=subtensor_ops, op=subtensor_ops,
...@@ -949,17 +949,17 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -949,17 +949,17 @@ class TestSubtensor(utt.OptimizationTestMixin):
assert np.all(t_out.shape == v_out.shape) assert np.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_3(self): def test_slice_canonical_form_3(self):
start = tensor.iscalar("b") start = iscalar("b")
stop = tensor.iscalar("e") stop = iscalar("e")
length = tensor.iscalar("l") length = iscalar("l")
cnf = get_canonical_form_slice(slice(start, stop, None), length) cnf = get_canonical_form_slice(slice(start, stop, None), length)
f = self.function( f = self.function(
[start, stop, length], [start, stop, length],
[ [
tensor.as_tensor_variable(cnf[0].start), tt.as_tensor_variable(cnf[0].start),
tensor.as_tensor_variable(cnf[0].stop), tt.as_tensor_variable(cnf[0].stop),
tensor.as_tensor_variable(cnf[0].step), tt.as_tensor_variable(cnf[0].step),
tensor.as_tensor_variable(cnf[1]), tt.as_tensor_variable(cnf[1]),
], ],
N=0, N=0,
op=subtensor_ops, op=subtensor_ops,
...@@ -976,16 +976,16 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -976,16 +976,16 @@ class TestSubtensor(utt.OptimizationTestMixin):
assert np.all(t_out.shape == v_out.shape) assert np.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_4(self): def test_slice_canonical_form_4(self):
step = tensor.iscalar("s") step = iscalar("s")
length = tensor.iscalar("l") length = iscalar("l")
cnf = get_canonical_form_slice(slice(None, None, step), length) cnf = get_canonical_form_slice(slice(None, None, step), length)
f = self.function( f = self.function(
[step, length], [step, length],
[ [
tensor.as_tensor_variable(cnf[0].start), tt.as_tensor_variable(cnf[0].start),
tensor.as_tensor_variable(cnf[0].stop), tt.as_tensor_variable(cnf[0].stop),
tensor.as_tensor_variable(cnf[0].step), tt.as_tensor_variable(cnf[0].step),
tensor.as_tensor_variable(cnf[1]), tt.as_tensor_variable(cnf[1]),
], ],
N=0, N=0,
op=subtensor_ops, op=subtensor_ops,
...@@ -1001,16 +1001,16 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -1001,16 +1001,16 @@ class TestSubtensor(utt.OptimizationTestMixin):
assert np.all(t_out.shape == v_out.shape) assert np.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_5(self): def test_slice_canonical_form_5(self):
start = tensor.iscalar("b") start = iscalar("b")
length = tensor.iscalar("l") length = iscalar("l")
cnf = get_canonical_form_slice(slice(start, None, None), length) cnf = get_canonical_form_slice(slice(start, None, None), length)
f = self.function( f = self.function(
[start, length], [start, length],
[ [
tensor.as_tensor_variable(cnf[0].start), tt.as_tensor_variable(cnf[0].start),
tensor.as_tensor_variable(cnf[0].stop), tt.as_tensor_variable(cnf[0].stop),
tensor.as_tensor_variable(cnf[0].step), tt.as_tensor_variable(cnf[0].step),
tensor.as_tensor_variable(cnf[1]), tt.as_tensor_variable(cnf[1]),
], ],
N=0, N=0,
op=subtensor_ops, op=subtensor_ops,
...@@ -1026,16 +1026,16 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -1026,16 +1026,16 @@ class TestSubtensor(utt.OptimizationTestMixin):
assert np.all(t_out.shape == v_out.shape) assert np.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_6(self): def test_slice_canonical_form_6(self):
stop = tensor.iscalar("e") stop = iscalar("e")
length = tensor.iscalar("l") length = iscalar("l")
cnf = get_canonical_form_slice(slice(None, stop, None), length) cnf = get_canonical_form_slice(slice(None, stop, None), length)
f = self.function( f = self.function(
[stop, length], [stop, length],
[ [
tensor.as_tensor_variable(cnf[0].start), tt.as_tensor_variable(cnf[0].start),
tensor.as_tensor_variable(cnf[0].stop), tt.as_tensor_variable(cnf[0].stop),
tensor.as_tensor_variable(cnf[0].step), tt.as_tensor_variable(cnf[0].step),
tensor.as_tensor_variable(cnf[1]), tt.as_tensor_variable(cnf[1]),
], ],
N=0, N=0,
op=subtensor_ops, op=subtensor_ops,
...@@ -1057,7 +1057,7 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -1057,7 +1057,7 @@ class TestSubtensor(utt.OptimizationTestMixin):
# Should stay on the cpu. # Should stay on the cpu.
idx_ = _shared(np.asarray(idx)) idx_ = _shared(np.asarray(idx))
t = n[idx_] t = n[idx_]
gn = tensor.grad(tensor.sum(tensor.exp(t)), n) gn = tt.grad(tt.sum(tt.exp(t)), n)
f = self.function([], [gn, gn.shape], op=AdvancedIncSubtensor1) f = self.function([], [gn, gn.shape], op=AdvancedIncSubtensor1)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
if not self.fast_compile: if not self.fast_compile:
...@@ -1083,13 +1083,13 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -1083,13 +1083,13 @@ class TestSubtensor(utt.OptimizationTestMixin):
assert np.allclose(gshape, data.shape) assert np.allclose(gshape, data.shape)
def fct(t): def fct(t):
return tensor.sum(t[idx_]) return tt.sum(t[idx_])
utt.verify_grad(fct, [data], mode=self.mode) utt.verify_grad(fct, [data], mode=self.mode)
# Test the grad of the grad (e.i. AdvancedIncSubtensor1.grad) # Test the grad of the grad (e.i. AdvancedIncSubtensor1.grad)
def fct2(t): def fct2(t):
return tensor.grad(tensor.sum(t[idx_]), t) return tt.grad(tt.sum(t[idx_]), t)
utt.verify_grad(fct2, [data], mode=self.mode) utt.verify_grad(fct2, [data], mode=self.mode)
...@@ -1195,11 +1195,11 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -1195,11 +1195,11 @@ class TestSubtensor(utt.OptimizationTestMixin):
# Symbolic variable to be incremented. # Symbolic variable to be incremented.
# We create a new one every time in order not to # We create a new one every time in order not to
# have duplicated variables in the function's inputs # have duplicated variables in the function's inputs
data_var = tensor.TensorType( data_var = tt.TensorType(
broadcastable=[False] * data_n_dims, dtype=self.dtype broadcastable=[False] * data_n_dims, dtype=self.dtype
)() )()
# Symbolic variable with rows to be incremented. # Symbolic variable with rows to be incremented.
idx_var = tensor.vector(dtype="int64") idx_var = vector(dtype="int64")
n_to_inc = rng.randint(data_shape[0]) n_to_inc = rng.randint(data_shape[0])
if ( if (
n_to_inc == 1 n_to_inc == 1
...@@ -1218,7 +1218,7 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -1218,7 +1218,7 @@ class TestSubtensor(utt.OptimizationTestMixin):
) )
idx_num = idx_num.astype("int64") idx_num = idx_num.astype("int64")
# Symbolic variable with increment value. # Symbolic variable with increment value.
inc_var = tensor.TensorType( inc_var = tt.TensorType(
broadcastable=[False] * inc_n_dims, dtype=self.dtype broadcastable=[False] * inc_n_dims, dtype=self.dtype
)() )()
# Trick for the case where `inc_shape` is the same as # Trick for the case where `inc_shape` is the same as
...@@ -1302,8 +1302,8 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -1302,8 +1302,8 @@ class TestSubtensor(utt.OptimizationTestMixin):
# Test case provided (and bug detected, gh-607) by John Salvatier # Test case provided (and bug detected, gh-607) by John Salvatier
m = matrix("m") m = matrix("m")
gv = np.array([0, 1, 3]) gv = np.array([0, 1, 3])
g = tensor.constant(gv) g = tt.constant(gv)
i = tensor.lvector("i") i = lvector("i")
# s1 used to fail # s1 used to fail
s1 = m[gv, i] s1 = m[gv, i]
...@@ -1437,7 +1437,7 @@ class TestSubtensor(utt.OptimizationTestMixin): ...@@ -1437,7 +1437,7 @@ class TestSubtensor(utt.OptimizationTestMixin):
config.warn.inc_set_subtensor1 = orig_warn config.warn.inc_set_subtensor1 = orig_warn
def test_take(self): def test_take(self):
a = tensor.matrix() a = matrix()
f = theano.function( f = theano.function(
[a], a.take(0, axis=-1), allow_input_downcast=True, mode=self.mode [a], a.take(0, axis=-1), allow_input_downcast=True, mode=self.mode
) )
...@@ -1451,12 +1451,12 @@ class TestIncSubtensor1: ...@@ -1451,12 +1451,12 @@ class TestIncSubtensor1:
def setup_method(self): def setup_method(self):
self.rng = np.random.RandomState(seed=utt.fetch_seed()) self.rng = np.random.RandomState(seed=utt.fetch_seed())
self.s = tensor.iscalar() self.s = iscalar()
self.v = tensor.fvector() self.v = tt.fvector()
self.m = tensor.dmatrix() self.m = dmatrix()
self.t = tensor.ctensor3() self.t = ctensor3()
self.adv1q = tensor.lvector() # advanced 1d query self.adv1q = lvector() # advanced 1d query
def test_cant_adv_idx_into_scalar(self): def test_cant_adv_idx_into_scalar(self):
with pytest.raises(IndexError): with pytest.raises(IndexError):
...@@ -1499,7 +1499,7 @@ class TestIncSubtensor1: ...@@ -1499,7 +1499,7 @@ class TestIncSubtensor1:
(lambda: inc_subtensor(self.v[self.adv1q](fmatrix())))() (lambda: inc_subtensor(self.v[self.adv1q](fmatrix())))()
def test_matrix_idx(self): def test_matrix_idx(self):
idx = tensor.lmatrix() idx = lmatrix()
a = self.m[idx] a = self.m[idx]
a2 = inc_subtensor(a, a) a2 = inc_subtensor(a, a)
f = theano.function([self.m, idx], a2) f = theano.function([self.m, idx], a2)
...@@ -1514,9 +1514,9 @@ class TestIncSubtensor1: ...@@ -1514,9 +1514,9 @@ class TestIncSubtensor1:
utt.assert_allclose(a2val[3], mval[3] * 2) utt.assert_allclose(a2val[3], mval[3] * 2)
def test_inc_bcastableidx(self): def test_inc_bcastableidx(self):
idx = tensor.constant([0]) idx = tt.constant([0])
c_inc = tensor.col() c_inc = tt.col()
m_inc = tensor.matrix() m_inc = matrix()
out1 = inc_subtensor(self.m[:, idx], c_inc) out1 = inc_subtensor(self.m[:, idx], c_inc)
out2 = inc_subtensor(self.m[:, idx], m_inc) out2 = inc_subtensor(self.m[:, idx], m_inc)
...@@ -1532,7 +1532,7 @@ class TestAdvancedSubtensor: ...@@ -1532,7 +1532,7 @@ class TestAdvancedSubtensor:
"""Test inc_subtensor and set_subtensor.""" """Test inc_subtensor and set_subtensor."""
def setup_method(self): def setup_method(self):
self.shared = tensor._shared self.shared = _shared
self.dtype = theano.config.floatX self.dtype = theano.config.floatX
self.mode = theano.compile.mode.get_default_mode() self.mode = theano.compile.mode.get_default_mode()
...@@ -1552,10 +1552,10 @@ class TestAdvancedSubtensor: ...@@ -1552,10 +1552,10 @@ class TestAdvancedSubtensor:
def check(idx, y_val, x_val, true): def check(idx, y_val, x_val, true):
x = self.shared(x_val, name="x") x = self.shared(x_val, name="x")
y = tensor.tensor( y = tt.tensor(
dtype="float32", broadcastable=(False,) * len(y_val.shape), name="y" dtype="float32", broadcastable=(False,) * len(y_val.shape), name="y"
) )
sym_idx = [tensor.as_tensor_variable(ix) for ix in idx] sym_idx = [tt.as_tensor_variable(ix) for ix in idx]
expr = advanced_inc_subtensor(x, y, *sym_idx) expr = advanced_inc_subtensor(x, y, *sym_idx)
f = theano.function([y], expr, mode=self.mode) f = theano.function([y], expr, mode=self.mode)
rval = f(y_val) rval = f(y_val)
...@@ -1628,7 +1628,7 @@ class TestAdvancedSubtensor: ...@@ -1628,7 +1628,7 @@ class TestAdvancedSubtensor:
# optimized for that case. # optimized for that case.
(rand(4, 4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0, -1, -2, -3, -4]), (rand(4, 4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0, -1, -2, -3, -4]),
# Test with TensorConstant index. # Test with TensorConstant index.
(rand(2, 4, 3), tensor.constant([3, 3, 1, 1, 2, 2, 0, 0])), (rand(2, 4, 3), tt.constant([3, 3, 1, 1, 2, 2, 0, 0])),
]: ]:
data = np.asarray(data, dtype=self.dtype) data = np.asarray(data, dtype=self.dtype)
n = self.shared(data) n = self.shared(data)
...@@ -1704,7 +1704,7 @@ class TestAdvancedSubtensor: ...@@ -1704,7 +1704,7 @@ class TestAdvancedSubtensor:
subt = self.m[self.ix1, self.ix12] subt = self.m[self.ix1, self.ix12]
a = inc_subtensor(subt, subt) a = inc_subtensor(subt, subt)
typ = tensor.TensorType(self.m.type.dtype, self.ix2.type.broadcastable) typ = tt.TensorType(self.m.type.dtype, self.ix2.type.broadcastable)
assert a.type == typ, (a.type, typ) assert a.type == typ, (a.type, typ)
f = theano.function( f = theano.function(
[self.m, self.ix1, self.ix12], a, allow_input_downcast=True, mode=self.mode [self.m, self.ix1, self.ix12], a, allow_input_downcast=True, mode=self.mode
...@@ -1717,7 +1717,7 @@ class TestAdvancedSubtensor: ...@@ -1717,7 +1717,7 @@ class TestAdvancedSubtensor:
def test_inc_adv_subtensor_with_broadcasting(self): def test_inc_adv_subtensor_with_broadcasting(self):
inc = dscalar() inc = dscalar()
a = inc_subtensor(self.m[self.ix1, self.ix12], inc) a = inc_subtensor(self.m[self.ix1, self.ix12], inc)
g_inc = tensor.grad(a.sum(), inc) g_inc = tt.grad(a.sum(), inc)
assert a.type == self.m.type, (a.type, self.m.type) assert a.type == self.m.type, (a.type, self.m.type)
f = theano.function( f = theano.function(
...@@ -1737,7 +1737,7 @@ class TestAdvancedSubtensor: ...@@ -1737,7 +1737,7 @@ class TestAdvancedSubtensor:
def test_inc_adv_subtensor1_with_broadcasting(self): def test_inc_adv_subtensor1_with_broadcasting(self):
inc = dscalar() inc = dscalar()
a = inc_subtensor(self.m[self.ix1], inc) a = inc_subtensor(self.m[self.ix1], inc)
g_inc = tensor.grad(a.sum(), inc) g_inc = tt.grad(a.sum(), inc)
assert a.type == self.m.type, (a.type, self.m.type) assert a.type == self.m.type, (a.type, self.m.type)
f = theano.function( f = theano.function(
...@@ -1782,8 +1782,8 @@ class TestAdvancedSubtensor: ...@@ -1782,8 +1782,8 @@ class TestAdvancedSubtensor:
rng = np.random.RandomState(utt.fetch_seed()) rng = np.random.RandomState(utt.fetch_seed())
a = rng.uniform(size=(3, 3)) a = rng.uniform(size=(3, 3))
b = theano.shared(a) b = theano.shared(a)
i = tensor.iscalar() i = iscalar()
j = tensor.iscalar() j = iscalar()
z = b[[i, j], :] z = b[[i, j], :]
f1 = theano.function([i, j], z, mode=self.mode) f1 = theano.function([i, j], z, mode=self.mode)
cmd = f1(0, 1) == a[[0, 1], :] cmd = f1(0, 1) == a[[0, 1], :]
...@@ -1791,7 +1791,7 @@ class TestAdvancedSubtensor: ...@@ -1791,7 +1791,7 @@ class TestAdvancedSubtensor:
aa = rng.uniform(size=(4, 2, 3)) aa = rng.uniform(size=(4, 2, 3))
bb = theano.shared(aa) bb = theano.shared(aa)
k = tensor.iscalar() k = iscalar()
z = bb[[i, j, k], :, i:k] z = bb[[i, j, k], :, i:k]
f2 = theano.function([i, j, k], z, mode=self.mode) f2 = theano.function([i, j, k], z, mode=self.mode)
cmd = f2(0, 1, 2) == aa[[0, 1, 2], :, 0:2] cmd = f2(0, 1, 2) == aa[[0, 1, 2], :, 0:2]
...@@ -1799,7 +1799,7 @@ class TestAdvancedSubtensor: ...@@ -1799,7 +1799,7 @@ class TestAdvancedSubtensor:
def test_adv_sub_3d(self): def test_adv_sub_3d(self):
# Reported in https://github.com/Theano/Theano/issues/5674 # Reported in https://github.com/Theano/Theano/issues/5674
X = tensor.tensor3("X") X = tt.tensor3("X")
xx = np.zeros((3, 2, 2), config.floatX) xx = np.zeros((3, 2, 2), config.floatX)
for i in range(3): for i in range(3):
...@@ -1821,7 +1821,7 @@ class TestAdvancedSubtensor: ...@@ -1821,7 +1821,7 @@ class TestAdvancedSubtensor:
def test_adv_sub_slice(self): def test_adv_sub_slice(self):
# Reported in https://github.com/Theano/Theano/issues/5898 # Reported in https://github.com/Theano/Theano/issues/5898
var = self.shared(np.zeros([3, 3], dtype=config.floatX)) var = self.shared(np.zeros([3, 3], dtype=config.floatX))
slc = tensor.slicetype() slc = tt.slicetype()
f = theano.function([slc], var[slc], mode=self.mode) f = theano.function([slc], var[slc], mode=self.mode)
s = slice(1, 3) s = slice(1, 3)
f(s) f(s)
...@@ -1833,7 +1833,7 @@ class TestAdvancedSubtensor: ...@@ -1833,7 +1833,7 @@ class TestAdvancedSubtensor:
var = self.shared(var_v) var = self.shared(var_v)
idx1_v = rng.randint(0, 61, size=(5, 4)).astype("int32") idx1_v = rng.randint(0, 61, size=(5, 4)).astype("int32")
idx1 = self.shared(idx1_v) idx1 = self.shared(idx1_v)
idx2 = tensor.arange(4) idx2 = tt.arange(4)
out = var[:, idx1, idx2] out = var[:, idx1, idx2]
f = theano.function([], out, mode=self.mode) f = theano.function([], out, mode=self.mode)
out_v = f() out_v = f()
...@@ -1845,8 +1845,8 @@ class TestAdvancedSubtensor: ...@@ -1845,8 +1845,8 @@ class TestAdvancedSubtensor:
def test_grad(self): def test_grad(self):
ones = np.ones((1, 3), dtype=self.dtype) ones = np.ones((1, 3), dtype=self.dtype)
n = self.shared(ones * 5, broadcastable=(True, False)) n = self.shared(ones * 5, broadcastable=(True, False))
idx = tensor.lvector() idx = lvector()
idx2 = tensor.lvector() idx2 = lvector()
t = n[idx, idx2] t = n[idx, idx2]
assert isinstance(t.owner.op, AdvancedSubtensor) assert isinstance(t.owner.op, AdvancedSubtensor)
...@@ -1883,7 +1883,7 @@ class TestAdvancedSubtensor: ...@@ -1883,7 +1883,7 @@ class TestAdvancedSubtensor:
# Test boolean gradients # Test boolean gradients
def fun(x, y): def fun(x, y):
return advanced_inc_subtensor( return advanced_inc_subtensor(
x, y, tensor.as_tensor(np.array([[True, False], [False, True]])) x, y, tt.as_tensor(np.array([[True, False], [False, True]]))
) )
utt.verify_grad( utt.verify_grad(
...@@ -1897,7 +1897,7 @@ class TestAdvancedSubtensor: ...@@ -1897,7 +1897,7 @@ class TestAdvancedSubtensor:
def fun(x, y): def fun(x, y):
return advanced_set_subtensor( return advanced_set_subtensor(
x, y, tensor.as_tensor(np.array([[True, False], [False, True]])) x, y, tt.as_tensor(np.array([[True, False], [False, True]]))
) )
utt.verify_grad( utt.verify_grad(
...@@ -2216,7 +2216,7 @@ class TestInferShape(utt.InferShapeTester): ...@@ -2216,7 +2216,7 @@ class TestInferShape(utt.InferShapeTester):
check_topo=False, check_topo=False,
) )
abs_res = n[~tensor.isinf(n)] abs_res = n[~tt.isinf(n)]
assert abs_res.broadcastable == (False,) assert abs_res.broadcastable == (False,)
...@@ -2239,7 +2239,7 @@ def test_indexed_result_shape(): ...@@ -2239,7 +2239,7 @@ def test_indexed_result_shape():
if isinstance(x, (slice, type(None))): if isinstance(x, (slice, type(None))):
return x return x
else: else:
return tensor.as_tensor(x) return tt.as_tensor(x)
def bcast_shape_tuple(x): def bcast_shape_tuple(x):
if not hasattr(x, "shape"): if not hasattr(x, "shape"):
...@@ -2250,14 +2250,14 @@ def test_indexed_result_shape(): ...@@ -2250,14 +2250,14 @@ def test_indexed_result_shape():
def compare_index_shapes(test_array, test_idx): def compare_index_shapes(test_array, test_idx):
res = indexed_result_shape( res = indexed_result_shape(
tensor.as_tensor(test_array).shape, [idx_as_tensor(i) for i in test_idx] tt.as_tensor(test_array).shape, [idx_as_tensor(i) for i in test_idx]
) )
exp_res = test_array[test_idx].shape exp_res = test_array[test_idx].shape
assert np.array_equal(tuple(get_test_value(r) for r in res), exp_res) assert np.array_equal(tuple(get_test_value(r) for r in res), exp_res)
# Test shape-only version # Test shape-only version
res = indexed_result_shape( res = indexed_result_shape(
tensor.as_tensor(test_array).shape, tt.as_tensor(test_array).shape,
[bcast_shape_tuple(idx_as_tensor(i)) for i in test_idx], [bcast_shape_tuple(idx_as_tensor(i)) for i in test_idx],
indices_are_shapes=True, indices_are_shapes=True,
) )
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论