提交 e0e5b3b8 authored 作者: Brandon T. Willard's avatar Brandon T. Willard 提交者: Thomas Wiecki

Put math Ops in theano.tensor.math and array Ops in theano.tensor.basic

上级 2b06fa16
...@@ -59,8 +59,8 @@ jobs: ...@@ -59,8 +59,8 @@ jobs:
float32: [0] float32: [0]
part: part:
- "tests --ignore=tests/tensor --ignore=tests/sparse --ignore=tests/tensor/nnet" - "tests --ignore=tests/tensor --ignore=tests/sparse --ignore=tests/tensor/nnet"
- "tests/tensor tests/sparse --ignore=tests/tensor/test_basic.py --ignore=tests/tensor/test_basic_scipy.py --ignore=tests/tensor/test_inplace.py --ignore=tests/tensor/test_elemwise.py --ignore=tests/tensor/test_opt.py --ignore=tests/tensor/nnet" - "tests/tensor tests/sparse --ignore=tests/tensor/test_basic.py --ignore=tests/tensor/test_math.py --ignore=tests/tensor/test_basic_scipy.py --ignore=tests/tensor/test_inplace.py --ignore=tests/tensor/test_elemwise.py --ignore=tests/tensor/test_opt.py --ignore=tests/tensor/nnet"
- "tests/tensor/test_basic.py tests/tensor/test_basic_scipy.py tests/tensor/test_inplace.py" - "tests/tensor/test_basic.py tests/tensor/test_math.py tests/tensor/test_basic_scipy.py tests/tensor/test_inplace.py"
- "tests/tensor/test_elemwise.py tests/tensor/test_opt.py" - "tests/tensor/test_elemwise.py tests/tensor/test_opt.py"
- "tests/tensor/nnet --ignore-glob='*/test_abstract_conv.py'" - "tests/tensor/nnet --ignore-glob='*/test_abstract_conv.py'"
- "tests/tensor/nnet/test_abstract_conv.py" - "tests/tensor/nnet/test_abstract_conv.py"
......
...@@ -36,7 +36,7 @@ List of Implemented R op ...@@ -36,7 +36,7 @@ List of Implemented R op
See the :ref:`gradient tutorial <tutcomputinggrads>` for the R op documentation. See the :ref:`gradient tutorial <tutcomputinggrads>` for the R op documentation.
list of ops that support R-op: list of ops that support R-op:
* with test [Most is tests/tensor/test_rop.py] * with test
* SpecifyShape * SpecifyShape
* MaxAndArgmax * MaxAndArgmax
* Subtensor * Subtensor
......
...@@ -51,10 +51,11 @@ Theano also provides :func:`theano.printing.pydotprint` that creates a png image ...@@ -51,10 +51,11 @@ Theano also provides :func:`theano.printing.pydotprint` that creates a png image
1) The first is :func:`theano.pp`. 1) The first is :func:`theano.pp`.
>>> from theano import pp, tensor as tt >>> from theano import pp, grad,
>>> from theano import tensor as tt
>>> x = tt.dscalar('x') >>> x = tt.dscalar('x')
>>> y = x ** 2 >>> y = x ** 2
>>> gy = tt.grad(y, x) >>> gy = grad(y, x)
>>> pp(gy) # print out the gradient prior to optimization >>> pp(gy) # print out the gradient prior to optimization
'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))' '((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
>>> f = function([x], gy) >>> f = function([x], gy)
......
...@@ -3,6 +3,7 @@ import pickle ...@@ -3,6 +3,7 @@ import pickle
import re import re
import shutil import shutil
import tempfile import tempfile
from collections import OrderedDict
import numpy as np import numpy as np
import pytest import pytest
...@@ -10,11 +11,14 @@ import pytest ...@@ -10,11 +11,14 @@ import pytest
import theano import theano
from theano.compile.function import function, function_dump from theano.compile.function import function, function_dump
from theano.compile.io import In from theano.compile.io import In
from theano.configdefaults import config
from theano.tensor.math import sum as tt_sum
from theano.tensor.type import ( from theano.tensor.type import (
bscalar, bscalar,
bvector, bvector,
dscalar, dscalar,
dvector, dvector,
fmatrix,
fscalar, fscalar,
fvector, fvector,
vector, vector,
...@@ -22,6 +26,9 @@ from theano.tensor.type import ( ...@@ -22,6 +26,9 @@ from theano.tensor.type import (
) )
floatX = "float32"
def test_function_dump(): def test_function_dump():
v = vector() v = vector()
fct1 = function([v], v + 1) fct1 = function([v], v + 1)
...@@ -195,7 +202,7 @@ class TestFunctionIn: ...@@ -195,7 +202,7 @@ class TestFunctionIn:
f(0, 0.1, 0) f(0, 0.1, 0)
# If allow_downcast is None, it should work iff floatX=float32 # If allow_downcast is None, it should work iff floatX=float32
if theano.config.floatX == "float32": if config.floatX == "float32":
assert np.allclose(f(0, 0, 0.1), 0.1) assert np.allclose(f(0, 0, 0.1), 0.1)
else: else:
with pytest.raises(TypeError): with pytest.raises(TypeError):
...@@ -229,3 +236,68 @@ class TestFunctionIn: ...@@ -229,3 +236,68 @@ class TestFunctionIn:
# If allow_downcast is None, like False # If allow_downcast is None, like False
with pytest.raises(TypeError): with pytest.raises(TypeError):
f(z, z, [0.1]) f(z, z, [0.1])
def test_pickle_unpickle_with_reoptimization():
    """Unpickling with ``reoptimize_unpickled_function=True`` must produce a
    function equivalent to the one that was pickled."""
    run_mode = config.mode
    if run_mode in ("DEBUG_MODE", "DebugMode"):
        run_mode = "FAST_RUN"

    x1, x2 = fmatrix("x1"), fmatrix("x2")
    x3 = theano.shared(np.ones((10, 10), dtype=floatX))
    x4 = theano.shared(np.ones((10, 10), dtype=floatX))
    y = tt_sum(tt_sum(tt_sum(x1 ** 2 + x2) + x3) + x4)

    updates = OrderedDict([(x3, x3 + 1), (x4, x4 + 1)])
    f = theano.function([x1, x2], y, updates=updates, mode=run_mode)

    # Pickle the compiled theano function.
    pickled = pickle.dumps(f, -1)

    in1 = np.ones((10, 10), dtype=floatX)
    in2 = np.ones((10, 10), dtype=floatX)

    # Unpickle while forcing re-optimization of the graph.
    saved_flag = config.reoptimize_unpickled_function
    try:
        # True is the default value of this flag.
        config.reoptimize_unpickled_function = True
        f_clone = pickle.loads(pickled)
        assert f(in1, in2) == f_clone(in1, in2)
    finally:
        config.reoptimize_unpickled_function = saved_flag
def test_pickle_unpickle_without_reoptimization():
    """Unpickling with ``reoptimize_unpickled_function=False`` must still
    produce a function equivalent to the one that was pickled."""
    run_mode = config.mode
    if run_mode in ("DEBUG_MODE", "DebugMode"):
        run_mode = "FAST_RUN"

    x1, x2 = fmatrix("x1"), fmatrix("x2")
    x3 = theano.shared(np.ones((10, 10), dtype=floatX))
    x4 = theano.shared(np.ones((10, 10), dtype=floatX))
    y = tt_sum(tt_sum(tt_sum(x1 ** 2 + x2) + x3) + x4)

    updates = OrderedDict([(x3, x3 + 1), (x4, x4 + 1)])
    f = theano.function([x1, x2], y, updates=updates, mode=run_mode)

    # Pickle the compiled theano function.
    pickled = pickle.dumps(f, -1)

    # Inputs used to compare original vs. unpickled function.
    in1 = np.ones((10, 10), dtype=floatX)
    in2 = np.ones((10, 10), dtype=floatX)

    # Unpickle while skipping re-optimization of the graph.
    saved_flag = config.reoptimize_unpickled_function
    try:
        # The default of this flag is True; disable it here.
        config.reoptimize_unpickled_function = False
        f_clone = pickle.loads(pickled)
        assert f(in1, in2) == f_clone(in1, in2)
    finally:
        config.reoptimize_unpickled_function = saved_flag
...@@ -8,6 +8,7 @@ from theano.compile.io import In ...@@ -8,6 +8,7 @@ from theano.compile.io import In
from theano.compile.sharedvalue import shared from theano.compile.sharedvalue import shared
from theano.configdefaults import config from theano.configdefaults import config
from theano.misc.safe_asarray import _asarray from theano.misc.safe_asarray import _asarray
from theano.tensor.math import sum as tt_sum
from theano.tensor.type import ( from theano.tensor.type import (
bscalar, bscalar,
bvector, bvector,
...@@ -92,7 +93,7 @@ class TestPfunc: ...@@ -92,7 +93,7 @@ class TestPfunc:
with pytest.raises( with pytest.raises(
TypeError, match=r"^Cannot use a shared variable \(w\) as explicit input" TypeError, match=r"^Cannot use a shared variable \(w\) as explicit input"
): ):
pfunc([w], tt.sum(w * w)) pfunc([w], tt_sum(w * w))
def test_default_container(self): def test_default_container(self):
# Ensure it is possible to (implicitly) use a shared variable in a # Ensure it is possible to (implicitly) use a shared variable in a
...@@ -101,7 +102,7 @@ class TestPfunc: ...@@ -101,7 +102,7 @@ class TestPfunc:
rng = np.random.RandomState(1827) rng = np.random.RandomState(1827)
w_init = rng.rand(5) w_init = rng.rand(5)
w = shared(w_init.copy(), "w") w = shared(w_init.copy(), "w")
reg = tt.sum(w * w) reg = tt_sum(w * w)
f = pfunc([], reg) f = pfunc([], reg)
assert f() == np.sum(w_init * w_init) assert f() == np.sum(w_init * w_init)
......
import copy import copy
import os
import pickle import pickle
import time import time
...@@ -16,11 +17,15 @@ from theano.configdefaults import config ...@@ -16,11 +17,15 @@ from theano.configdefaults import config
from theano.graph.basic import Constant from theano.graph.basic import Constant
from theano.graph.fg import MissingInputError from theano.graph.fg import MissingInputError
from theano.graph.opt import OpKeyOptimizer, PatternSub from theano.graph.opt import OpKeyOptimizer, PatternSub
from theano.tensor.math import dot
from theano.tensor.math import sum as tt_sum
from theano.tensor.math import tanh
from theano.tensor.type import ( from theano.tensor.type import (
dmatrix, dmatrix,
dscalar, dscalar,
dscalars, dscalars,
dvector, dvector,
fmatrix,
fscalar, fscalar,
iscalar, iscalar,
matrix, matrix,
...@@ -317,7 +322,7 @@ class TestFunction: ...@@ -317,7 +322,7 @@ class TestFunction:
# SharedVariable for tests, one of them has update # SharedVariable for tests, one of them has update
y = theano.shared(value=1) y = theano.shared(value=1)
z = theano.shared(value=2) z = theano.shared(value=2)
out = tt.tanh((x + y + 2) / (x + z - 0.2) ** 2) out = tanh((x + y + 2) / (x + z - 0.2) ** 2)
# Test for different linkers # Test for different linkers
for mode in ["FAST_RUN", "FAST_COMPILE"]: for mode in ["FAST_RUN", "FAST_COMPILE"]:
...@@ -426,7 +431,7 @@ class TestFunction: ...@@ -426,7 +431,7 @@ class TestFunction:
x = vector("x") x = vector("x")
y = vector("y") y = vector("y")
# this formular has no sense but for a test # this formular has no sense but for a test
out = (tt.sum(x) - y) ** 2 out = (tt_sum(x) - y) ** 2
train = theano.function( train = theano.function(
[i], [i],
out, out,
...@@ -921,7 +926,7 @@ class TestPicklefunction: ...@@ -921,7 +926,7 @@ class TestPicklefunction:
f = function( f = function(
[a, x, s, xm, sm], [a, x, s, xm, sm],
((a.T.T) * (tt.dot(xm, (sm.T.T.T)) + x).T * (x / x) + s), ((a.T.T) * (dot(xm, (sm.T.T.T)) + x).T * (x / x) + s),
) )
old_default_mode = config.mode old_default_mode = config.mode
old_default_opt = config.optimizer old_default_opt = config.optimizer
...@@ -1061,7 +1066,7 @@ class TestPicklefunction: ...@@ -1061,7 +1066,7 @@ class TestPicklefunction:
x = matrix() x = matrix()
y = theano.shared(b) y = theano.shared(b)
f = theano.function([x], tt.dot(x, y)) f = theano.function([x], dot(x, y))
from io import BytesIO from io import BytesIO
...@@ -1175,7 +1180,7 @@ def test_sync_update(): ...@@ -1175,7 +1180,7 @@ def test_sync_update():
target=tests.gpuarray.config.test_ctx_name, target=tests.gpuarray.config.test_ctx_name,
) )
updates = [(w, w + np.asarray(0.001, "float32") * tt.dot(x, x))] updates = [(w, w + np.asarray(0.001, "float32") * dot(x, x))]
f = theano.function([], updates=updates, mode=tests.gpuarray.config.mode_with_gpu) f = theano.function([], updates=updates, mode=tests.gpuarray.config.mode_with_gpu)
assert len(f.maker.fgraph.apply_nodes) == 1 assert len(f.maker.fgraph.apply_nodes) == 1
...@@ -1221,3 +1226,40 @@ def test_sync_update(): ...@@ -1221,3 +1226,40 @@ def test_sync_update():
d1 = t_1 - t_0 d1 = t_1 - t_0
d2 = t_2 - t_1 d2 = t_2 - t_1
assert d1 > d2, (d1, d2) assert d1 > d2, (d1, d2)
def test_FunctionMaker_cache_optimizations():
    """Compiling with ``cache_optimizations=True`` should create the
    optimized-graph database file, and a second identical graph should
    compile to an equivalent function (presumably via the cache).

    Fix: the original computed the same path twice under two names
    (``opt_db_file`` and ``graph_db_file``); compute it once.
    """
    graph_db_file = os.path.join(config.compiledir, "optimized_graphs.pkl")
    if os.path.exists(graph_db_file):
        os.remove(graph_db_file)

    floatX = "float32"

    mode = config.mode
    if mode in ["DEBUG_MODE", "DebugMode"]:
        mode = "FAST_RUN"

    assert not os.path.exists(graph_db_file)

    with config.change_flags(cache_optimizations=True):
        a = fmatrix("a")
        b = fmatrix("b")
        c = theano.shared(np.ones((10, 10), dtype=floatX))
        d = theano.shared(np.ones((10, 10), dtype=floatX))
        e = tt_sum(tt_sum(tt_sum(a ** 2 + b) + c) + d)
        f1 = theano.function([a, b], e, mode=mode)

        # FIXME: We can do much better about testing this.
        assert os.path.exists(graph_db_file)

        # A structurally identical graph; should hit the cached optimization.
        m = fmatrix("x1")
        n = fmatrix("x2")
        p = theano.shared(np.ones((10, 10), dtype=floatX))
        q = theano.shared(np.ones((10, 10), dtype=floatX))
        j = tt_sum(tt_sum(tt_sum(m ** 2 + n) + p) + q)
        f2 = theano.function([m, n], j, mode=mode)

        in1 = np.ones((10, 10), dtype=floatX)
        in2 = np.ones((10, 10), dtype=floatX)
        assert f1(in1, in2) == f2(in1, in2)
...@@ -4,7 +4,6 @@ import numpy as np ...@@ -4,7 +4,6 @@ import numpy as np
import pytest import pytest
import theano import theano
import theano.tensor as tt
from tests import unittest_tools from tests import unittest_tools
from theano import shared from theano import shared
from theano.compile.builders import OpFromGraph from theano.compile.builders import OpFromGraph
...@@ -12,6 +11,9 @@ from theano.compile.function import function ...@@ -12,6 +11,9 @@ from theano.compile.function import function
from theano.configdefaults import config from theano.configdefaults import config
from theano.gradient import DisconnectedType, Rop, grad from theano.gradient import DisconnectedType, Rop, grad
from theano.graph.null_type import NullType from theano.graph.null_type import NullType
from theano.tensor.math import dot, exp
from theano.tensor.math import round as tt_round
from theano.tensor.math import sum as tt_sum
from theano.tensor.nnet import sigmoid from theano.tensor.nnet import sigmoid
from theano.tensor.random.utils import RandomStream from theano.tensor.random.utils import RandomStream
from theano.tensor.type import TensorType, matrices, matrix, scalar, vector, vectors from theano.tensor.type import TensorType, matrices, matrix, scalar, vector, vectors
...@@ -43,7 +45,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -43,7 +45,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
) )
def test_size_changes(self, cls_ofg): def test_size_changes(self, cls_ofg):
x, y, z = matrices("xyz") x, y, z = matrices("xyz")
e = tt.dot(x, y) e = dot(x, y)
op = cls_ofg([x, y], [e]) op = cls_ofg([x, y], [e])
f = op(x, op(y, z)) f = op(x, op(y, z))
fn = function([x, y, z], f) fn = function([x, y, z], f)
...@@ -65,7 +67,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -65,7 +67,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
e = x + y * z e = x + y * z
op = cls_ofg([x, y, z], [e]) op = cls_ofg([x, y, z], [e])
f = op(x, y, z) f = op(x, y, z)
f = f - grad(tt.sum(f), y) f = f - grad(tt_sum(f), y)
fn = function([x, y, z], f) fn = function([x, y, z], f)
xv = np.ones((2, 2), dtype=config.floatX) xv = np.ones((2, 2), dtype=config.floatX)
yv = np.ones((2, 2), dtype=config.floatX) * 3 yv = np.ones((2, 2), dtype=config.floatX) * 3
...@@ -80,8 +82,8 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -80,8 +82,8 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
e = x + y * z e = x + y * z
op = cls_ofg([x, y, z], [e]) op = cls_ofg([x, y, z], [e])
f = op(x, y, z) f = op(x, y, z)
f = f - grad(tt.sum(f), y) f = f - grad(tt_sum(f), y)
f = f - grad(tt.sum(f), y) f = f - grad(tt_sum(f), y)
fn = function([x, y, z], f) fn = function([x, y, z], f)
xv = np.ones((2, 2), dtype=config.floatX) xv = np.ones((2, 2), dtype=config.floatX)
yv = np.ones((2, 2), dtype=config.floatX) * 3 yv = np.ones((2, 2), dtype=config.floatX) * 3
...@@ -117,7 +119,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -117,7 +119,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
e = x + y * z + s e = x + y * z + s
op = cls_ofg([x, y, z], [e]) op = cls_ofg([x, y, z], [e])
f = op(x, y, z) f = op(x, y, z)
f = f - grad(tt.sum(f), y) f = f - grad(tt_sum(f), y)
fn = function([x, y, z], f) fn = function([x, y, z], f)
xv = np.ones((2, 2), dtype=config.floatX) xv = np.ones((2, 2), dtype=config.floatX)
yv = np.ones((2, 2), dtype=config.floatX) * 3 yv = np.ones((2, 2), dtype=config.floatX) * 3
...@@ -126,7 +128,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -126,7 +128,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
# grad again the shared variable # grad again the shared variable
f = op(x, y, z) f = op(x, y, z)
f = f - grad(tt.sum(f), s) f = f - grad(tt_sum(f), s)
fn = function([x, y, z], f) fn = function([x, y, z], f)
assert np.allclose(15.0 + s.get_value(), fn(xv, yv, zv)) assert np.allclose(15.0 + s.get_value(), fn(xv, yv, zv))
...@@ -150,7 +152,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -150,7 +152,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
# single override case (function or OfG instance) # single override case (function or OfG instance)
xx, yy = vector("xx"), vector("yy") xx, yy = vector("xx"), vector("yy")
for op in [op_mul, op_mul2]: for op in [op_mul, op_mul2]:
zz = tt.sum(op(xx, yy)) zz = tt_sum(op(xx, yy))
dx, dy = grad(zz, [xx, yy]) dx, dy = grad(zz, [xx, yy])
fn = function([xx, yy], [dx, dy]) fn = function([xx, yy], [dx, dy])
xv = np.random.rand(16).astype(config.floatX) xv = np.random.rand(16).astype(config.floatX)
...@@ -176,7 +178,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -176,7 +178,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
[x, w, b], [x * w + b], grad_overrides=[go1, go2, "default"] [x, w, b], [x * w + b], grad_overrides=[go1, go2, "default"]
) )
xx, ww, bb = vector("xx"), vector("yy"), vector("bb") xx, ww, bb = vector("xx"), vector("yy"), vector("bb")
zz = tt.sum(op_linear(xx, ww, bb)) zz = tt_sum(op_linear(xx, ww, bb))
dx, dw, db = grad(zz, [xx, ww, bb]) dx, dw, db = grad(zz, [xx, ww, bb])
fn = function([xx, ww, bb], [dx, dw, db]) fn = function([xx, ww, bb], [dx, dw, db])
xv = np.random.rand(16).astype(config.floatX) xv = np.random.rand(16).astype(config.floatX)
...@@ -193,7 +195,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -193,7 +195,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
[x * w + b], [x * w + b],
grad_overrides=[go1, NullType()(), DisconnectedType()()], grad_overrides=[go1, NullType()(), DisconnectedType()()],
) )
zz2 = tt.sum(op_linear2(xx, ww, bb)) zz2 = tt_sum(op_linear2(xx, ww, bb))
dx2, dw2, db2 = grad( dx2, dw2, db2 = grad(
zz2, zz2,
[xx, ww, bb], [xx, ww, bb],
...@@ -211,7 +213,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -211,7 +213,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
) )
def test_lop_override(self, cls_ofg): def test_lop_override(self, cls_ofg):
x = vector() x = vector()
y = 1.0 / (1.0 + tt.exp(-x)) y = 1.0 / (1.0 + exp(-x))
def lop_ov(inps, outs, grads): def lop_ov(inps, outs, grads):
(y_,) = outs (y_,) = outs
...@@ -222,12 +224,12 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -222,12 +224,12 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
op_lop_ov = cls_ofg([x, y_, dedy], [2.0 * y_ * (1.0 - y_) * dedy]) op_lop_ov = cls_ofg([x, y_, dedy], [2.0 * y_ * (1.0 - y_) * dedy])
xx = vector() xx = vector()
yy1 = tt.sum(sigmoid(xx)) yy1 = tt_sum(sigmoid(xx))
gyy1 = 2.0 * grad(yy1, xx) gyy1 = 2.0 * grad(yy1, xx)
for ov in [lop_ov, op_lop_ov]: for ov in [lop_ov, op_lop_ov]:
op = cls_ofg([x], [y], lop_overrides=ov) op = cls_ofg([x], [y], lop_overrides=ov)
yy2 = tt.sum(op(xx)) yy2 = tt_sum(op(xx))
gyy2 = grad(yy2, xx) gyy2 = grad(yy2, xx)
fn = function([xx], [gyy1, gyy2]) fn = function([xx], [gyy1, gyy2])
...@@ -241,7 +243,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -241,7 +243,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
def test_rop(self, cls_ofg): def test_rop(self, cls_ofg):
a = vector() a = vector()
M = matrix() M = matrix()
b = tt.dot(a, M) b = dot(a, M)
op_matmul = cls_ofg([a, M], [b]) op_matmul = cls_ofg([a, M], [b])
x = vector() x = vector()
W = matrix() W = matrix()
...@@ -295,7 +297,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester): ...@@ -295,7 +297,7 @@ class TestOpFromGraph(unittest_tools.InferShapeTester):
del x del x
# but we know how to backpropagate for x for some reasons # but we know how to backpropagate for x for some reasons
# and we don't care about the gradient wrt y. # and we don't care about the gradient wrt y.
return y + tt.round(y) return y + tt_round(y)
def f1_back(inputs, output_gradients): def f1_back(inputs, output_gradients):
return [output_gradients[0], theano.gradient.disconnected_type()] return [output_gradients[0], theano.gradient.disconnected_type()]
......
...@@ -13,6 +13,7 @@ from theano.graph.op import COp, Op ...@@ -13,6 +13,7 @@ from theano.graph.op import COp, Op
from theano.graph.opt import local_optimizer from theano.graph.opt import local_optimizer
from theano.graph.optdb import EquilibriumDB from theano.graph.optdb import EquilibriumDB
from theano.graph.toolbox import BadOptimization from theano.graph.toolbox import BadOptimization
from theano.tensor.math import add, dot, log
from theano.tensor.type import TensorType, dvector, fmatrix, fvector, vector from theano.tensor.type import TensorType, dvector, fmatrix, fvector, vector
...@@ -227,9 +228,9 @@ def test_badthunkoutput(): ...@@ -227,9 +228,9 @@ def test_badthunkoutput():
def test_badoptimization(): def test_badoptimization():
@local_optimizer([tt.add]) @local_optimizer([add])
def insert_broken_add(fgraph, node): def insert_broken_add(fgraph, node):
if node.op == tt.add: if node.op == add:
return [off_by_half(*node.inputs)] return [off_by_half(*node.inputs)]
return False return False
...@@ -253,18 +254,18 @@ def test_badoptimization(): ...@@ -253,18 +254,18 @@ def test_badoptimization():
def test_badoptimization_opt_err(): def test_badoptimization_opt_err():
# This variant of test_badoptimization() replace the working code # This variant of test_badoptimization() replace the working code
# with a new apply node that will raise an error. # with a new apply node that will raise an error.
@local_optimizer([tt.add]) @local_optimizer([add])
def insert_bigger_b_add(fgraph, node): def insert_bigger_b_add(fgraph, node):
if node.op == tt.add: if node.op == add:
inputs = list(node.inputs) inputs = list(node.inputs)
if inputs[-1].owner is None: if inputs[-1].owner is None:
inputs[-1] = tt.concatenate((inputs[-1], inputs[-1])) inputs[-1] = tt.concatenate((inputs[-1], inputs[-1]))
return [node.op(*inputs)] return [node.op(*inputs)]
return False return False
@local_optimizer([tt.add]) @local_optimizer([add])
def insert_bad_dtype(fgraph, node): def insert_bad_dtype(fgraph, node):
if node.op == tt.add: if node.op == add:
inputs = list(node.inputs) inputs = list(node.inputs)
if inputs[-1].owner is None: if inputs[-1].owner is None:
...@@ -316,9 +317,9 @@ def test_stochasticoptimization(): ...@@ -316,9 +317,9 @@ def test_stochasticoptimization():
last_time_replaced = [False] last_time_replaced = [False]
@local_optimizer([tt.add]) @local_optimizer([add])
def insert_broken_add_sometimes(fgraph, node): def insert_broken_add_sometimes(fgraph, node):
if node.op == tt.add: if node.op == add:
last_time_replaced[0] = not last_time_replaced[0] last_time_replaced[0] = not last_time_replaced[0]
if last_time_replaced[0]: if last_time_replaced[0]:
return [off_by_half(*node.inputs)] return [off_by_half(*node.inputs)]
...@@ -334,7 +335,7 @@ def test_stochasticoptimization(): ...@@ -334,7 +335,7 @@ def test_stochasticoptimization():
with pytest.raises(debugmode.StochasticOrder): with pytest.raises(debugmode.StochasticOrder):
theano.function( theano.function(
[a, b], [a, b],
tt.add(a, b), add(a, b),
mode=debugmode.DebugMode( mode=debugmode.DebugMode(
optimizer=opt, optimizer=opt,
check_c_code=True, check_c_code=True,
...@@ -559,7 +560,7 @@ class TestCheckIsfinite: ...@@ -559,7 +560,7 @@ class TestCheckIsfinite:
def test_check_isfinite(self): def test_check_isfinite(self):
x = vector() x = vector()
f = theano.function([x], (x + 2) * 5, mode="DEBUG_MODE") f = theano.function([x], (x + 2) * 5, mode="DEBUG_MODE")
g = theano.function([x], tt.log(x), mode="DEBUG_MODE") g = theano.function([x], log(x), mode="DEBUG_MODE")
# this should work # this should work
f(np.log([3, 4, 5]).astype(config.floatX)) f(np.log([3, 4, 5]).astype(config.floatX))
...@@ -736,7 +737,7 @@ class TestPreallocatedOutput: ...@@ -736,7 +737,7 @@ class TestPreallocatedOutput:
b = fmatrix("b") b = fmatrix("b")
z = BrokenCImplementationAdd()(a, b) z = BrokenCImplementationAdd()(a, b)
# In this test, we do not want z to be an output of the graph. # In this test, we do not want z to be an output of the graph.
out = tt.dot(z, np.eye(7)) out = dot(z, np.eye(7))
a_val = self.rng.randn(7, 7).astype("float32") a_val = self.rng.randn(7, 7).astype("float32")
b_val = self.rng.randn(7, 7).astype("float32") b_val = self.rng.randn(7, 7).astype("float32")
......
import numpy as np import numpy as np
from theano import tensor as tt
from theano.compile.function.pfunc import pfunc from theano.compile.function.pfunc import pfunc
from theano.compile.sharedvalue import shared from theano.compile.sharedvalue import shared
from theano.gradient import grad from theano.gradient import grad
from theano.tensor.math import dot
from theano.tensor.math import sum as tt_sum
from theano.tensor.nnet import sigmoid from theano.tensor.nnet import sigmoid
from theano.tensor.type import dvector from theano.tensor.type import dvector
...@@ -33,9 +34,9 @@ class NNet: ...@@ -33,9 +34,9 @@ class NNet:
self.w2 = shared(np.zeros((n_output, n_hidden)), "w2") self.w2 = shared(np.zeros((n_output, n_hidden)), "w2")
# print self.lr.type # print self.lr.type
self.hidden = sigmoid(tt.dot(self.w1, self.input)) self.hidden = sigmoid(dot(self.w1, self.input))
self.output = tt.dot(self.w2, self.hidden) self.output = dot(self.w2, self.hidden)
self.cost = tt.sum((self.output - self.target) ** 2) self.cost = tt_sum((self.output - self.target) ** 2)
self.sgd_updates = { self.sgd_updates = {
self.w1: self.w1 - self.lr * grad(self.cost, self.w1), self.w1: self.w1 - self.lr * grad(self.cost, self.w1),
......
import pytest import pytest
import theano import theano
import theano.tensor as tt
from theano.compile.mode import AddFeatureOptimizer, Mode from theano.compile.mode import AddFeatureOptimizer, Mode
from theano.graph.toolbox import NoOutputFromInplace from theano.graph.toolbox import NoOutputFromInplace
from theano.tensor.math import dot, tanh
from theano.tensor.type import matrix from theano.tensor.type import matrix
...@@ -13,8 +13,8 @@ from theano.tensor.type import matrix ...@@ -13,8 +13,8 @@ from theano.tensor.type import matrix
def test_no_output_from_implace(): def test_no_output_from_implace():
x = matrix() x = matrix()
y = matrix() y = matrix()
a = tt.dot(x, y) a = dot(x, y)
b = tt.tanh(a) b = tanh(a)
# Ensure that the elemwise op that produces the output is inplace when # Ensure that the elemwise op that produces the output is inplace when
# using a mode that does not include the optimization # using a mode that does not include the optimization
......
...@@ -10,6 +10,7 @@ import pytest ...@@ -10,6 +10,7 @@ import pytest
import theano import theano
import theano.tensor as tt import theano.tensor as tt
from theano.compile.nanguardmode import NanGuardMode from theano.compile.nanguardmode import NanGuardMode
from theano.tensor.math import dot
from theano.tensor.type import matrix, tensor3 from theano.tensor.type import matrix, tensor3
...@@ -19,7 +20,7 @@ def test_NanGuardMode(): ...@@ -19,7 +20,7 @@ def test_NanGuardMode():
# the abnormalties. # the abnormalties.
x = matrix() x = matrix()
w = theano.shared(np.random.randn(5, 7).astype(theano.config.floatX)) w = theano.shared(np.random.randn(5, 7).astype(theano.config.floatX))
y = tt.dot(x, w) y = dot(x, w)
fun = theano.function( fun = theano.function(
[x], y, mode=NanGuardMode(nan_is_error=True, inf_is_error=True) [x], y, mode=NanGuardMode(nan_is_error=True, inf_is_error=True)
......
from datetime import datetime
__authors__ = "Ian Goodfellow"
__credits__ = ["Ian Goodfellow"]
__license__ = "3-clause BSD"
__maintainer__ = "Ian Goodfellow"
__email__ = "goodfeli@iro"
def disturb_mem():
    """Allocate a time-dependent number of objects.

    Increases the chance that object ids differ from run to run, which
    helps expose non-deterministic behavior caused by dependence on
    memory addresses (e.g. iterating over a dict or a set).

    Side effect: rebinds the module-level global ``l``; returns ``None``.
    """
    global l
    microseconds = int(datetime.now().microsecond)
    count = microseconds % 1000
    width = microseconds // 1000
    l = [[0] * width for _ in range(count)]
...@@ -8,9 +8,10 @@ ...@@ -8,9 +8,10 @@
import numpy as np import numpy as np
import numpy.random import numpy.random
import theano.tensor as tt
from tests import unittest_tools as utt from tests import unittest_tools as utt
from theano import config, function, shared from theano import config, function, shared
from theano.gradient import grad
from theano.tensor.math import dot, exp, log
from theano.tensor.type import matrix, vector from theano.tensor.type import matrix, vector
...@@ -35,11 +36,11 @@ class TestScipy: ...@@ -35,11 +36,11 @@ class TestScipy:
b = shared(np.zeros(())) b = shared(np.zeros(()))
# Construct Theano expression graph # Construct Theano expression graph
p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b)) p_1 = 1 / (1 + exp(-dot(x, w) - b))
xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1) xent = -y * log(p_1) - (1 - y) * log(1 - p_1)
prediction = p_1 > 0.5 prediction = p_1 > 0.5
cost = xent.mean() + 0.01 * (w ** 2).sum() cost = xent.mean() + 0.01 * (w ** 2).sum()
gw, gb = tt.grad(cost, [w, b]) gw, gb = grad(cost, [w, b])
# Compile expressions to functions # Compile expressions to functions
train = function( train = function(
......
import numpy as np import numpy as np
import theano import theano
import theano.tensor as tt
from theano.tensor import nnet from theano.tensor import nnet
from theano.tensor.math import dot, tanh
class Model: class Model:
...@@ -127,15 +127,15 @@ class GRU(Layer): ...@@ -127,15 +127,15 @@ class GRU(Layer):
def step(inp, s_prev): def step(inp, s_prev):
i_t = nnet.sigmoid( i_t = nnet.sigmoid(
tt.dot(inp, self.W_i) + tt.dot(s_prev, self.R_i) + self.b_wi + self.b_ru dot(inp, self.W_i) + dot(s_prev, self.R_i) + self.b_wi + self.b_ru
) )
r_t = nnet.sigmoid( r_t = nnet.sigmoid(
tt.dot(inp, self.W_r) + tt.dot(s_prev, self.R_r) + self.b_wr + self.b_rr dot(inp, self.W_r) + dot(s_prev, self.R_r) + self.b_wr + self.b_rr
) )
h_hat_t = tt.tanh( h_hat_t = tanh(
tt.dot(inp, self.W_h) dot(inp, self.W_h)
+ (r_t * (tt.dot(s_prev, self.R_h) + self.b_rh)) + (r_t * (dot(s_prev, self.R_h) + self.b_rh))
+ self.b_wh + self.b_wh
) )
...@@ -231,20 +231,20 @@ class LSTM(Layer): ...@@ -231,20 +231,20 @@ class LSTM(Layer):
def step(x_t, h_tm1, c_tm1): def step(x_t, h_tm1, c_tm1):
i_t = nnet.sigmoid( i_t = nnet.sigmoid(
tt.dot(x_t, self.W_i) + tt.dot(h_tm1, self.R_i) + self.b_wi + self.b_ri dot(x_t, self.W_i) + dot(h_tm1, self.R_i) + self.b_wi + self.b_ri
) )
f_t = nnet.sigmoid( f_t = nnet.sigmoid(
tt.dot(x_t, self.W_f) + tt.dot(h_tm1, self.R_f) + self.b_wf + self.b_rf dot(x_t, self.W_f) + dot(h_tm1, self.R_f) + self.b_wf + self.b_rf
) )
o_t = nnet.sigmoid( o_t = nnet.sigmoid(
tt.dot(x_t, self.W_o) + tt.dot(h_tm1, self.R_o) + self.b_ro + self.b_wo dot(x_t, self.W_o) + dot(h_tm1, self.R_o) + self.b_ro + self.b_wo
) )
c_hat_t = tt.tanh( c_hat_t = tanh(
tt.dot(x_t, self.W_c) + tt.dot(h_tm1, self.R_c) + self.b_wc + self.b_rc dot(x_t, self.W_c) + dot(h_tm1, self.R_c) + self.b_wc + self.b_rc
) )
c_t = f_t * c_tm1 + i_t * c_hat_t c_t = f_t * c_tm1 + i_t * c_hat_t
h_t = o_t * tt.tanh(c_t) h_t = o_t * tanh(c_t)
return h_t, c_t return h_t, c_t
...@@ -276,7 +276,7 @@ class FC(Layer): ...@@ -276,7 +276,7 @@ class FC(Layer):
self.b = bias_weights((output_dim,), param_list=self.params, name=name + ".b") self.b = bias_weights((output_dim,), param_list=self.params, name=name + ".b")
def output(self): def output(self):
return tt.dot(self.X, self.W) + self.b return dot(self.X, self.W) + self.b
class WrapperLayer(Layer): class WrapperLayer(Layer):
......
...@@ -7,7 +7,6 @@ from tests import unittest_tools as utt ...@@ -7,7 +7,6 @@ from tests import unittest_tools as utt
from tests.gpuarray.config import mode_with_gpu, test_ctx_name from tests.gpuarray.config import mode_with_gpu, test_ctx_name
from tests.gpuarray.test_basic_ops import makeTester, rand from tests.gpuarray.test_basic_ops import makeTester, rand
from tests.tensor.test_blas import BaseGemv, TestGer from tests.tensor.test_blas import BaseGemv, TestGer
from theano import tensor as tt
from theano.configdefaults import config from theano.configdefaults import config
from theano.gpuarray import gpuarray_shared_constructor from theano.gpuarray import gpuarray_shared_constructor
from theano.gpuarray.blas import ( from theano.gpuarray.blas import (
...@@ -22,7 +21,15 @@ from theano.gpuarray.blas import ( ...@@ -22,7 +21,15 @@ from theano.gpuarray.blas import (
gpuger_inplace, gpuger_inplace,
gpuger_no_inplace, gpuger_no_inplace,
) )
from theano.tensor.blas import BatchedDot, _dot22, gemm_inplace, gemv, gemv_inplace from theano.tensor.blas import (
BatchedDot,
_dot22,
batched_dot,
gemm_inplace,
gemv,
gemv_inplace,
)
from theano.tensor.math import dot
from theano.tensor.type import matrix, tensor, tensor3, vector from theano.tensor.type import matrix, tensor, tensor3, vector
...@@ -197,7 +204,7 @@ class TestGpuGemmBatchStrided: ...@@ -197,7 +204,7 @@ class TestGpuGemmBatchStrided:
# Reported in https://github.com/Theano/Theano/issues/5730 # Reported in https://github.com/Theano/Theano/issues/5730
x = tensor3() x = tensor3()
y = tensor3() y = tensor3()
z = tt.batched_dot(x, y[:, 0, :, np.newaxis]) z = batched_dot(x, y[:, 0, :, np.newaxis])
f = theano.function([x, y], z, mode=mode_with_gpu) f = theano.function([x, y], z, mode=mode_with_gpu)
x_num = np.arange(32 * 19 * 600, dtype=config.floatX).reshape((32, 19, 600)) x_num = np.arange(32 * 19 * 600, dtype=config.floatX).reshape((32, 19, 600))
y_num = np.arange(7 * 32 * 600, dtype=config.floatX).reshape((32, 7, 600)) y_num = np.arange(7 * 32 * 600, dtype=config.floatX).reshape((32, 7, 600))
...@@ -270,6 +277,6 @@ def test_gemv_dot_strides(): ...@@ -270,6 +277,6 @@ def test_gemv_dot_strides():
yv = rand(5, 1) yv = rand(5, 1)
x = gpuarray_shared_constructor(xv) x = gpuarray_shared_constructor(xv)
y = gpuarray_shared_constructor(yv, broadcastable=(False, True)) y = gpuarray_shared_constructor(yv, broadcastable=(False, True))
f = theano.function([], tt.dot(x, y[::-1]), mode=mode_with_gpu) f = theano.function([], dot(x, y[::-1]), mode=mode_with_gpu)
out = f() out = f()
utt.assert_allclose(out, np.dot(xv, yv[::-1])) utt.assert_allclose(out, np.dot(xv, yv[::-1]))
...@@ -3,11 +3,12 @@ import pytest ...@@ -3,11 +3,12 @@ import pytest
import theano import theano
import theano.gpuarray import theano.gpuarray
import theano.tensor as tt
from tests import unittest_tools as utt from tests import unittest_tools as utt
from tests.gpuarray.config import mode_with_gpu, mode_without_gpu from tests.gpuarray.config import mode_with_gpu, mode_without_gpu
from tests.tensor.nnet.test_ctc import setup_ctc_case, setup_grad_case, setup_torch_case from tests.tensor.nnet.test_ctc import setup_ctc_case, setup_grad_case, setup_torch_case
from theano.gpuarray.ctc import GpuConnectionistTemporalClassification, gpu_ctc from theano.gpuarray.ctc import GpuConnectionistTemporalClassification, gpu_ctc
from theano.gradient import grad
from theano.tensor.math import mean
from theano.tensor.nnet.ctc import ( from theano.tensor.nnet.ctc import (
ConnectionistTemporalClassification, ConnectionistTemporalClassification,
ctc, ctc,
...@@ -50,7 +51,7 @@ class TestCTC: ...@@ -50,7 +51,7 @@ class TestCTC:
outputs = [cpu_ctc_cost] outputs = [cpu_ctc_cost]
if compute_grad: if compute_grad:
# Symbolic gradient of CTC cost # Symbolic gradient of CTC cost
cpu_ctc_grad = tt.grad(tt.mean(cpu_ctc_cost), activations) cpu_ctc_grad = grad(mean(cpu_ctc_cost), activations)
outputs += [cpu_ctc_grad] outputs += [cpu_ctc_grad]
return theano.function([], outputs, mode=mode) return theano.function([], outputs, mode=mode)
...@@ -59,7 +60,7 @@ class TestCTC: ...@@ -59,7 +60,7 @@ class TestCTC:
outputs = [gpu_ctc_cost] outputs = [gpu_ctc_cost]
if compute_grad: if compute_grad:
# Symbolic gradient of CTC cost # Symbolic gradient of CTC cost
gpu_ctc_grad = tt.grad(tt.mean(gpu_ctc_cost), activations) gpu_ctc_grad = grad(mean(gpu_ctc_cost), activations)
outputs += [gpu_ctc_grad] outputs += [gpu_ctc_grad]
return theano.function([], outputs, mode=mode_with_gpu) return theano.function([], outputs, mode=mode_with_gpu)
......
...@@ -29,6 +29,21 @@ from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_FWD ...@@ -29,6 +29,21 @@ from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_FWD
from theano.gpuarray import dnn from theano.gpuarray import dnn
from theano.gpuarray.basic_ops import GpuAllocEmpty from theano.gpuarray.basic_ops import GpuAllocEmpty
from theano.gpuarray.type import GpuArrayType, gpuarray_shared_constructor from theano.gpuarray.type import GpuArrayType, gpuarray_shared_constructor
from theano.tensor.math import (
ceil,
clip,
dot,
floor,
inv,
log,
max_and_argmax,
mean,
minimum,
mod,
prod,
sqrt,
)
from theano.tensor.math import sum as tt_sum
from theano.tensor.nnet import ( from theano.tensor.nnet import (
LogSoftmax, LogSoftmax,
Softmax, Softmax,
...@@ -660,7 +675,7 @@ def test_pooling_opt_arbitrary_dimensions(): ...@@ -660,7 +675,7 @@ def test_pooling_opt_arbitrary_dimensions():
for mode in modes: for mode in modes:
out_pool = Pool(ndim=len(ws), mode=mode, ignore_border=True)(input, ws) out_pool = Pool(ndim=len(ws), mode=mode, ignore_border=True)(input, ws)
out_pool_grad = theano.grad(tt.sum(out_pool), wrt=input) out_pool_grad = theano.grad(tt_sum(out_pool), wrt=input)
out = [out_pool, out_pool_grad] out = [out_pool, out_pool_grad]
# run on GPU # run on GPU
...@@ -714,7 +729,7 @@ def test_pooling_empty_batch(): ...@@ -714,7 +729,7 @@ def test_pooling_empty_batch():
d = f(np.random.rand(*img_shp).astype("float32")) d = f(np.random.rand(*img_shp).astype("float32"))
assert d.shape == (0, 5, 3, 4) assert d.shape == (0, 5, 3, 4)
g = theano.grad(tt.sum(o), wrt=img) g = theano.grad(tt_sum(o), wrt=img)
f = theano.function([img], g, mode=mode_with_gpu) f = theano.function([img], g, mode=mode_with_gpu)
d = f(np.random.rand(*img_shp).astype("float32")) d = f(np.random.rand(*img_shp).astype("float32"))
# Not sure what to assert, it should just pass, that's all. # Not sure what to assert, it should just pass, that's all.
...@@ -1539,7 +1554,7 @@ class TestSoftMax(test_nnet.TestSoftMax): ...@@ -1539,7 +1554,7 @@ class TestSoftMax(test_nnet.TestSoftMax):
# more recent. Don't test if the cuDNN version is too old. # more recent. Don't test if the cuDNN version is too old.
x = tensor4() x = tensor4()
softmax_out = dnn.GpuDnnSoftmax("accurate", "channel")(x) softmax_out = dnn.GpuDnnSoftmax("accurate", "channel")(x)
log_out = tt.log(tt.as_tensor_variable(softmax_out)) log_out = log(tt.as_tensor_variable(softmax_out))
f = theano.function([x], log_out, mode=mode_with_gpu) f = theano.function([x], log_out, mode=mode_with_gpu)
...@@ -1600,7 +1615,7 @@ class TestSoftMax(test_nnet.TestSoftMax): ...@@ -1600,7 +1615,7 @@ class TestSoftMax(test_nnet.TestSoftMax):
utt.assert_allclose(f(inp), f_ref(inp)) utt.assert_allclose(f(inp), f_ref(inp))
# Build the first graph and ensure that the optimization is applied # Build the first graph and ensure that the optimization is applied
log_softmax_out = tt.log(Softmax()(x)) log_softmax_out = log(Softmax()(x))
f = theano.function([x], log_softmax_out, mode=mode_with_gpu) f = theano.function([x], log_softmax_out, mode=mode_with_gpu)
dnn_softmax_nodes = [ dnn_softmax_nodes = [
...@@ -1776,7 +1791,7 @@ def test_dnn_reduction_error(): ...@@ -1776,7 +1791,7 @@ def test_dnn_reduction_error():
vecT = vector(dtype=theano.config.floatX) vecT = vector(dtype=theano.config.floatX)
outputT = tt.alloc(2.0 * vecT, 5, vecT.shape[0]) outputT = tt.alloc(2.0 * vecT, 5, vecT.shape[0])
outputSummedT = tt.sum(tt.transpose(outputT), axis=1) outputSummedT = tt_sum(tt.transpose(outputT), axis=1)
f3 = theano.function(inputs=[vecT], outputs=outputSummedT) f3 = theano.function(inputs=[vecT], outputs=outputSummedT)
output = f3(vec) output = f3(vec)
...@@ -1785,7 +1800,7 @@ def test_dnn_reduction_error(): ...@@ -1785,7 +1800,7 @@ def test_dnn_reduction_error():
def dnn_maxargmax(nd, idtype, axis): def dnn_maxargmax(nd, idtype, axis):
inp = TensorType(idtype, (False,) * nd)() inp = TensorType(idtype, (False,) * nd)()
res = tt.max_and_argmax(inp, axis=axis) res = max_and_argmax(inp, axis=axis)
f = theano.function([inp], res, mode=mode_with_gpu) f = theano.function([inp], res, mode=mode_with_gpu)
assert any( assert any(
isinstance(n.op, dnn.GpuDnnReduction) for n in f.maker.fgraph.apply_nodes isinstance(n.op, dnn.GpuDnnReduction) for n in f.maker.fgraph.apply_nodes
...@@ -1871,10 +1886,10 @@ def test_dnn_batchnorm_train(): ...@@ -1871,10 +1886,10 @@ def test_dnn_batchnorm_train():
axes = (0,) + tuple(range(2, ndim)) axes = (0,) + tuple(range(2, ndim))
x_mean_ref = x.mean(axis=axes, keepdims=True) x_mean_ref = x.mean(axis=axes, keepdims=True)
x_var_ref = x.var(axis=axes, keepdims=True) x_var_ref = x.var(axis=axes, keepdims=True)
x_invstd_ref = tt.inv(tt.sqrt(x_var_ref + eps)) x_invstd_ref = inv(sqrt(x_var_ref + eps))
scale_ref = tt.addbroadcast(scale, *axes) scale_ref = tt.addbroadcast(scale, *axes)
bias_ref = tt.addbroadcast(bias, *axes) bias_ref = tt.addbroadcast(bias, *axes)
m = tt.cast(tt.prod(x.shape) / tt.prod(scale.shape), theano.config.floatX) m = tt.cast(prod(x.shape) / prod(scale.shape), theano.config.floatX)
out_ref = (x - x_mean_ref) * (scale_ref * x_invstd_ref) + bias_ref out_ref = (x - x_mean_ref) * (scale_ref * x_invstd_ref) + bias_ref
out_running_mean_ref = ( out_running_mean_ref = (
running_mean * (1 - running_average_factor) running_mean * (1 - running_average_factor)
...@@ -2239,7 +2254,7 @@ def test_batchnorm_inference(): ...@@ -2239,7 +2254,7 @@ def test_batchnorm_inference():
scale_ref, bias_ref, mean_ref, var_ref = ( scale_ref, bias_ref, mean_ref, var_ref = (
tt.addbroadcast(t, *axes) for t in (scale, bias, mean, var) tt.addbroadcast(t, *axes) for t in (scale, bias, mean, var)
) )
out_ref = (x - mean_ref) * (scale_ref / tt.sqrt(var_ref + eps)) + bias_ref out_ref = (x - mean_ref) * (scale_ref / sqrt(var_ref + eps)) + bias_ref
# backward pass # backward pass
dy = vartype("dy") dy = vartype("dy")
grads_gpu = theano.grad( grads_gpu = theano.grad(
...@@ -2482,9 +2497,9 @@ def test_dnn_rnn_gru(): ...@@ -2482,9 +2497,9 @@ def test_dnn_rnn_gru():
def funcs(out, params, hy=None): def funcs(out, params, hy=None):
cost = 0 cost = 0
if out: if out:
cost += tt.mean((Y - out) ** 2) cost += mean((Y - out) ** 2)
if hy: if hy:
cost += tt.mean(hy ** 2) cost += mean(hy ** 2)
grad = theano.grad(cost, [X, h0] + params) grad = theano.grad(cost, [X, h0] + params)
grad_fn = theano.function( grad_fn = theano.function(
[X, Y, h0], grad, mode=mode_with_gpu, on_unused_input="ignore" [X, Y, h0], grad, mode=mode_with_gpu, on_unused_input="ignore"
...@@ -2580,9 +2595,9 @@ def test_dnn_rnn_gru_bidi(): ...@@ -2580,9 +2595,9 @@ def test_dnn_rnn_gru_bidi():
def funcs(out, params, hy=None): def funcs(out, params, hy=None):
cost = 0 cost = 0
if out: if out:
cost += tt.mean((Y - out) ** 2) cost += mean((Y - out) ** 2)
if hy: if hy:
cost += tt.mean(hy ** 2) cost += mean(hy ** 2)
grad = theano.grad(cost, [X, h0] + params) grad = theano.grad(cost, [X, h0] + params)
grad_fn = theano.function( grad_fn = theano.function(
[X, Y, h0], grad, mode=mode_with_gpu, on_unused_input="ignore" [X, Y, h0], grad, mode=mode_with_gpu, on_unused_input="ignore"
...@@ -2652,7 +2667,7 @@ def test_dnn_rnn_lstm(): ...@@ -2652,7 +2667,7 @@ def test_dnn_rnn_lstm():
def funcs(out, params): def funcs(out, params):
fn = theano.function([X, h0, c0], out, mode=mode_with_gpu) fn = theano.function([X, h0, c0], out, mode=mode_with_gpu)
cost = tt.mean((Y - out) ** 2) cost = mean((Y - out) ** 2)
grad = theano.grad(cost, [X, h0, c0] + params) grad = theano.grad(cost, [X, h0, c0] + params)
grad_fn = theano.function([X, Y, h0, c0], grad, mode=mode_with_gpu) grad_fn = theano.function([X, Y, h0, c0], grad, mode=mode_with_gpu)
return fn, grad_fn return fn, grad_fn
...@@ -2737,7 +2752,7 @@ def test_dnn_rnn_lstm_grad_c(): ...@@ -2737,7 +2752,7 @@ def test_dnn_rnn_lstm_grad_c():
p[:] = layer_params[j].get_value(borrow=True, return_internal_type=True) p[:] = layer_params[j].get_value(borrow=True, return_internal_type=True)
def funcs(out, params): def funcs(out, params):
cost = tt.mean((CY - out) ** 2) cost = mean((CY - out) ** 2)
grad = theano.grad(cost, [X, h0, c0] + params) grad = theano.grad(cost, [X, h0, c0] + params)
grad_fn = theano.function([X, CY, h0, c0], grad, mode=mode_with_gpu) grad_fn = theano.function([X, CY, h0, c0], grad, mode=mode_with_gpu)
return grad_fn return grad_fn
...@@ -2817,11 +2832,11 @@ def test_dnn_spatialtf(): ...@@ -2817,11 +2832,11 @@ def test_dnn_spatialtf():
theta = reshape(theta, (-1, 2, 3)) theta = reshape(theta, (-1, 2, 3))
# grid of (x_t, y_t, 1), eq (1) in ref [1] # grid of (x_t, y_t, 1), eq (1) in ref [1]
out_height = tt.cast(tt.ceil(height * scale_height), "int64") out_height = tt.cast(ceil(height * scale_height), "int64")
out_width = tt.cast(tt.ceil(width * scale_width), "int64") out_width = tt.cast(ceil(width * scale_width), "int64")
grid = _meshgrid(out_height, out_width) grid = _meshgrid(out_height, out_width)
# transform a x (x_t, y_t, 1)^t -> (x_s, y_s) # transform a x (x_t, y_t, 1)^t -> (x_s, y_s)
t_g = tt.dot(theta, grid) t_g = dot(theta, grid)
x_s = t_g[:, 0] x_s = t_g[:, 0]
y_s = t_g[:, 1] y_s = t_g[:, 1]
x_s_flat = x_s.flatten() x_s_flat = x_s.flatten()
...@@ -2852,29 +2867,29 @@ def test_dnn_spatialtf(): ...@@ -2852,29 +2867,29 @@ def test_dnn_spatialtf():
# obtain indices of the 2x2 pixel neighborhood surrounding the coordinates; # obtain indices of the 2x2 pixel neighborhood surrounding the coordinates;
# we need those in floatX for interpolation and in int64 for indexing. # we need those in floatX for interpolation and in int64 for indexing.
x0_f = tt.floor(x) x0_f = floor(x)
y0_f = tt.floor(y) y0_f = floor(y)
x1_f = x0_f + 1 x1_f = x0_f + 1
y1_f = y0_f + 1 y1_f = y0_f + 1
# for indexing, we need to take care of the border mode for outside pixels. # for indexing, we need to take care of the border mode for outside pixels.
if border_mode == "nearest": if border_mode == "nearest":
x0 = tt.clip(x0_f, 0, width_f - 1) x0 = clip(x0_f, 0, width_f - 1)
x1 = tt.clip(x1_f, 0, width_f - 1) x1 = clip(x1_f, 0, width_f - 1)
y0 = tt.clip(y0_f, 0, height_f - 1) y0 = clip(y0_f, 0, height_f - 1)
y1 = tt.clip(y1_f, 0, height_f - 1) y1 = clip(y1_f, 0, height_f - 1)
elif border_mode == "mirror": elif border_mode == "mirror":
w = 2 * (width_f - 1) w = 2 * (width_f - 1)
x0 = tt.minimum(x0_f % w, -x0_f % w) x0 = minimum(x0_f % w, -x0_f % w)
x1 = tt.minimum(x1_f % w, -x1_f % w) x1 = minimum(x1_f % w, -x1_f % w)
h = 2 * (height_f - 1) h = 2 * (height_f - 1)
y0 = tt.minimum(y0_f % h, -y0_f % h) y0 = minimum(y0_f % h, -y0_f % h)
y1 = tt.minimum(y1_f % h, -y1_f % h) y1 = minimum(y1_f % h, -y1_f % h)
elif border_mode == "wrap": elif border_mode == "wrap":
x0 = tt.mod(x0_f, width_f) x0 = mod(x0_f, width_f)
x1 = tt.mod(x1_f, width_f) x1 = mod(x1_f, width_f)
y0 = tt.mod(y0_f, height_f) y0 = mod(y0_f, height_f)
y1 = tt.mod(y1_f, height_f) y1 = mod(y1_f, height_f)
else: else:
raise ValueError( raise ValueError(
"border_mode must be one of " "'nearest', 'mirror', 'wrap'" "border_mode must be one of " "'nearest', 'mirror', 'wrap'"
...@@ -2908,7 +2923,7 @@ def test_dnn_spatialtf(): ...@@ -2908,7 +2923,7 @@ def test_dnn_spatialtf():
wb = ((x1_f - x) * (y - y0_f)).dimshuffle(0, "x") wb = ((x1_f - x) * (y - y0_f)).dimshuffle(0, "x")
wc = ((x - x0_f) * (y1_f - y)).dimshuffle(0, "x") wc = ((x - x0_f) * (y1_f - y)).dimshuffle(0, "x")
wd = ((x - x0_f) * (y - y0_f)).dimshuffle(0, "x") wd = ((x - x0_f) * (y - y0_f)).dimshuffle(0, "x")
output = tt.sum([wa * Ia, wb * Ib, wc * Ic, wd * Id], axis=0) output = tt_sum([wa * Ia, wb * Ib, wc * Ic, wd * Id], axis=0)
return output return output
def _linspace(start, stop, num): def _linspace(start, stop, num):
...@@ -2930,12 +2945,8 @@ def test_dnn_spatialtf(): ...@@ -2930,12 +2945,8 @@ def test_dnn_spatialtf():
# Note: If the image size is known at layer construction time, we could # Note: If the image size is known at layer construction time, we could
# compute the meshgrid offline in numpy instead of doing it dynamically # compute the meshgrid offline in numpy instead of doing it dynamically
# in Theano. However, it hardly affected performance when we tried. # in Theano. However, it hardly affected performance when we tried.
x_t = tt.dot( x_t = dot(tt.ones((height, 1)), _linspace(-1.0, 1.0, width).dimshuffle("x", 0))
tt.ones((height, 1)), _linspace(-1.0, 1.0, width).dimshuffle("x", 0) y_t = dot(_linspace(-1.0, 1.0, height).dimshuffle(0, "x"), tt.ones((1, width)))
)
y_t = tt.dot(
_linspace(-1.0, 1.0, height).dimshuffle(0, "x"), tt.ones((1, width))
)
x_t_flat = x_t.reshape((1, -1)) x_t_flat = x_t.reshape((1, -1))
y_t_flat = y_t.reshape((1, -1)) y_t_flat = y_t.reshape((1, -1))
...@@ -3019,7 +3030,7 @@ def test_dnn_spatialtf_grad(): ...@@ -3019,7 +3030,7 @@ def test_dnn_spatialtf_grad():
theta = tensor3("theta") theta = tensor3("theta")
out = dnn.dnn_spatialtf(inputs, theta, scale_height=0.25, scale_width=0.75) out = dnn.dnn_spatialtf(inputs, theta, scale_height=0.25, scale_width=0.75)
out_mean = tt.mean(out) out_mean = mean(out)
mean_gi = theano.grad(out_mean, [inputs]) mean_gi = theano.grad(out_mean, [inputs])
mean_gt = theano.grad(out_mean, [theta]) mean_gt = theano.grad(out_mean, [theta])
......
...@@ -29,6 +29,7 @@ from theano.gpuarray.elemwise import ( ...@@ -29,6 +29,7 @@ from theano.gpuarray.elemwise import (
from theano.gpuarray.type import GpuArrayType, get_context, gpuarray_shared_constructor from theano.gpuarray.type import GpuArrayType, get_context, gpuarray_shared_constructor
from theano.link.basic import PerformLinker from theano.link.basic import PerformLinker
from theano.link.c.basic import CLinker from theano.link.c.basic import CLinker
from theano.tensor.math import erfcinv, erfinv, mul, tanh
from theano.tensor.type import bvector, float_dtypes, fmatrix, fvector, vector from theano.tensor.type import bvector, float_dtypes, fmatrix, fvector, vector
...@@ -131,7 +132,7 @@ class TestMathErrorFunctions: ...@@ -131,7 +132,7 @@ class TestMathErrorFunctions:
def test_elemwise_erfinv(self): def test_elemwise_erfinv(self):
for dtype in self.dtypes: for dtype in self.dtypes:
vec = vector(dtype=dtype) vec = vector(dtype=dtype)
output = tt.erfinv(vec) output = erfinv(vec)
f_host = theano.function( f_host = theano.function(
[vec], [vec],
output, output,
...@@ -166,7 +167,7 @@ class TestMathErrorFunctions: ...@@ -166,7 +167,7 @@ class TestMathErrorFunctions:
def test_elemwise_erfcinv(self): def test_elemwise_erfcinv(self):
for dtype in self.dtypes: for dtype in self.dtypes:
vec = vector(dtype=dtype) vec = vector(dtype=dtype)
output = tt.erfcinv(vec) output = erfcinv(vec)
f_host = theano.function( f_host = theano.function(
[vec], [vec],
output, output,
...@@ -205,7 +206,7 @@ class TestFloat16: ...@@ -205,7 +206,7 @@ class TestFloat16:
x = vector(dtype="float16") x = vector(dtype="float16")
y = fvector() y = fvector()
cz = tt.tanh(x + tt.cast(y, "float16")) cz = tanh(x + tt.cast(y, "float16"))
o = ( o = (
cz cz
- cz ** 2 - cz ** 2
...@@ -223,7 +224,7 @@ class TestFloat16: ...@@ -223,7 +224,7 @@ class TestFloat16:
y = vector(dtype="float16") y = vector(dtype="float16")
z = vector(dtype="float16") z = vector(dtype="float16")
o = tt.switch(v, tt.mul(w, x, y), z) o = tt.switch(v, mul(w, x, y), z)
theano.function([v, w, x, y, z], o, mode=mode_with_gpu) theano.function([v, w, x, y, z], o, mode=mode_with_gpu)
def test_cast_float16(self): def test_cast_float16(self):
......
...@@ -5,7 +5,7 @@ import numpy as np ...@@ -5,7 +5,7 @@ import numpy as np
import pytest import pytest
import theano import theano
import theano.tensor as tt import theano.tensor.math as tm
from tests import unittest_tools as utt from tests import unittest_tools as utt
from tests.gpuarray.config import mode_with_gpu, test_ctx_name from tests.gpuarray.config import mode_with_gpu, test_ctx_name
from tests.tensor.test_extra_ops import TestCumOp from tests.tensor.test_extra_ops import TestCumOp
...@@ -30,13 +30,13 @@ class TestGpuCumOp(TestCumOp): ...@@ -30,13 +30,13 @@ class TestGpuCumOp(TestCumOp):
# The CPU implementation is not so accurate, which throws out DebugMode. # The CPU implementation is not so accurate, which throws out DebugMode.
# Since propagating .tag.values_eq_approx to the output of every # Since propagating .tag.values_eq_approx to the output of every
# GpuFromHost seems overkill, we just relax the rtol for these tests # GpuFromHost seems overkill, we just relax the rtol for these tests
self.old_rtol = tt.float32_rtol self.old_rtol = tm.float32_rtol
tt.float32_rtol *= 2 tm.float32_rtol *= 2
def teardown_method(self): def teardown_method(self):
super().teardown_method() super().teardown_method()
# Restore rtol # Restore rtol
tt.float32_rtol = self.old_rtol tm.float32_rtol = self.old_rtol
@pytest.mark.skipif( @pytest.mark.skipif(
theano.config.floatX != "float32", theano.config.floatX != "float32",
......
...@@ -10,6 +10,8 @@ from theano.gpuarray.nnet import ( ...@@ -10,6 +10,8 @@ from theano.gpuarray.nnet import (
GpuSoftmax, GpuSoftmax,
GpuSoftmaxWithBias, GpuSoftmaxWithBias,
) )
from theano.gradient import grad
from theano.tensor.math import argmax, log, mean
from theano.tensor.nnet import crossentropy_softmax_1hot_with_bias_dx from theano.tensor.nnet import crossentropy_softmax_1hot_with_bias_dx
from theano.tensor.type import fmatrix, fvector, lvector, matrix, vector from theano.tensor.type import fmatrix, fvector, lvector, matrix, vector
...@@ -51,9 +53,9 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias(): ...@@ -51,9 +53,9 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
dot_value = np.asarray(np.dot(xx, W_values), dtype="float32") dot_value = np.asarray(np.dot(xx, W_values), dtype="float32")
del W_values del W_values
p_y_given_x = theano.tensor.nnet.softmax(dot_result + b) p_y_given_x = theano.tensor.nnet.softmax(dot_result + b)
y_pred = tt.argmax(p_y_given_x, axis=-1) y_pred = argmax(p_y_given_x, axis=-1)
loss = -tt.mean(tt.log(p_y_given_x)[tt.arange(y.shape[0]), y]) loss = -mean(log(p_y_given_x)[tt.arange(y.shape[0]), y])
dW = tt.grad(loss, dot_result) dW = grad(loss, dot_result)
classify = theano.function( classify = theano.function(
inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_without_gpu inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_without_gpu
) )
......
...@@ -35,6 +35,8 @@ from theano.gpuarray.subtensor import GpuSubtensor ...@@ -35,6 +35,8 @@ from theano.gpuarray.subtensor import GpuSubtensor
from theano.gpuarray.type import GpuArrayType, get_context, gpuarray_shared_constructor from theano.gpuarray.type import GpuArrayType, get_context, gpuarray_shared_constructor
from theano.graph.opt import check_stack_trace from theano.graph.opt import check_stack_trace
from theano.tensor.basic import Alloc, AllocEmpty, Rebroadcast from theano.tensor.basic import Alloc, AllocEmpty, Rebroadcast
from theano.tensor.blas import batched_dot
from theano.tensor.math import dot, eq, exp, gt, tanh
from theano.tensor.nnet import abstract_conv from theano.tensor.nnet import abstract_conv
from theano.tensor.type import ( from theano.tensor.type import (
TensorType, TensorType,
...@@ -80,7 +82,7 @@ def _check_stack_trace(thing): ...@@ -80,7 +82,7 @@ def _check_stack_trace(thing):
def test_local_assert(): def test_local_assert():
x = fmatrix() x = fmatrix()
a = assert_op(x, tt.eq(x, 0).any()) a = assert_op(x, eq(x, 0).any())
f = theano.function([x], a, mode=mode_with_gpu) f = theano.function([x], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
a_op = [n for n in topo if isinstance(n.op, Assert)] a_op = [n for n in topo if isinstance(n.op, Assert)]
...@@ -90,7 +92,7 @@ def test_local_assert(): ...@@ -90,7 +92,7 @@ def test_local_assert():
def test_local_remove_all_assert(): def test_local_remove_all_assert():
x = fmatrix() x = fmatrix()
a = assert_op(x, tt.eq(x, 0).any()) a = assert_op(x, eq(x, 0).any())
# By default `unsafe` should not be there # By default `unsafe` should not be there
f = theano.function([x], a, mode=mode_with_gpu.excluding("unsafe")) f = theano.function([x], a, mode=mode_with_gpu.excluding("unsafe"))
...@@ -398,7 +400,7 @@ def test_pdbbreakpoint_op(): ...@@ -398,7 +400,7 @@ def test_pdbbreakpoint_op():
# Create a function composed of a breakpoint followed by # Create a function composed of a breakpoint followed by
# some computation # some computation
condition = tt.gt(b.sum(), 0) condition = gt(b.sum(), 0)
b_monitored = PdbBreakpoint(name="TestBreakpoint")(condition, b) b_monitored = PdbBreakpoint(name="TestBreakpoint")(condition, b)
output = b_monitored ** 2 output = b_monitored ** 2
...@@ -645,7 +647,7 @@ def test_not_useless_scalar_gpuelemwise(): ...@@ -645,7 +647,7 @@ def test_not_useless_scalar_gpuelemwise():
X = fmatrix() X = fmatrix()
x = np.random.randn(32, 32).astype(np.float32) x = np.random.randn(32, 32).astype(np.float32)
m1 = theano.shared(np.random.randn(32, 32).astype(np.float32)) m1 = theano.shared(np.random.randn(32, 32).astype(np.float32))
loss = (X - tt.dot(X, m1)).norm(L=2) loss = (X - dot(X, m1)).norm(L=2)
lr = theano.shared(np.asarray(0.001, dtype=np.float32)) lr = theano.shared(np.asarray(0.001, dtype=np.float32))
grad = theano.grad(loss, m1) grad = theano.grad(loss, m1)
...@@ -673,7 +675,7 @@ def test_local_assert_no_cpu_op(): ...@@ -673,7 +675,7 @@ def test_local_assert_no_cpu_op():
rng = np.random.RandomState(utt.fetch_seed()) rng = np.random.RandomState(utt.fetch_seed())
m = rng.uniform(-1, 1, (10, 10)).astype("float32") m = rng.uniform(-1, 1, (10, 10)).astype("float32")
ms = gpuarray_shared_constructor(m, name="m_shared") ms = gpuarray_shared_constructor(m, name="m_shared")
out = tt.tanh(ms).dot(ms.T) out = tanh(ms).dot(ms.T)
mode_local_assert = mode_with_gpu.including("assert_no_cpu_op") mode_local_assert = mode_with_gpu.including("assert_no_cpu_op")
mode_local_assert = mode_local_assert.excluding("local_gpua_elemwise") mode_local_assert = mode_local_assert.excluding("local_gpua_elemwise")
...@@ -691,7 +693,7 @@ def test_no_complex(): ...@@ -691,7 +693,7 @@ def test_no_complex():
width_var = cscalar() width_var = cscalar()
freq_var = fscalar() freq_var = fscalar()
signal_var = fscalar() signal_var = fscalar()
stft_out = tt.exp(width_var * freq_var) * signal_var stft_out = exp(width_var * freq_var) * signal_var
f = theano.function([width_var, freq_var, signal_var], stft_out, mode=mode_with_gpu) f = theano.function([width_var, freq_var, signal_var], stft_out, mode=mode_with_gpu)
assert _check_stack_trace(f) assert _check_stack_trace(f)
...@@ -726,7 +728,7 @@ def test_gpu_solve_not_inplace(): ...@@ -726,7 +728,7 @@ def test_gpu_solve_not_inplace():
A = fmatrix() A = fmatrix()
b = fmatrix() b = fmatrix()
s = slinalg.solve(A, b) s = slinalg.solve(A, b)
o = tt.dot(A, s) o = dot(A, s)
f_cpu = theano.function([A, b], o, mode_without_gpu) f_cpu = theano.function([A, b], o, mode_without_gpu)
f_gpu = theano.function([A, b], o, mode=mode_with_gpu) f_gpu = theano.function([A, b], o, mode=mode_with_gpu)
count_not_inplace = len( count_not_inplace = len(
...@@ -795,8 +797,8 @@ def test_local_gpua_advanced_incsubtensor(): ...@@ -795,8 +797,8 @@ def test_local_gpua_advanced_incsubtensor():
target = ftensor4() target = ftensor4()
y = target.dimshuffle(1, 0, 2, 3).flatten(ndim=1) y = target.dimshuffle(1, 0, 2, 3).flatten(ndim=1)
w = tt.ones_like(y) w = tt.ones_like(y)
w = theano.tensor.subtensor.set_subtensor(w[tt.eq(y, 1.0).nonzero()], 100) w = theano.tensor.subtensor.set_subtensor(w[eq(y, 1.0).nonzero()], 100)
w = theano.tensor.subtensor.set_subtensor(w[tt.eq(y, -1.0).nonzero()], 0) w = theano.tensor.subtensor.set_subtensor(w[eq(y, -1.0).nonzero()], 0)
f = theano.function([target], w) f = theano.function([target], w)
assert _check_stack_trace(f) assert _check_stack_trace(f)
...@@ -823,7 +825,7 @@ def test_batched_dot_lifter(): ...@@ -823,7 +825,7 @@ def test_batched_dot_lifter():
y = TensorType(broadcastable=[s == 1 for s in y_val.shape], dtype=y_val.dtype)( y = TensorType(broadcastable=[s == 1 for s in y_val.shape], dtype=y_val.dtype)(
"y" "y"
) )
z = tt.batched_dot(x, y) z = batched_dot(x, y)
f = theano.function([x, y], z, mode=mode_with_gpu) f = theano.function([x, y], z, mode=mode_with_gpu)
f(x_val, y_val) f(x_val, y_val)
assert check_stack_trace(f, ops_to_check="all") assert check_stack_trace(f, ops_to_check="all")
......
...@@ -11,6 +11,8 @@ from tests.gpuarray.test_basic_ops import rand_gpuarray ...@@ -11,6 +11,8 @@ from tests.gpuarray.test_basic_ops import rand_gpuarray
from theano.gpuarray import GpuArrayType from theano.gpuarray import GpuArrayType
from theano.gpuarray.dnn import GpuDnnReduction from theano.gpuarray.dnn import GpuDnnReduction
from theano.gpuarray.reduction import GpuMaxAndArgmax from theano.gpuarray.reduction import GpuMaxAndArgmax
from theano.tensor.math import argmax
from theano.tensor.math import max as tt_max
# Number of values to be used in test tensors (except with 0-D tensors!). # Number of values to be used in test tensors (except with 0-D tensors!).
...@@ -113,7 +115,7 @@ class BaseTest: ...@@ -113,7 +115,7 @@ class BaseTest:
M = self.get_host_tensor() M = self.get_host_tensor()
f = theano.function( f = theano.function(
[M], [M],
[tt.max(M, axis=axis), tt.argmax(M, axis=axis)], [tt_max(M, axis=axis), argmax(M, axis=axis)],
name="shape:" + str(test_tensor.shape) + "/axis:" + str(axis) + "/HOST", name="shape:" + str(test_tensor.shape) + "/axis:" + str(axis) + "/HOST",
mode=mode_without_gpu, mode=mode_without_gpu,
) )
...@@ -128,7 +130,7 @@ class BaseTest: ...@@ -128,7 +130,7 @@ class BaseTest:
M = self.get_gpu_tensor() M = self.get_gpu_tensor()
f = theano.function( f = theano.function(
[M], [M],
[tt.max(M, axis=axis), tt.argmax(M, axis=axis)], [tt_max(M, axis=axis), argmax(M, axis=axis)],
name="shape:" + str(test_gpu_tensor.shape) + "/axis:" + str(axis) + "/GPU", name="shape:" + str(test_gpu_tensor.shape) + "/axis:" + str(axis) + "/GPU",
mode=mode_with_gpu, mode=mode_with_gpu,
) )
......
...@@ -12,6 +12,8 @@ from theano.gpuarray.elemwise import GpuElemwise ...@@ -12,6 +12,8 @@ from theano.gpuarray.elemwise import GpuElemwise
from theano.scan.basic import scan from theano.scan.basic import scan
from theano.scan.checkpoints import scan_checkpoints from theano.scan.checkpoints import scan_checkpoints
from theano.scan.op import Scan from theano.scan.op import Scan
from theano.tensor.math import dot
from theano.tensor.math import sum as tt_sum
from theano.tensor.type import fscalar, ftensor3, fvector, iscalar, vector from theano.tensor.type import fscalar, ftensor3, fvector, iscalar, vector
...@@ -574,10 +576,10 @@ class ScanGpuTests: ...@@ -574,10 +576,10 @@ class ScanGpuTests:
nparams = [U, V, W] nparams = [U, V, W]
# Build the forward pass # Build the forward pass
l1_base = tt.dot(xin, U) l1_base = dot(xin, U)
def scan_l(baseline, last_step): def scan_l(baseline, last_step):
return baseline + tt.dot(last_step, V) return baseline + dot(last_step, V)
zero_output = tt.alloc(np.asarray(0.0, dtype="float32"), mb_size, n_hid) zero_output = tt.alloc(np.asarray(0.0, dtype="float32"), mb_size, n_hid)
...@@ -588,10 +590,10 @@ class ScanGpuTests: ...@@ -588,10 +590,10 @@ class ScanGpuTests:
mode=self.mode_with_gpu_nodebug, mode=self.mode_with_gpu_nodebug,
) )
l2_out = tt.dot(l1_out, W) l2_out = dot(l1_out, W)
# Compute the cost and take the gradient wrt params # Compute the cost and take the gradient wrt params
cost = tt.sum((l2_out - yout) ** 2) cost = tt_sum((l2_out - yout) ** 2)
grads = theano.grad(cost, nparams) grads = theano.grad(cost, nparams)
updates = list(zip(nparams, (n - g for n, g in zip(nparams, grads)))) updates = list(zip(nparams, (n - g for n, g in zip(nparams, grads))))
......
...@@ -5,7 +5,6 @@ from tests import unittest_tools as utt ...@@ -5,7 +5,6 @@ from tests import unittest_tools as utt
from tests.gpuarray.config import mode_with_gpu, test_ctx_name from tests.gpuarray.config import mode_with_gpu, test_ctx_name
from tests.tensor.test_basic import TestAllocDiag from tests.tensor.test_basic import TestAllocDiag
from tests.tensor.test_subtensor import TestAdvancedSubtensor, TestSubtensor from tests.tensor.test_subtensor import TestAdvancedSubtensor, TestSubtensor
from theano import tensor as tt
from theano.compile import DeepCopyOp from theano.compile import DeepCopyOp
from theano.gpuarray.basic_ops import GpuContiguous, GpuFromHost, HostFromGpu from theano.gpuarray.basic_ops import GpuContiguous, GpuFromHost, HostFromGpu
from theano.gpuarray.elemwise import GpuDimShuffle from theano.gpuarray.elemwise import GpuDimShuffle
...@@ -22,6 +21,7 @@ from theano.gpuarray.subtensor import ( ...@@ -22,6 +21,7 @@ from theano.gpuarray.subtensor import (
) )
from theano.gpuarray.type import gpuarray_shared_constructor from theano.gpuarray.type import gpuarray_shared_constructor
from theano.tensor.basic import AllocDiag, ExtractDiag from theano.tensor.basic import AllocDiag, ExtractDiag
from theano.tensor.math import sum as tt_sum
from theano.tensor.subtensor import advanced_inc_subtensor1, inc_subtensor from theano.tensor.subtensor import advanced_inc_subtensor1, inc_subtensor
from theano.tensor.type import ivectors, matrix, tensor, tensor4, vector from theano.tensor.type import ivectors, matrix, tensor, tensor4, vector
...@@ -395,7 +395,7 @@ class TestGpuAllocDiag(TestAllocDiag): ...@@ -395,7 +395,7 @@ class TestGpuAllocDiag(TestAllocDiag):
# offset = 0 case: # offset = 0 case:
mtx_x = GpuAllocDiag()(x) mtx_x = GpuAllocDiag()(x)
sum_mtx_x = tt.sum(mtx_x) sum_mtx_x = tt_sum(mtx_x)
grad_x = theano.grad(sum_mtx_x, x) grad_x = theano.grad(sum_mtx_x, x)
grad_mtx_x = theano.grad(sum_mtx_x, mtx_x) grad_mtx_x = theano.grad(sum_mtx_x, mtx_x)
...@@ -409,7 +409,7 @@ class TestGpuAllocDiag(TestAllocDiag): ...@@ -409,7 +409,7 @@ class TestGpuAllocDiag(TestAllocDiag):
# offset > 0 case: # offset > 0 case:
mtx_x = GpuAllocDiag(2)(x) mtx_x = GpuAllocDiag(2)(x)
sum_mtx_x = tt.sum(mtx_x) sum_mtx_x = tt_sum(mtx_x)
grad_x = theano.grad(sum_mtx_x, x) grad_x = theano.grad(sum_mtx_x, x)
grad_mtx_x = theano.grad(sum_mtx_x, mtx_x) grad_mtx_x = theano.grad(sum_mtx_x, mtx_x)
...@@ -423,7 +423,7 @@ class TestGpuAllocDiag(TestAllocDiag): ...@@ -423,7 +423,7 @@ class TestGpuAllocDiag(TestAllocDiag):
# offset < 0 case: # offset < 0 case:
mtx_x = GpuAllocDiag(-3)(x) mtx_x = GpuAllocDiag(-3)(x)
sum_mtx_x = tt.sum(mtx_x) sum_mtx_x = tt_sum(mtx_x)
grad_x = theano.grad(sum_mtx_x, x) grad_x = theano.grad(sum_mtx_x, x)
grad_mtx_x = theano.grad(sum_mtx_x, mtx_x) grad_mtx_x = theano.grad(sum_mtx_x, mtx_x)
......
...@@ -25,6 +25,7 @@ from theano.graph.basic import ( ...@@ -25,6 +25,7 @@ from theano.graph.basic import (
) )
from theano.graph.op import Op from theano.graph.op import Op
from theano.graph.type import Type from theano.graph.type import Type
from theano.tensor.math import max_and_argmax
from theano.tensor.type import TensorType, iscalars, matrix, scalars from theano.tensor.type import TensorType, iscalars, matrix, scalars
from theano.tensor.type_other import NoneConst from theano.tensor.type_other import NoneConst
from theano.tensor.var import TensorVariable from theano.tensor.var import TensorVariable
...@@ -337,8 +338,8 @@ def test_equal_computations(): ...@@ -337,8 +338,8 @@ def test_equal_computations():
assert equal_computations([NoneConst], [NoneConst]) assert equal_computations([NoneConst], [NoneConst])
m = matrix() m = matrix()
max_argmax1 = tt.max_and_argmax(m) max_argmax1 = max_and_argmax(m)
max_argmax2 = tt.max_and_argmax(m) max_argmax2 = max_and_argmax(m)
assert equal_computations(max_argmax1, max_argmax2) assert equal_computations(max_argmax1, max_argmax2)
......
...@@ -11,7 +11,7 @@ from theano.graph import utils ...@@ -11,7 +11,7 @@ from theano.graph import utils
from theano.graph.basic import Apply from theano.graph.basic import Apply
from theano.graph.op import COp, Op from theano.graph.op import COp, Op
from theano.graph.type import Type from theano.graph.type import Type
from theano.tensor.basic import _allclose from theano.tensor.math import _allclose, dot
from theano.tensor.type import fmatrix, iscalar, matrix, vector from theano.tensor.type import fmatrix, iscalar, matrix, vector
...@@ -84,7 +84,7 @@ class TestComputeTestValue: ...@@ -84,7 +84,7 @@ class TestComputeTestValue:
y.tag.test_value = np.random.rand(4, 5).astype(config.floatX) y.tag.test_value = np.random.rand(4, 5).astype(config.floatX)
# should work # should work
z = tt.dot(x, y) z = dot(x, y)
assert hasattr(z.tag, "test_value") assert hasattr(z.tag, "test_value")
f = theano.function([x, y], z) f = theano.function([x, y], z)
assert _allclose(f(x.tag.test_value, y.tag.test_value), z.tag.test_value) assert _allclose(f(x.tag.test_value, y.tag.test_value), z.tag.test_value)
...@@ -92,7 +92,7 @@ class TestComputeTestValue: ...@@ -92,7 +92,7 @@ class TestComputeTestValue:
# this test should fail # this test should fail
y.tag.test_value = np.random.rand(6, 5).astype(config.floatX) y.tag.test_value = np.random.rand(6, 5).astype(config.floatX)
with pytest.raises(ValueError): with pytest.raises(ValueError):
tt.dot(x, y) dot(x, y)
def test_compute_flag(self): def test_compute_flag(self):
x = matrix("x") x = matrix("x")
...@@ -101,18 +101,18 @@ class TestComputeTestValue: ...@@ -101,18 +101,18 @@ class TestComputeTestValue:
# should skip computation of test value # should skip computation of test value
with config.change_flags(compute_test_value="off"): with config.change_flags(compute_test_value="off"):
z = tt.dot(x, y) z = dot(x, y)
assert not hasattr(z.tag, "test_value") assert not hasattr(z.tag, "test_value")
# should fail when asked by user # should fail when asked by user
with pytest.raises(ValueError), config.change_flags(compute_test_value="raise"): with pytest.raises(ValueError), config.change_flags(compute_test_value="raise"):
tt.dot(x, y) dot(x, y)
# test that a warning is raised if required # test that a warning is raised if required
with warnings.catch_warnings(), config.change_flags(compute_test_value="warn"): with warnings.catch_warnings(), config.change_flags(compute_test_value="warn"):
warnings.simplefilter("error", UserWarning) warnings.simplefilter("error", UserWarning)
with pytest.raises(UserWarning): with pytest.raises(UserWarning):
tt.dot(x, y) dot(x, y)
def test_string_var(self): def test_string_var(self):
x = matrix("x") x = matrix("x")
...@@ -123,13 +123,13 @@ class TestComputeTestValue: ...@@ -123,13 +123,13 @@ class TestComputeTestValue:
z = theano.shared(np.random.rand(5, 6).astype(config.floatX)) z = theano.shared(np.random.rand(5, 6).astype(config.floatX))
# should work # should work
out = tt.dot(tt.dot(x, y), z) out = dot(dot(x, y), z)
assert hasattr(out.tag, "test_value") assert hasattr(out.tag, "test_value")
tf = theano.function([x, y], out) tf = theano.function([x, y], out)
assert _allclose(tf(x.tag.test_value, y.tag.test_value), out.tag.test_value) assert _allclose(tf(x.tag.test_value, y.tag.test_value), out.tag.test_value)
def f(x, y, z): def f(x, y, z):
return tt.dot(tt.dot(x, y), z) return dot(dot(x, y), z)
# this test should fail # this test should fail
z.set_value(np.random.rand(7, 6).astype(config.floatX)) z.set_value(np.random.rand(7, 6).astype(config.floatX))
...@@ -142,7 +142,7 @@ class TestComputeTestValue: ...@@ -142,7 +142,7 @@ class TestComputeTestValue:
y = theano.shared(np.random.rand(4, 6).astype(config.floatX), "y") y = theano.shared(np.random.rand(4, 6).astype(config.floatX), "y")
# should work # should work
z = tt.dot(x, y) z = dot(x, y)
assert hasattr(z.tag, "test_value") assert hasattr(z.tag, "test_value")
f = theano.function([x], z) f = theano.function([x], z)
assert _allclose(f(x.tag.test_value), z.tag.test_value) assert _allclose(f(x.tag.test_value), z.tag.test_value)
...@@ -150,14 +150,14 @@ class TestComputeTestValue: ...@@ -150,14 +150,14 @@ class TestComputeTestValue:
# this test should fail # this test should fail
y.set_value(np.random.rand(5, 6).astype(config.floatX)) y.set_value(np.random.rand(5, 6).astype(config.floatX))
with pytest.raises(ValueError): with pytest.raises(ValueError):
tt.dot(x, y) dot(x, y)
def test_ndarray(self): def test_ndarray(self):
x = np.random.rand(2, 3).astype(config.floatX) x = np.random.rand(2, 3).astype(config.floatX)
y = theano.shared(np.random.rand(3, 6).astype(config.floatX), "y") y = theano.shared(np.random.rand(3, 6).astype(config.floatX), "y")
# should work # should work
z = tt.dot(x, y) z = dot(x, y)
assert hasattr(z.tag, "test_value") assert hasattr(z.tag, "test_value")
f = theano.function([], z) f = theano.function([], z)
assert _allclose(f(), z.tag.test_value) assert _allclose(f(), z.tag.test_value)
...@@ -165,7 +165,7 @@ class TestComputeTestValue: ...@@ -165,7 +165,7 @@ class TestComputeTestValue:
# this test should fail # this test should fail
x = np.random.rand(2, 4).astype(config.floatX) x = np.random.rand(2, 4).astype(config.floatX)
with pytest.raises(ValueError): with pytest.raises(ValueError):
tt.dot(x, y) dot(x, y)
def test_empty_elemwise(self): def test_empty_elemwise(self):
x = theano.shared(np.random.rand(0, 6).astype(config.floatX), "x") x = theano.shared(np.random.rand(0, 6).astype(config.floatX), "x")
...@@ -181,7 +181,7 @@ class TestComputeTestValue: ...@@ -181,7 +181,7 @@ class TestComputeTestValue:
y = theano.shared(np.random.rand(3, 6).astype(config.floatX), "y") y = theano.shared(np.random.rand(3, 6).astype(config.floatX), "y")
# should work # should work
z = tt.dot(x, y) z = dot(x, y)
assert hasattr(z.tag, "test_value") assert hasattr(z.tag, "test_value")
f = theano.function([], z) f = theano.function([], z)
assert _allclose(f(), z.tag.test_value) assert _allclose(f(), z.tag.test_value)
...@@ -189,7 +189,7 @@ class TestComputeTestValue: ...@@ -189,7 +189,7 @@ class TestComputeTestValue:
# this test should fail # this test should fail
x = tt.constant(np.random.rand(2, 4), dtype=config.floatX) x = tt.constant(np.random.rand(2, 4), dtype=config.floatX)
with pytest.raises(ValueError): with pytest.raises(ValueError):
tt.dot(x, y) dot(x, y)
def test_incorrect_type(self): def test_incorrect_type(self):
...@@ -242,7 +242,7 @@ class TestComputeTestValue: ...@@ -242,7 +242,7 @@ class TestComputeTestValue:
A.tag.test_value = np.random.rand(5, 3).astype(config.floatX) A.tag.test_value = np.random.rand(5, 3).astype(config.floatX)
def fx(prior_result, A): def fx(prior_result, A):
return tt.dot(prior_result, A) return dot(prior_result, A)
with pytest.raises(ValueError) as e: with pytest.raises(ValueError) as e:
theano.scan(fn=fx, outputs_info=tt.ones_like(A), non_sequences=A, n_steps=k) theano.scan(fn=fx, outputs_info=tt.ones_like(A), non_sequences=A, n_steps=k)
...@@ -260,7 +260,7 @@ class TestComputeTestValue: ...@@ -260,7 +260,7 @@ class TestComputeTestValue:
A.tag.test_value = np.random.rand(5, 3).astype(config.floatX) A.tag.test_value = np.random.rand(5, 3).astype(config.floatX)
def fx(prior_result, A): def fx(prior_result, A):
return tt.dot(prior_result, A) return dot(prior_result, A)
with pytest.raises(ValueError): with pytest.raises(ValueError):
theano.scan( theano.scan(
......
import os
import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.type import fmatrix
def test_graph_opt_caching():
    """Compile two structurally identical graphs with optimization caching
    enabled and check they produce the same result.

    The second compilation should be served from the on-disk optimization
    cache; either way both functions must compute identical values.
    """
    # Remove any stale cache file so the test is self-contained.
    cache_path = os.path.join(theano.config.compiledir, "optimized_graphs.pkl")
    if os.path.exists(cache_path):
        os.remove(cache_path)

    dtype = "float32"
    run_mode = theano.config.mode
    # DebugMode is far too slow for this check; substitute FAST_RUN.
    if run_mode in ("DEBUG_MODE", "DebugMode"):
        run_mode = "FAST_RUN"

    with theano.config.change_flags(cache_optimizations=True):
        # First graph.
        a = fmatrix("a")
        b = fmatrix("b")
        c = theano.shared(np.ones((10, 10), dtype=dtype))
        d = theano.shared(np.ones((10, 10), dtype=dtype))
        e = tt.sum(tt.sum(tt.sum(a ** 2 + b) + c) + d)
        f1 = theano.function([a, b], e, mode=run_mode)

        # Second graph, structurally identical to the first (different
        # variable names only) -- a cache hit candidate.
        m = fmatrix("x1")
        n = fmatrix("x2")
        p = theano.shared(np.ones((10, 10), dtype=dtype))
        q = theano.shared(np.ones((10, 10), dtype=dtype))
        j = tt.sum(tt.sum(tt.sum(m ** 2 + n) + p) + q)
        f2 = theano.function([m, n], j, mode=run_mode)

        in1 = np.ones((10, 10), dtype=dtype)
        in2 = np.ones((10, 10), dtype=dtype)
        # Both compiled functions must agree on the same inputs.
        assert f1(in1, in2) == f2(in1, in2)
...@@ -11,6 +11,7 @@ from theano.graph.basic import Apply, Variable ...@@ -11,6 +11,7 @@ from theano.graph.basic import Apply, Variable
from theano.graph.op import COp, Op from theano.graph.op import COp, Op
from theano.graph.type import Generic, Type from theano.graph.type import Generic, Type
from theano.graph.utils import MethodNotDefined, TestValueError from theano.graph.utils import MethodNotDefined, TestValueError
from theano.tensor.math import log
from theano.tensor.type import dmatrix, vector from theano.tensor.type import dmatrix, vector
...@@ -289,7 +290,7 @@ def test_test_value_shared(): ...@@ -289,7 +290,7 @@ def test_test_value_shared():
@config.change_flags(compute_test_value="raise") @config.change_flags(compute_test_value="raise")
def test_test_value_op(): def test_test_value_op():
x = tt.log(np.ones((5, 5))) x = log(np.ones((5, 5)))
v = op.get_test_value(x) v = op.get_test_value(x)
assert np.allclose(v, np.zeros((5, 5))) assert np.allclose(v, np.zeros((5, 5)))
......
import pytest import pytest
import theano.tensor as tt
from tests.graph.utils import ( from tests.graph.utils import (
MyType, MyType,
MyVariable, MyVariable,
...@@ -30,6 +29,7 @@ from theano.graph.opt import ( ...@@ -30,6 +29,7 @@ from theano.graph.opt import (
pre_greedy_local_optimizer, pre_greedy_local_optimizer,
theano, theano,
) )
from theano.tensor.math import dot
from theano.tensor.opt import constant_folding from theano.tensor.opt import constant_folding
from theano.tensor.subtensor import AdvancedSubtensor from theano.tensor.subtensor import AdvancedSubtensor
from theano.tensor.type import matrix from theano.tensor.type import matrix
...@@ -332,7 +332,7 @@ class TestMergeOptimizer: ...@@ -332,7 +332,7 @@ class TestMergeOptimizer:
# Merge two nodes, one has assert, the other not. # Merge two nodes, one has assert, the other not.
x1 = matrix("x1") x1 = matrix("x1")
x2 = matrix("x2") x2 = matrix("x2")
e = tt.dot(x1, x2) + tt.dot(assert_op(x1, (x1 > x2).all()), x2) e = dot(x1, x2) + dot(assert_op(x1, (x1 > x2).all()), x2)
g = FunctionGraph([x1, x2], [e]) g = FunctionGraph([x1, x2], [e])
MergeOptimizer().optimize(g) MergeOptimizer().optimize(g)
strg = theano.printing.debugprint(g, file="str") strg = theano.printing.debugprint(g, file="str")
...@@ -354,7 +354,7 @@ class TestMergeOptimizer: ...@@ -354,7 +354,7 @@ class TestMergeOptimizer:
# with the same conditions. # with the same conditions.
x1 = matrix("x1") x1 = matrix("x1")
x2 = matrix("x2") x2 = matrix("x2")
e = tt.dot(assert_op(x1, (x1 > x2).all()), x2) + tt.dot( e = dot(assert_op(x1, (x1 > x2).all()), x2) + dot(
assert_op(x1, (x1 > x2).all()), x2 assert_op(x1, (x1 > x2).all()), x2
) )
g = FunctionGraph([x1, x2], [e]) g = FunctionGraph([x1, x2], [e])
...@@ -381,7 +381,7 @@ class TestMergeOptimizer: ...@@ -381,7 +381,7 @@ class TestMergeOptimizer:
x1 = matrix("x1") x1 = matrix("x1")
x2 = matrix("x2") x2 = matrix("x2")
x3 = matrix("x3") x3 = matrix("x3")
e = tt.dot(assert_op(x1, (x1 > x3).all()), x2) + tt.dot( e = dot(assert_op(x1, (x1 > x3).all()), x2) + dot(
assert_op(x1, (x1 > x2).all()), x2 assert_op(x1, (x1 > x2).all()), x2
) )
g = FunctionGraph([x1, x2, x3], [e]) g = FunctionGraph([x1, x2, x3], [e])
...@@ -426,7 +426,7 @@ class TestMergeOptimizer: ...@@ -426,7 +426,7 @@ class TestMergeOptimizer:
x1 = matrix("x1") x1 = matrix("x1")
x2 = matrix("x2") x2 = matrix("x2")
x3 = matrix("x3") x3 = matrix("x3")
e = tt.dot(assert_op(x1, (x1 > x3).all()), x2) + tt.dot( e = dot(assert_op(x1, (x1 > x3).all()), x2) + dot(
x1, assert_op(x2, (x2 > x3).all()) x1, assert_op(x2, (x2 > x3).all())
) )
g = FunctionGraph([x1, x2, x3], [e]) g = FunctionGraph([x1, x2, x3], [e])
...@@ -457,7 +457,7 @@ class TestMergeOptimizer: ...@@ -457,7 +457,7 @@ class TestMergeOptimizer:
x1 = matrix("x1") x1 = matrix("x1")
x2 = matrix("x2") x2 = matrix("x2")
x3 = matrix("x3") x3 = matrix("x3")
e = tt.dot(x1, assert_op(x2, (x2 > x3).all())) + tt.dot( e = dot(x1, assert_op(x2, (x2 > x3).all())) + dot(
assert_op(x1, (x1 > x3).all()), x2 assert_op(x1, (x1 > x3).all()), x2
) )
g = FunctionGraph([x1, x2, x3], [e]) g = FunctionGraph([x1, x2, x3], [e])
......
from theano import tensor as tt
from theano.graph.basic import io_toposort from theano.graph.basic import io_toposort
from theano.graph.sched import ( from theano.graph.sched import (
_toposort, _toposort,
...@@ -7,6 +6,7 @@ from theano.graph.sched import ( ...@@ -7,6 +6,7 @@ from theano.graph.sched import (
reverse_dict, reverse_dict,
sort_apply_nodes, sort_apply_nodes,
) )
from theano.tensor.math import dot
from theano.tensor.type import matrix from theano.tensor.type import matrix
from theano.utils import cmp from theano.utils import cmp
...@@ -15,7 +15,7 @@ def test_dependence(): ...@@ -15,7 +15,7 @@ def test_dependence():
dependence = make_dependence_cmp() dependence = make_dependence_cmp()
x = matrix("x") x = matrix("x")
y = tt.dot(x * 2, x + 1) y = dot(x * 2, x + 1)
nodes = io_toposort([x], [y]) nodes = io_toposort([x], [y])
for a, b in zip(nodes[:-1], nodes[1:]): for a, b in zip(nodes[:-1], nodes[1:]):
...@@ -24,7 +24,7 @@ def test_dependence(): ...@@ -24,7 +24,7 @@ def test_dependence():
def test_sort_apply_nodes(): def test_sort_apply_nodes():
x = matrix("x") x = matrix("x")
y = tt.dot(x * 2, x + 1) y = dot(x * 2, x + 1)
def str_cmp(a, b): def str_cmp(a, b):
return cmp(str(a), str(b)) # lexicographical sort return cmp(str(a), str(b)) # lexicographical sort
......
from theano import tensor as tt
from theano.graph.basic import Apply, Variable from theano.graph.basic import Apply, Variable
from theano.graph.fg import FunctionGraph from theano.graph.fg import FunctionGraph
from theano.graph.op import Op from theano.graph.op import Op
from theano.graph.toolbox import NodeFinder, is_same_graph from theano.graph.toolbox import NodeFinder, is_same_graph
from theano.graph.type import Type from theano.graph.type import Type
from theano.tensor.math import neg
from theano.tensor.type import vectors from theano.tensor.type import vectors
...@@ -122,8 +122,8 @@ class TestIsSameGraph: ...@@ -122,8 +122,8 @@ class TestIsSameGraph:
({y: x}, True), ({y: x}, True),
), ),
), ),
(x, tt.neg(x), (({}, False),)), (x, neg(x), (({}, False),)),
(x, tt.neg(y), (({}, False),)), (x, neg(y), (({}, False),)),
] ]
) )
......
差异被折叠。
...@@ -5,18 +5,20 @@ import time ...@@ -5,18 +5,20 @@ import time
import numpy as np import numpy as np
import pytest import pytest
import theano from theano.compile.function import function
from theano import function
from theano import tensor as tt
from theano.compile.io import In from theano.compile.io import In
from theano.compile.mode import Mode from theano.compile.mode import Mode, get_mode
from theano.compile.sharedvalue import shared
from theano.configdefaults import config from theano.configdefaults import config
from theano.graph.basic import Apply from theano.graph.basic import Apply
from theano.graph.op import Op from theano.graph.op import Op
from theano.ifelse import ifelse from theano.ifelse import ifelse
from theano.link.c.basic import OpWiseCLinker
from theano.link.c.exceptions import MissingGXX from theano.link.c.exceptions import MissingGXX
from theano.link.vm import Loop, VMLinker from theano.link.vm import Loop, VMLinker
from theano.tensor.math import cosh, sin, tanh
from theano.tensor.type import dvector, lscalar, scalar, scalars, vector, vectors from theano.tensor.type import dvector, lscalar, scalar, scalars, vector, vectors
from theano.tensor.var import TensorConstant
class TestCallbacks: class TestCallbacks:
...@@ -59,7 +61,7 @@ def test_c_thunks(): ...@@ -59,7 +61,7 @@ def test_c_thunks():
a = scalars("a") a = scalars("a")
b, c = vectors("bc") b, c = vectors("bc")
cases = [False] cases = [False]
if theano.config.cxx: if config.cxx:
cases.append(True) cases.append(True)
for c_thunks in cases: for c_thunks in cases:
f = function( f = function(
...@@ -76,7 +78,7 @@ def test_c_thunks(): ...@@ -76,7 +78,7 @@ def test_c_thunks():
@pytest.mark.skipif( @pytest.mark.skipif(
not theano.config.cxx, reason="G++ not available, so we need to skip this test." not config.cxx, reason="G++ not available, so we need to skip this test."
) )
def test_speed(): def test_speed():
def build_graph(x, depth=5): def build_graph(x, depth=5):
...@@ -94,7 +96,7 @@ def test_speed(): ...@@ -94,7 +96,7 @@ def test_speed():
def time_numpy(): def time_numpy():
steps_a = 5 steps_a = 5
steps_b = 100 steps_b = 100
x = np.asarray([2.0, 3.0], dtype=theano.config.floatX) x = np.asarray([2.0, 3.0], dtype=config.floatX)
numpy_version(x, steps_a) numpy_version(x, steps_a)
t0 = time.time() t0 = time.time()
...@@ -134,12 +136,10 @@ def test_speed(): ...@@ -134,12 +136,10 @@ def test_speed():
print(f"{name} takes {1000 * (t_b - t_a) / (steps_b - steps_a):f} s/Kop") print(f"{name} takes {1000 * (t_b - t_a) / (steps_b - steps_a):f} s/Kop")
from theano.link.c.basic import OpWiseCLinker
time_linker("c|py", OpWiseCLinker) time_linker("c|py", OpWiseCLinker)
time_linker("vmLinker", VMLinker) time_linker("vmLinker", VMLinker)
time_linker("vmLinker_nogc", lambda: VMLinker(allow_gc=False)) time_linker("vmLinker_nogc", lambda: VMLinker(allow_gc=False))
if theano.config.cxx: if config.cxx:
time_linker("vmLinker_CLOOP", lambda: VMLinker(allow_gc=False, use_cloop=True)) time_linker("vmLinker_CLOOP", lambda: VMLinker(allow_gc=False, use_cloop=True))
time_numpy() time_numpy()
...@@ -179,7 +179,7 @@ def test_speed_lazy(): ...@@ -179,7 +179,7 @@ def test_speed_lazy():
time_linker("vmLinker", VMLinker) time_linker("vmLinker", VMLinker)
time_linker("vmLinker_nogc", lambda: VMLinker(allow_gc=False)) time_linker("vmLinker_nogc", lambda: VMLinker(allow_gc=False))
if theano.config.cxx: if config.cxx:
time_linker("vmLinker_C", lambda: VMLinker(allow_gc=False, use_cloop=True)) time_linker("vmLinker_C", lambda: VMLinker(allow_gc=False, use_cloop=True))
...@@ -189,7 +189,7 @@ def test_partial_function(): ...@@ -189,7 +189,7 @@ def test_partial_function():
def check_partial_function(linker_name): def check_partial_function(linker_name):
x = scalar("input") x = scalar("input")
y = x ** 2 y = x ** 2
f = theano.function( f = function(
[x], [y + 7, y - 9, y / 14.0], mode=Mode(optimizer=None, linker=linker_name) [x], [y + 7, y - 9, y / 14.0], mode=Mode(optimizer=None, linker=linker_name)
) )
...@@ -198,19 +198,19 @@ def test_partial_function(): ...@@ -198,19 +198,19 @@ def test_partial_function():
utt.assert_allclose(f(5), np.array([32.0, 16.0, 1.7857142857142858])) utt.assert_allclose(f(5), np.array([32.0, 16.0, 1.7857142857142858]))
check_partial_function(VMLinker(allow_partial_eval=True, use_cloop=False)) check_partial_function(VMLinker(allow_partial_eval=True, use_cloop=False))
if not theano.config.cxx: if not config.cxx:
pytest.skip("Need cxx for this test") pytest.skip("Need cxx for this test")
check_partial_function("cvm") check_partial_function("cvm")
@pytest.mark.skipif( @pytest.mark.skipif(
not theano.config.cxx, reason="G++ not available, so we need to skip this test." not config.cxx, reason="G++ not available, so we need to skip this test."
) )
def test_partial_function_with_output_keys(): def test_partial_function_with_output_keys():
def check_partial_function_output_keys(linker_name): def check_partial_function_output_keys(linker_name):
x = scalar("input") x = scalar("input")
y = 3 * x y = 3 * x
f = theano.function( f = function(
[x], {"a": y * 5, "b": y - 7}, mode=Mode(optimizer=None, linker=linker_name) [x], {"a": y * 5, "b": y - 7}, mode=Mode(optimizer=None, linker=linker_name)
) )
...@@ -223,19 +223,19 @@ def test_partial_function_with_output_keys(): ...@@ -223,19 +223,19 @@ def test_partial_function_with_output_keys():
@pytest.mark.skipif( @pytest.mark.skipif(
not theano.config.cxx, reason="G++ not available, so we need to skip this test." not config.cxx, reason="G++ not available, so we need to skip this test."
) )
def test_partial_function_with_updates(): def test_partial_function_with_updates():
def check_updates(linker_name): def check_updates(linker_name):
x = lscalar("input") x = lscalar("input")
y = theano.shared(np.asarray(1, "int64"), name="global") y = shared(np.asarray(1, "int64"), name="global")
f = theano.function( f = function(
[x], [x],
[x, x + 34], [x, x + 34],
updates=[(y, x + 1)], updates=[(y, x + 1)],
mode=Mode(optimizer=None, linker=linker_name), mode=Mode(optimizer=None, linker=linker_name),
) )
g = theano.function( g = function(
[x], [x],
[x - 6], [x - 6],
updates=[(y, y + 3)], updates=[(y, y + 3)],
...@@ -253,12 +253,12 @@ def test_partial_function_with_updates(): ...@@ -253,12 +253,12 @@ def test_partial_function_with_updates():
def test_allow_gc_cvm(): def test_allow_gc_cvm():
mode = theano.config.mode mode = config.mode
if mode in ["DEBUG_MODE", "DebugMode"]: if mode in ["DEBUG_MODE", "DebugMode"]:
mode = "FAST_RUN" mode = "FAST_RUN"
v = vector() v = vector()
f = theano.function([v], v + 1, mode=mode) f = function([v], v + 1, mode=mode)
f([1]) f([1])
n = list(f.maker.fgraph.apply_nodes)[0].outputs[0] n = list(f.maker.fgraph.apply_nodes)[0].outputs[0]
...@@ -289,7 +289,7 @@ if run_memory_usage_tests: ...@@ -289,7 +289,7 @@ if run_memory_usage_tests:
x = vector() x = vector()
z = x z = x
for d in range(10): for d in range(10):
z = tt.sin(-z + 1) z = sin(-z + 1)
f = function([x], z, mode=Mode(optimizer=None, linker="cvm")) f = function([x], z, mode=Mode(optimizer=None, linker="cvm"))
if not i % 100: if not i % 100:
...@@ -347,7 +347,7 @@ if run_memory_usage_tests: ...@@ -347,7 +347,7 @@ if run_memory_usage_tests:
def build_graph(x, depth=5): def build_graph(x, depth=5):
z = x z = x
for d in range(depth): for d in range(depth):
z = tt.sin(-z + 1) z = sin(-z + 1)
return z return z
def time_linker(name, linker): def time_linker(name, linker):
...@@ -391,35 +391,33 @@ def test_vm_gc(): ...@@ -391,35 +391,33 @@ def test_vm_gc():
x = vector() x = vector()
p = RunOnce()(x) p = RunOnce()(x)
mode = Mode(linker=VMLinker(lazy=True)) mode = Mode(linker=VMLinker(lazy=True))
f = theano.function([In(x, mutable=True)], [p + 1, p + 2], mode=mode) f = function([In(x, mutable=True)], [p + 1, p + 2], mode=mode)
f([1, 2, 3]) f([1, 2, 3])
p = RunOnce()(x) p = RunOnce()(x)
pp = p + p pp = p + p
f = theano.function([x], [pp + pp], mode=mode) f = function([x], [pp + pp], mode=mode)
f([1, 2, 3]) f([1, 2, 3])
def test_reallocation(): def test_reallocation():
x = scalar("x") x = scalar("x")
y = scalar("y") y = scalar("y")
z = tt.tanh(3 * x + y) + tt.cosh(x + 5 * y) z = tanh(3 * x + y) + cosh(x + 5 * y)
# The functinality is currently implement for non lazy and non c VM only. # The functinality is currently implement for non lazy and non c VM only.
for linker in [ for linker in [
VMLinker(allow_gc=False, lazy=False, use_cloop=False), VMLinker(allow_gc=False, lazy=False, use_cloop=False),
VMLinker(allow_gc=True, lazy=False, use_cloop=False), VMLinker(allow_gc=True, lazy=False, use_cloop=False),
]: ]:
m = theano.compile.get_mode(Mode(linker=linker)) m = get_mode(Mode(linker=linker))
m = m.excluding("fusion", "inplace") m = m.excluding("fusion", "inplace")
f = theano.function([x, y], z, name="test_reduce_memory", mode=m) f = function([x, y], z, name="test_reduce_memory", mode=m)
output = f(1, 2) output = f(1, 2)
assert output assert output
storage_map = f.fn.storage_map storage_map = f.fn.storage_map
def check_storage(storage_map): def check_storage(storage_map):
from theano.tensor.var import TensorConstant
for i in storage_map: for i in storage_map:
if not isinstance(i, TensorConstant): if not isinstance(i, TensorConstant):
keys_copy = list(storage_map.keys())[:] keys_copy = list(storage_map.keys())[:]
...@@ -434,7 +432,7 @@ def test_reallocation(): ...@@ -434,7 +432,7 @@ def test_reallocation():
@pytest.mark.skipif( @pytest.mark.skipif(
not theano.config.cxx, reason="G++ not available, so we need to skip this test." not config.cxx, reason="G++ not available, so we need to skip this test."
) )
def test_no_recycling(): def test_no_recycling():
x = vector() x = vector()
...@@ -446,15 +444,15 @@ def test_no_recycling(): ...@@ -446,15 +444,15 @@ def test_no_recycling():
]: ]:
mode = Mode(optimizer="fast_compile", linker=lnk) mode = Mode(optimizer="fast_compile", linker=lnk)
f = theano.function([x], x + 1, mode=mode) f = function([x], x + 1, mode=mode)
f2 = theano.function([x], (x + 1) * 2, mode=mode) f2 = function([x], (x + 1) * 2, mode=mode)
m1 = f.fn.thunks[0].thunk.module m1 = f.fn.thunks[0].thunk.module
m2 = f2.fn.thunks[0].thunk.module m2 = f2.fn.thunks[0].thunk.module
assert m1 is m2 assert m1 is m2
@pytest.mark.skipif( @pytest.mark.skipif(
not theano.config.cxx, reason="G++ not available, so we need to skip this test." not config.cxx, reason="G++ not available, so we need to skip this test."
) )
def test_VMLinker_make_vm_cvm(): def test_VMLinker_make_vm_cvm():
# We don't want this at module level, since CXX might not be present # We don't want this at module level, since CXX might not be present
......
...@@ -6,7 +6,6 @@ from theano.printing import hex_digest, min_informative_str ...@@ -6,7 +6,6 @@ from theano.printing import hex_digest, min_informative_str
__authors__ = ["PyMC Team", "Ian Goodfellow"] __authors__ = ["PyMC Team", "Ian Goodfellow"]
__credits__ = ["PyMC Team", "Ian Goodfellow"]
class MismatchError(Exception): class MismatchError(Exception):
......
...@@ -19,8 +19,8 @@ from theano.sandbox.linalg.ops import ( ...@@ -19,8 +19,8 @@ from theano.sandbox.linalg.ops import (
solve, solve,
spectral_radius_bound, spectral_radius_bound,
) )
from theano.tensor.basic import _allclose
from theano.tensor.elemwise import DimShuffle from theano.tensor.elemwise import DimShuffle
from theano.tensor.math import _allclose
from theano.tensor.nlinalg import MatrixInverse from theano.tensor.nlinalg import MatrixInverse
from theano.tensor.type import dmatrix, matrix, vector from theano.tensor.type import dmatrix, matrix, vector
......
This source diff could not be displayed because it is too large. You can view the blob instead.
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论