Remove theano.dot (#149)

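This renames the existing `theano.tensor.basic.dot` to `theano.tensor.basic.dense_dot`, moves the `theano.dot` dispatcher (which tries `l.__dot__(r)` and falls back to `r.__rdot__(l)`, so it works with both sparse and dense tensor types) into `theano.tensor.basic.dot`, and removes `theano.dot`. Call sites that used `theano.dot` are updated to use `theano.tensor.dot`.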

Remove theano.dot (#149)
9fcb536e · George Ho · GitHub · 4249a01c · 9fcb536e · 9fcb536e
--- a/tests/compile/test_pfunc.py
+++ b/tests/compile/test_pfunc.py
@@ -786,7 +786,7 @@ class TestAliasingRules:
                theano.In(m1, mutable=True),
                theano.In(m2, mutable=True),
            ],
-            theano.dot((x * 2), m1) + theano.dot((y * 3), m2),
+            theano.tensor.dot((x * 2), m1) + theano.tensor.dot((y * 3), m2),
        )
        # Test 1. If the same variable is given twice
@@ -851,9 +851,9 @@ class TestAliasingRules:
                theano.In(m3, mutable=True),
            ],
            (
-                theano.dot((x * 2), m1)
+                theano.tensor.dot((x * 2), m1)
-                + theano.dot((y * 3), m2)
+                + theano.tensor.dot((y * 3), m2)
-                + theano.dot((z * 4), m3)
+                + theano.tensor.dot((z * 4), m3)
            ),
        )

--- a/tests/scan_module/test_scan.py
+++ b/tests/scan_module/test_scan.py
@@ -485,8 +485,10 @@ class TestScan:
        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
            return [
-                theano.dot(u1_t, W_in1) + u2_t * W_in2 + theano.dot(x_tm1, W),
+                theano.tensor.dot(u1_t, W_in1)
-                theano.dot(x_tm1, W_out),
+                + u2_t * W_in2
+                + theano.tensor.dot(x_tm1, W),
+                theano.tensor.dot(x_tm1, W_out),
            ]
        outputs, updates = theano.scan(
@@ -539,11 +541,11 @@ class TestScan:
        def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1, x_tm1, y_tm1, y_tm3, W_in1):
            return [
-                theano.dot(u1_t, W_in1)
+                theano.tensor.dot(u1_t, W_in1)
                + (u2_t + u2_tm1 * u2_tp1) * W_in2
-                + theano.dot(x_tm1, W),
+                + theano.tensor.dot(x_tm1, W),
-                (y_tm1 + y_tm3) * theano.dot(x_tm1, W_out),
+                (y_tm1 + y_tm3) * theano.tensor.dot(x_tm1, W_out),
-                theano.dot(u1_t, W_in1),
+                theano.tensor.dot(u1_t, W_in1),
            ]
        outputs, updates = theano.scan(
@@ -1107,13 +1109,13 @@ class TestScan:
        def f(u1_t, u2_t, y0_tm3, y0_tm2, y0_tm1, y1_tm1):
            y0_t = (
-                theano.dot(theano.dot(u1_t, W1), W2)
+                theano.tensor.dot(theano.tensor.dot(u1_t, W1), W2)
                + 0.1 * y0_tm1
                + 0.33 * y0_tm2
                + 0.17 * y0_tm3
            )
-            y1_t = theano.dot(u2_t, W2) + y1_tm1
+            y1_t = theano.tensor.dot(u2_t, W2) + y1_tm1
-            y2_t = theano.dot(u1_t, W1)
+            y2_t = theano.tensor.dot(u1_t, W1)
            nwW1 = W1 + 0.1
            nwW2 = W2 + 0.05
            # return outputs followed by a list of updates
@@ -1250,11 +1252,15 @@ class TestScan:
        trng = theano.tensor.shared_randomstreams.RandomStreams(utt.fetch_seed())
        def f(vsample_tm1):
-            hmean_t = theano.tensor.nnet.sigmoid(theano.dot(vsample_tm1, W) + bhid)
+            hmean_t = theano.tensor.nnet.sigmoid(
+                theano.tensor.dot(vsample_tm1, W) + bhid
+            )
            hsample_t = theano.tensor.cast(
                trng.binomial(hmean_t.shape, 1, hmean_t), dtype="float32"
            )
-            vmean_t = theano.tensor.nnet.sigmoid(theano.dot(hsample_t, W.T) + bvis)
+            vmean_t = theano.tensor.nnet.sigmoid(
+                theano.tensor.dot(hsample_t, W.T) + bvis
+            )
            return theano.tensor.cast(
                trng.binomial(vmean_t.shape, 1, vmean_t), dtype="float32"
            )
@@ -1463,8 +1469,10 @@ class TestScan:
        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
            return [
-                theano.dot(u1_t, W_in1) + u2_t * W_in2 + theano.dot(x_tm1, W),
+                theano.tensor.dot(u1_t, W_in1)
-                theano.dot(x_tm1, W_out),
+                + u2_t * W_in2
+                + theano.tensor.dot(x_tm1, W),
+                theano.tensor.dot(x_tm1, W_out),
            ]
        cost, updates = scan_project_sum(
@@ -1532,11 +1540,11 @@ class TestScan:
        def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1, x_tm1, y_tm1, y_tm3, W_in1):
            return [
-                theano.dot(u1_t, W_in1)
+                theano.tensor.dot(u1_t, W_in1)
                + (u2_t + u2_tm1 * u2_tp1) * W_in2
-                + theano.dot(x_tm1, W),
+                + theano.tensor.dot(x_tm1, W),
-                (y_tm1 + y_tm3) * theano.dot(x_tm1, W_out),
+                (y_tm1 + y_tm3) * theano.tensor.dot(x_tm1, W_out),
-                theano.dot(u1_t, W_in1),
+                theano.tensor.dot(u1_t, W_in1),
            ]
        # We change the compute_test_value[_opt] flag to run the
@@ -1795,10 +1803,10 @@ for{cpu,scan_fn}.2 [id H] ''
        def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1, x_tm1, y_tm1, y_tm3, W_in1):
            return [
-                theano.dot(u1_t, W_in1)
+                theano.tensor.dot(u1_t, W_in1)
                + (u2_t + u2_tm1 * u2_tp1) * W_in2
-                + theano.dot(x_tm1, W),
+                + theano.tensor.dot(x_tm1, W),
-                (y_tm1 + y_tm3) * theano.dot(x_tm1, W_out),
+                (y_tm1 + y_tm3) * theano.tensor.dot(x_tm1, W_out),
            ]
        cost, updates = scan_project_sum(
@@ -1853,7 +1861,7 @@ for{cpu,scan_fn}.2 [id H] ''
            trng1 = theano.tensor.shared_randomstreams.RandomStreams(123)
            x_t = (
                theano.tensor.cast(u2_t, theano.config.floatX)
-                + theano.dot(u_t, W_in)
+                + theano.tensor.dot(u_t, W_in)
                + x_tm1
                + trng1.uniform(low=-1.1, high=1.1, dtype=theano.config.floatX)
            )
@@ -1935,7 +1943,7 @@ for{cpu,scan_fn}.2 [id H] ''
        def f_rnn_cmpl(u_t, x_tm1, W_in):
            trng1 = theano.tensor.shared_randomstreams.RandomStreams(123)
            rnd_nb = trng1.uniform(low=-0.1, high=0.1)
-            x_t = theano.dot(u_t, W_in) + x_tm1 + rnd_nb
+            x_t = theano.tensor.dot(u_t, W_in) + x_tm1 + rnd_nb
            x_t = theano.tensor.cast(x_t, dtype=theano.config.floatX)
            return x_t
@@ -2026,8 +2034,10 @@ for{cpu,scan_fn}.2 [id H] ''
        # prior results: h_tm2, h_tm1
        # non-sequences: W_ih, W_hh, W_ho, b_h
        def one_step(x_t, h_tm2, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
-            h_t = tensor.tanh(theano.dot(x_t, W_ih) + theano.dot(h_tm2, W_hh) + b_h)
+            h_t = tensor.tanh(
-            y_t = theano.dot(h_t, W_ho) + b_o
+                theano.tensor.dot(x_t, W_ih) + theano.tensor.dot(h_tm2, W_hh) + b_h
+            )
+            y_t = theano.tensor.dot(h_t, W_ho) + b_o
            return [h_t, y_t]
        # hidden and outputs of the entire sequence
@@ -2181,7 +2191,7 @@ for{cpu,scan_fn}.2 [id H] ''
        A = theano.tensor.matrix("A")
        fc1 = theano.shared(0.5, name="fc1")
        fc2 = theano.shared(0.9, name="fc2")
-        y = fc1 * theano.dot(x * x, theano.dot(A, x))
+        y = fc1 * theano.tensor.dot(x * x, theano.tensor.dot(A, x))
        y.name = "y"
        gy = theano.tensor.grad(y, x)
        gy.name = "gy"
@@ -2326,8 +2336,10 @@ for{cpu,scan_fn}.2 [id H] ''
            return [
                y_tm3 + 1,
                y_tm3 + 2,
-                theano.dot(u1_t, W_in1) + u2_t * W_in2 + theano.dot(x_tm1, W),
+                theano.tensor.dot(u1_t, W_in1)
-                y_tm1 + theano.dot(x_tm1, W_out),
+                + u2_t * W_in2
+                + theano.tensor.dot(x_tm1, W),
+                y_tm1 + theano.tensor.dot(x_tm1, W_out),
            ]
        outputs, updates = theano.scan(
@@ -2407,8 +2419,10 @@ for{cpu,scan_fn}.2 [id H] ''
        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
            return [
                y_tm3 + 1,
-                theano.dot(u1_t, W_in1) + u2_t * W_in2 + theano.dot(x_tm1, W),
+                theano.tensor.dot(u1_t, W_in1)
-                y_tm1 + theano.dot(x_tm1, W_out),
+                + u2_t * W_in2
+                + theano.tensor.dot(x_tm1, W),
+                y_tm1 + theano.tensor.dot(x_tm1, W_out),
            ]
        _outputs, updates = theano.scan(
@@ -4022,8 +4036,10 @@ for{cpu,scan_fn}.2 [id H] ''
        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
            return [
                y_tm3 + 1,
-                theano.dot(u1_t, W_in1) + u2_t * W_in2 + theano.dot(x_tm1, W),
+                theano.tensor.dot(u1_t, W_in1)
-                y_tm1 + theano.dot(x_tm1, W_out),
+                + u2_t * W_in2
+                + theano.tensor.dot(x_tm1, W),
+                y_tm1 + theano.tensor.dot(x_tm1, W_out),
            ]
        rval, updates = theano.scan(
@@ -4069,7 +4085,7 @@ for{cpu,scan_fn}.2 [id H] ''
        A = theano.tensor.matrix("A")
        z, updates = theano.scan(
-            theano.dot, sequences=[], non_sequences=[x, A], n_steps=2
+            theano.tensor.dot, sequences=[], non_sequences=[x, A], n_steps=2
        )
        f = theano.function([x, A], z)
        topo = f.maker.fgraph.toposort()
@@ -5603,7 +5619,7 @@ def test_compute_test_value_grad_cast():
        w = theano.shared(np.random.randn(4, 3).astype(floatX), name="w")
        outputs, _ = theano.scan(
-            lambda i, h, w: (theano.dot(h[i], w), i),
+            lambda i, h, w: (theano.tensor.dot(h[i], w), i),
            outputs_info=[None, 0],
            non_sequences=[h, w],
            n_steps=3,

--- a/tests/sparse/test_basic.py
+++ b/tests/sparse/test_basic.py
@@ -535,7 +535,7 @@ class TestSparseInferShape(utt.InferShapeTester):
            (tensor.matrix(), SparseType("csr", "float32")()),
        ]:
-            sparse_out = theano.dot(x, y)
+            sparse_out = tensor.dot(x, y)
            if isinstance(x, sparse.SparseVariable):
                x = tensor.matrix()
            if isinstance(y, sparse.SparseVariable):
@@ -1342,7 +1342,7 @@ class TestStructuredDot:
            for sparse_format_b in ["csc", "csr", "bsr"]:
                a = SparseType(sparse_format_a, dtype=sparse_dtype)()
                b = SparseType(sparse_format_b, dtype=sparse_dtype)()
-                d = theano.dot(a, b)
+                d = theano.tensor.dot(a, b)
                f = theano.function([a, b], theano.Out(d, borrow=True))
                for M, N, K, nnz in [
                    (4, 3, 2, 3),
@@ -1364,7 +1364,7 @@ class TestStructuredDot:
        a = SparseType("csc", dtype=sparse_dtype)()
        b = tensor.matrix(dtype=dense_dtype)
-        d = theano.dot(a, b)
+        d = theano.tensor.dot(a, b)
        f = theano.function([a, b], theano.Out(d, borrow=True))
        for M, N, K, nnz in [
@@ -1412,7 +1412,7 @@ class TestStructuredDot:
        a = SparseType("csr", dtype=sparse_dtype)()
        b = tensor.matrix(dtype=dense_dtype)
-        d = theano.dot(a, b)
+        d = theano.tensor.dot(a, b)
        f = theano.function([a, b], d)
        for M, N, K, nnz in [

--- a/tests/tensor/test_basic.py
+++ b/tests/tensor/test_basic.py
@@ -114,9 +114,9 @@ from theano.tensor import (
    constant,
    cscalar,
    default,
+    dense_dot,
    diag,
    dmatrix,
-    dot,
    dscalar,
    dscalars,
    dtensor3,
@@ -708,7 +708,7 @@ TestConjBroadcast = makeBroadcastTester(
 TestDot = makeTester(
    name="DotTester",
-    op=dot,
+    op=dense_dot,
    expected=lambda x, y: np.dot(x, y),
    checks={},
    good=dict(
@@ -1140,7 +1140,7 @@ class TestAlloc:
                (some_matrix[idx, idx], 1),
            ],
        ):
-            derp = sum(dot(subtensor, variables))
+            derp = sum(dense_dot(subtensor, variables))
            fobj = theano.function([some_vector], derp, mode=self.mode)
            grad_derp = theano.grad(derp, some_vector)
@@ -3659,7 +3659,7 @@ class TestDot:
            return type(x), x.dtype, x.shape
        nz = np.dot(x, y)
-        tz = eval_outputs([dot(as_tensor_variable(x), as_tensor_variable(y))])
+        tz = eval_outputs([dense_dot(as_tensor_variable(x), as_tensor_variable(y))])
        assert tz.dtype == nz.dtype, (tz.dtype, tz.dtype.num, nz.dtype, nz.dtype.num)
        assert tz.shape == nz.shape, (tz.shape, nz.shape)
        utt.assert_allclose(nz, tz, rtol=1e-4, atol=1e-4)
@@ -3797,7 +3797,7 @@ class TestDot:
    def not_aligned(self, x, y):
        with change_flags(compute_test_value="off"):
-            z = dot(x, y)
+            z = dense_dot(x, y)
        with pytest.raises(ValueError):
            eval_outputs([z])
@@ -3813,19 +3813,19 @@ class TestDot:
        self.not_aligned(rand(5, 4, 3), rand(6, 7, 8))
    def test_grad(self):
-        utt.verify_grad(dot, [rand(2, 3), rand(3, 2)])
+        utt.verify_grad(dense_dot, [rand(2, 3), rand(3, 2)])
-        utt.verify_grad(dot, [rand(2), rand(2, 3)])
+        utt.verify_grad(dense_dot, [rand(2), rand(2, 3)])
-        utt.verify_grad(dot, [rand(3, 2), rand(2)])
+        utt.verify_grad(dense_dot, [rand(3, 2), rand(2)])
-        utt.verify_grad(dot, [rand(2), rand(2)])
+        utt.verify_grad(dense_dot, [rand(2), rand(2)])
-        utt.verify_grad(dot, [rand(), rand(2)])
+        utt.verify_grad(dense_dot, [rand(), rand(2)])
-        utt.verify_grad(dot, [rand(), rand(2, 5)])
+        utt.verify_grad(dense_dot, [rand(), rand(2, 5)])
-        utt.verify_grad(dot, [rand(2), rand()])
+        utt.verify_grad(dense_dot, [rand(2), rand()])
-        utt.verify_grad(dot, [rand(2, 5), rand()])
+        utt.verify_grad(dense_dot, [rand(2, 5), rand()])
-        utt.verify_grad(dot, [rand(2, 3, 4), rand(4)])
+        utt.verify_grad(dense_dot, [rand(2, 3, 4), rand(4)])
-        utt.verify_grad(dot, [rand(3), rand(2, 3, 4)])
+        utt.verify_grad(dense_dot, [rand(3), rand(2, 3, 4)])
-        utt.verify_grad(dot, [rand(4, 3), rand(2, 3, 4)])
+        utt.verify_grad(dense_dot, [rand(4, 3), rand(2, 3, 4)])
-        utt.verify_grad(dot, [rand(2, 3, 4), rand(4, 5)])
+        utt.verify_grad(dense_dot, [rand(2, 3, 4), rand(4, 5)])
-        utt.verify_grad(dot, [rand(2, 3, 4), rand(3, 4, 5)])
+        utt.verify_grad(dense_dot, [rand(2, 3, 4), rand(3, 4, 5)])
    @pytest.mark.slow
    def test_broadcastable_patterns(self):
@@ -3882,7 +3882,7 @@ class TestDot:
                    ):
                        y = TensorType(dtype=dtype1, broadcastable=bc1)()
-                        z = dot(x, y)
+                        z = dense_dot(x, y)
                        t = TensorType(dtype=dtype0, broadcastable=z.broadcastable)()
                        rval = z * 3 + 2 * t

--- a/tests/tensor/test_blas.py
+++ b/tests/tensor/test_blas.py
@@ -1254,7 +1254,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
        rng = np.random.RandomState(unittest_tools.fetch_seed())
        v = theano.shared(np.array(rng.uniform(size=(2,)), dtype="float32"))
        w = theano.shared(np.array(rng.uniform(size=(2,)), dtype="float32"))
-        f = theano.function([], theano.dot(v, w), mode=mode_blas_opt)
+        f = theano.function([], theano.tensor.dot(v, w), mode=mode_blas_opt)
        # Assert that the dot was optimized somehow
        self.assertFunctionContains0(f, tt.dot)
@@ -1268,7 +1268,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
        rng = np.random.RandomState(unittest_tools.fetch_seed())
        v = theano.shared(np.array(rng.uniform(size=(2,)), dtype="float32"))
        m = theano.shared(np.array(rng.uniform(size=(2, 3)), dtype="float32"))
-        f = theano.function([], theano.dot(v, m), mode=mode_blas_opt)
+        f = theano.function([], theano.tensor.dot(v, m), mode=mode_blas_opt)
        # Assert that the dot was optimized somehow
        self.assertFunctionContains0(f, tt.dot)
@@ -1285,7 +1285,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
        rng = np.random.RandomState(unittest_tools.fetch_seed())
        v = theano.shared(np.array(rng.uniform(size=(2,)), dtype="float32"))
        m = theano.shared(np.array(rng.uniform(size=(3, 2)), dtype="float32"))
-        f = theano.function([], theano.dot(m, v), mode=mode_blas_opt)
+        f = theano.function([], theano.tensor.dot(m, v), mode=mode_blas_opt)
        # Assert that the dot was optimized somehow
        self.assertFunctionContains0(f, tt.dot)
@@ -1306,7 +1306,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
        v2 = theano.shared(v2_orig)
        m = theano.shared(np.array(rng.uniform(size=m_shp), dtype="float32"))
-        f = theano.function([], v2 + theano.dot(m, v1), mode=mode_blas_opt)
+        f = theano.function([], v2 + theano.tensor.dot(m, v1), mode=mode_blas_opt)
        # Assert they produce the same output
        assert np.allclose(f(), np.dot(m.get_value(), v1.get_value()) + v2_orig)
@@ -1317,7 +1317,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
        # test the inplace version
        g = theano.function(
-            [], [], updates=[(v2, v2 + theano.dot(m, v1))], mode=mode_blas_opt
+            [], [], updates=[(v2, v2 + theano.tensor.dot(m, v1))], mode=mode_blas_opt
        )
        # Assert they produce the same output
@@ -1355,7 +1355,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
        v2 = theano.shared(v2_orig)
        m = theano.shared(np.array(rng.uniform(size=(2, 3)), dtype="float32"))
-        f = theano.function([], v2 + theano.dot(v1, m), mode=mode_blas_opt)
+        f = theano.function([], v2 + theano.tensor.dot(v1, m), mode=mode_blas_opt)
        # Assert they produce the same output
        assert np.allclose(f(), np.dot(v1.get_value(), m.get_value()) + v2.get_value())
@@ -1365,7 +1365,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
        # test the inplace version
        g = theano.function(
-            [], [], updates=[(v2, v2 + theano.dot(v1, m))], mode=mode_blas_opt
+            [], [], updates=[(v2, v2 + theano.tensor.dot(v1, m))], mode=mode_blas_opt
        )
        # Assert they produce the same output
@@ -1397,7 +1397,7 @@ class TestGemv(unittest_tools.OptimizationTestMixin):
            np.array(rng.uniform(size=(1, 2)), dtype="float32"),
            broadcastable=(True, False),
        )
-        o = theano.dot(m, v1)
+        o = theano.tensor.dot(m, v1)
        f = theano.function([], o + v2, mode=mode_blas_opt)
        # Assert they produce the same output

--- a/tests/tensor/test_blas_c.py
+++ b/tests/tensor/test_blas_c.py
@@ -147,7 +147,7 @@ class TestCGemv(OptimizationTestMixin):
        mode.check_isfinite = False
        f = theano.function(
            [self.A, self.x, self.y, self.a],
-            self.a * self.y + theano.dot(self.A, self.x),
+            self.a * self.y + theano.tensor.dot(self.A, self.x),
            mode=mode,
        )
        Aval = np.ones((3, 1), dtype=self.dtype)
@@ -160,7 +160,7 @@ class TestCGemv(OptimizationTestMixin):
        skip_if_blas_ldflags_empty()
        """ Test vector dot matrix """
        f = theano.function(
-            [self.x, self.A], theano.dot(self.x, self.A), mode=self.mode
+            [self.x, self.A], theano.tensor.dot(self.x, self.A), mode=self.mode
        )
        # Assert that the dot was optimized somehow
@@ -180,7 +180,7 @@ class TestCGemv(OptimizationTestMixin):
        skip_if_blas_ldflags_empty()
        """ Test matrix dot vector """
        f = theano.function(
-            [self.A, self.y], theano.dot(self.A, self.y), mode=self.mode
+            [self.A, self.y], theano.tensor.dot(self.A, self.y), mode=self.mode
        )
        # Assert that the dot was optimized somehow
@@ -220,7 +220,7 @@ class TestCGemv(OptimizationTestMixin):
        # test the inplace version
        g = theano.function(
-            [], [], updates=[(v2, v2 + theano.dot(m, v1))], mode=self.mode
+            [], [], updates=[(v2, v2 + theano.tensor.dot(m, v1))], mode=self.mode
        )
        # Assert they produce the same output

--- a/tests/tensor/test_opt.py
+++ b/tests/tensor/test_opt.py
@@ -3954,14 +3954,14 @@ def test_local_subtensor_of_dot():
        return a.shape == b.shape and np.allclose(a, b)
    # [cst]
-    f = theano.function([m1, m2], theano.dot(m1, m2)[1], mode=mode)
+    f = theano.function([m1, m2], theano.tensor.dot(m1, m2)[1], mode=mode)
    topo = f.maker.fgraph.toposort()
    assert test_equality(f(d1, d2), np.dot(d1, d2)[1])
    # DimShuffle happen in FAST_COMPILE
    assert isinstance(topo[-1].op, (CGemv, Gemv, DimShuffle))
    # slice
-    f = theano.function([m1, m2], theano.dot(m1, m2)[1:2], mode=mode)
+    f = theano.function([m1, m2], theano.tensor.dot(m1, m2)[1:2], mode=mode)
    topo = f.maker.fgraph.toposort()
    assert test_equality(f(d1, d2), np.dot(d1, d2)[1:2])
    assert isinstance(topo[-1].op, Dot22)
@@ -3972,12 +3972,16 @@ def test_local_subtensor_of_dot():
    d1 = np.arange(30).reshape(2, 5, 3).astype(config.floatX)
    d2 = np.arange(72).reshape(4, 3, 6).astype(config.floatX) + 100
-    f = theano.function([m1, m2, idx], theano.dot(m1, m2)[idx, 1:4, :, idx:], mode=mode)
+    f = theano.function(
+        [m1, m2, idx], theano.tensor.dot(m1, m2)[idx, 1:4, :, idx:], mode=mode
+    )
    assert test_equality(f(d1, d2, 1), np.dot(d1, d2)[1, 1:4, :, 1:])
    # if we return the gradients. We need to use same mode as before.
    assert check_stack_trace(f, ops_to_check="last")
-    f = theano.function([m1, m2, idx], theano.dot(m1, m2)[1:4, :, idx:, idx], mode=mode)
+    f = theano.function(
+        [m1, m2, idx], theano.tensor.dot(m1, m2)[1:4, :, idx:, idx], mode=mode
+    )
    assert test_equality(f(d1, d2, 1), np.dot(d1, d2)[1:4, :, 1:, 1])
    # Now test that the stack trace is copied over properly,

--- a/tests/tensor/test_sharedvar.py
+++ b/tests/tensor/test_sharedvar.py
@@ -533,7 +533,8 @@ def makeSharedTester(
            s = self.cast_value(s)
            s_shared = self.shared_constructor(s)
            f = theano.function(
-                [], updates=[(s_shared, theano.dot(a_shared, b_shared) + s_shared)]
+                [],
+                updates=[(s_shared, theano.tensor.dot(a_shared, b_shared) + s_shared)],
            )
            topo = f.maker.fgraph.toposort()
            f()
@@ -569,7 +570,9 @@ def makeSharedTester(
            f = theano.function(
                [],
                s_shared.shape,
-                updates=[(s_shared, theano.dot(a_shared, b_shared) + s_shared_specify)],
+                updates=[
+                    (s_shared, theano.tensor.dot(a_shared, b_shared) + s_shared_specify)
+                ],
            )
            topo = f.maker.fgraph.toposort()
            shp = f()
@@ -606,7 +609,9 @@ def makeSharedTester(
            f = theano.function(
                [],
                s_shared.shape,
-                updates=[(s_shared, theano.dot(a_shared, b_shared) + s_shared_specify)],
+                updates=[
+                    (s_shared, theano.tensor.dot(a_shared, b_shared) + s_shared_specify)
+                ],
            )
            topo = f.maker.fgraph.toposort()
            shp = f()

--- a/theano/__init__.py
+++ b/theano/__init__.py
@@ -160,27 +160,6 @@ np.seterr(all=_all, divide=_divide, over=_over, under=_under, invalid=_invalid)
 del _all, _divide, _over, _under, _invalid
-def dot(l, r):
-    """Return a symbolic dot product.
-    This is designed to work with both sparse and dense tensors types.
-    """
-    try:
-        res = l.__dot__(r)
-        if res is NotImplemented:
-            raise NotImplementedError()
-        return res
-    except (NotImplementedError, AttributeError, TypeError):
-        res = r.__rdot__(l)
-        if res is NotImplemented:
-            raise NotImplementedError()
-        return res
 def get_scalar_constant_value(v):
    """Return the constant scalar (i.e. 0-D) value underlying variable `v`.

--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
@@ -273,6 +273,8 @@ class _sparse_py_operators:
    def __rdot__(right, left):
        return structured_dot(left, right)
+    dot = __dot__
    # N.B. THIS IS COMMENTED OUT ON PURPOSE!!!
    #     Discussion with Fred & James (at least, and maybe others before)
    #     we decided that casting from a sparse to dense should be explicit

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -6306,7 +6306,24 @@ pprint.assign(
 )
-def dot(a, b):
+def dot(l, r):
+    """Return a symbolic dot product.
+    This is designed to work with both sparse and dense tensors types.
+    """
+    try:
+        res = l.__dot__(r)
+        if res is NotImplemented:
+            raise NotImplementedError
+    except (NotImplementedError, AttributeError, TypeError):
+        res = r.__rdot__(l)
+        if res is NotImplemented:
+            raise NotImplementedError()
+    return res
+def dense_dot(a, b):
    """
    Computes the dot product of two variables.

--- a/theano/tensor/nlinalg.py
+++ b/theano/tensor/nlinalg.py
@@ -687,18 +687,18 @@ def matrix_power(M, n):
        return M
    elif n == 2:
-        return theano.dot(M, M)
+        return theano.tensor.dot(M, M)
    elif n == 3:
-        return theano.dot(theano.dot(M, M), M)
+        return theano.tensor.dot(theano.tensor.dot(M, M), M)
    result = z = None
    while n > 0:
-        z = M if z is None else theano.dot(z, z)
+        z = M if z is None else theano.tensor.dot(z, z)
        n, bit = divmod(n, 2)
        if bit:
-            result = z if result is None else theano.dot(result, z)
+            result = z if result is None else theano.tensor.dot(result, z)
    return result

--- a/theano/tensor/var.py
+++ b/theano/tensor/var.py
@@ -661,10 +661,10 @@ class _tensor_py_operators:
    """The dtype of this tensor."""
    def __dot__(left, right):
-        return theano.tensor.basic.dot(left, right)
+        return theano.tensor.basic.dense_dot(left, right)
    def __rdot__(right, left):
-        return theano.tensor.basic.dot(left, right)
+        return theano.tensor.basic.dense_dot(left, right)
    dot = __dot__