提交 b192515a authored 作者: Brandon T. Willard's avatar Brandon T. Willard 提交者: Thomas Wiecki

Move theano.tensor.basic.batched_*dot functions to theano.tensor.blas

Because tests were moved, the RNG for subsequent tests was altered, which required a change to `utt.assert_allclose` in `tests.tensor.test_basic` for more reasonable `rtol` settings.
上级 ef854897
...@@ -22,7 +22,7 @@ from theano.gpuarray.blas import ( ...@@ -22,7 +22,7 @@ from theano.gpuarray.blas import (
gpuger_inplace, gpuger_inplace,
gpuger_no_inplace, gpuger_no_inplace,
) )
from theano.tensor.blas import _dot22, batched_dot, gemm_inplace, gemv, gemv_inplace from theano.tensor.blas import BatchedDot, _dot22, gemm_inplace, gemv, gemv_inplace
from theano.tensor.type import matrix, tensor, tensor3, vector from theano.tensor.type import matrix, tensor, tensor3, vector
...@@ -186,7 +186,7 @@ gemm_batched_tests["float64"] = [ ...@@ -186,7 +186,7 @@ gemm_batched_tests["float64"] = [
TestGpuGemmBatch = makeTester( TestGpuGemmBatch = makeTester(
"GpuGemmBatchTester", "GpuGemmBatchTester",
op=lambda z, alpha, x, y, beta: alpha * batched_dot(x, y) + beta * z, op=lambda z, alpha, x, y, beta: alpha * BatchedDot()(x, y) + beta * z,
gpu_op=gpugemmbatch_inplace, gpu_op=gpugemmbatch_inplace,
cases=gemm_batched_tests, cases=gemm_batched_tests,
) )
......
...@@ -8,7 +8,7 @@ from tempfile import mkstemp ...@@ -8,7 +8,7 @@ from tempfile import mkstemp
import numpy as np import numpy as np
import pytest import pytest
from numpy.testing import assert_allclose, assert_almost_equal, assert_array_equal from numpy.testing import assert_almost_equal, assert_array_equal
import theano import theano
import theano.scalar as ts import theano.scalar as ts
...@@ -113,7 +113,6 @@ from theano.tensor.basic import ( ...@@ -113,7 +113,6 @@ from theano.tensor.basic import (
argmax, argmax,
argmin, argmin,
as_tensor_variable, as_tensor_variable,
batched_dot,
cast, cast,
choose, choose,
clip, clip,
...@@ -743,67 +742,6 @@ TestDenseDot = makeTester( ...@@ -743,67 +742,6 @@ TestDenseDot = makeTester(
bad_runtime=dict(bad1=(rand(5, 7), rand(5, 7)), bad2=(rand(5, 7), rand(8, 3))), bad_runtime=dict(bad1=(rand(5, 7), rand(5, 7)), bad2=(rand(5, 7), rand(8, 3))),
) )
# Parametrized tester for ``batched_dot``: covers every supported input rank
# (vectors through 4-tensors), mixed dtypes, and the build-time/run-time
# failure modes.  The ``rand(...)`` calls must stay in this exact order so the
# test RNG stream is unchanged.
TestBatchedDot = makeTester(
    name="BatchedDotTester",
    op=batched_dot,
    # Reference implementation: a per-batch ``np.dot`` (plain elementwise
    # product when either batch entry is a scalar), upcast to the common dtype.
    expected=(
        lambda a_batch, b_batch: np.asarray(
            [
                u * v if u.ndim == 0 or v.ndim == 0 else np.dot(u, v)
                for u, v in zip(a_batch, b_batch)
            ],
            dtype=ts.upcast(a_batch.dtype, b_batch.dtype),
        )
    ),
    checks={},
    grad={
        "correct1": (rand(3, 5, 7), rand(3, 7, 5)),
        "correct2": (rand(3, 5, 7), rand(3, 7, 9)),
        "correct3": (rand(3, 5, 7), rand(3, 7)),
        "correct4": (rand(3, 5), rand(3, 5, 7)),
        "correct5": (rand(3), rand(3, 5, 7)),
        "correct6": (rand(3, 5), rand(3)),
        "correct7": (rand(3, 5), rand(3, 5)),
        "correct8": (rand(3), rand(3)),
        "correct9": (rand(3, 5, 7, 11), rand(3)),
        "correct10": (rand(3, 2, 6, 5), rand(3, 5)),
        "correct11": (rand(3, 2, 6, 5), rand(3, 5, 7)),
        "correct12": (rand(3, 2, 6, 5), rand(3, 7, 5, 8)),
        "mixed1": (rand(3, 5).astype("float32"), rand(3, 5, 7)),
        "mixed2": (rand(3, 5).astype("float64"), rand(3, 5, 7)),
    },
    good={
        "correct1": (rand(3, 5, 7), rand(3, 7, 5)),
        "correct2": (rand(3, 5, 7), rand(3, 7, 9)),
        "correct3": (rand(3, 5, 7), rand(3, 7)),
        "correct4": (rand(3, 5), rand(3, 5, 7)),
        "correct5": (rand(3), rand(3, 5, 7)),
        "correct6": (rand(3, 5), rand(3)),
        "correct7": (rand(3, 5), rand(3, 5)),
        "correct8": (rand(3), rand(3)),
        "correct9": (rand(3, 5, 7, 11), rand(3)),
        "correct10": (rand(3, 7, 11, 5), rand(3, 5)),
        "correct11": (rand(3, 7, 11, 5), rand(3, 5, 13)),
        "correct12": (rand(3, 7, 11, 5), rand(3, 13, 5, 17)),
        "mixed1": (rand(3, 5).astype("float32"), rand(3, 5, 7)),
        "mixed2": (rand(3, 5).astype("float64"), rand(3, 5, 7)),
    },
    # A 0-d operand has no batch axis and must be rejected at build time.
    bad_build={
        "no_batch_axis2": (rand(), rand(3, 5)),
        "no_batch_axis3": (rand(3, 5), rand()),
    },
    # Mismatched batch sizes or incompatible contraction dims fail at runtime.
    bad_runtime={
        "batch_dim_mismatch1": (rand(2, 5, 7), rand(3, 7, 9)),
        "batch_dim_mismatch2": (rand(3, 5, 7), rand(2, 7, 9)),
        "batch_dim_mismatch3": (rand(3), rand(5)),
        "bad_dim1": (rand(3, 5, 7), rand(3, 5, 7)),
        "bad_dim2": (rand(3, 5, 7), rand(3, 8, 3)),
        "bad_dim3": (rand(3, 5), rand(3, 7)),
        "bad_dim4": (rand(3, 5, 7, 11), rand(3, 5)),
        "bad_dim5": (rand(3, 5, 7, 11), rand(3, 5, 13)),
        "bad_dim6": (rand(3, 5, 7, 11), rand(3, 13, 5, 17)),
    },
)
def _numpy_second(x, y): def _numpy_second(x, y):
return np.broadcast_arrays(x, y)[1] return np.broadcast_arrays(x, y)[1]
...@@ -1595,84 +1533,6 @@ class TestClip: ...@@ -1595,84 +1533,6 @@ class TestClip:
# gradient numerically # gradient numerically
def test_batched_dot():
    """Check the output shape of ``batched_dot`` on 3d and 2d inputs."""
    # 3d case: (10, 10, 20) batched-dot (10, 20, 5) -> (10, 10, 5).
    inp_a = tensor3("first")
    inp_b = tensor3("second")
    val_a = np.random.rand(10, 10, 20).astype(config.floatX)
    val_b = np.random.rand(10, 20, 5).astype(config.floatX)
    fn = theano.function([inp_a, inp_b], batched_dot(inp_a, inp_b))
    out = fn(val_a, val_b)
    assert out.shape[0] == val_a.shape[0]
    assert out.shape[1] == val_a.shape[1]
    assert out.shape[2] == val_b.shape[2]

    # 2d case: per-row dot of two (10, 10) matrices -> 1d result.
    mat_a = dmatrix("first")
    mat_b = dmatrix("second")
    val_mat_a = np.random.rand(10, 10).astype(config.floatX)
    val_mat_b = np.random.rand(10, 10).astype(config.floatX)
    fn = theano.function([mat_a, mat_b], batched_dot(mat_a, mat_b))
    out = fn(val_mat_a, val_mat_b)
    assert out.shape[0] == val_mat_a.shape[0]
def test_batched_dot_not_contiguous():
    """``batched_dot`` must accept inputs whose batch axis is strided
    (neither C- nor F-contiguous), including a negative stride."""

    def arange_shaped(*shape):
        # Deterministic data: 0..n-1 reshaped to ``shape``.
        total = int(np.prod(shape))
        return np.arange(total, dtype=config.floatX).reshape(shape)

    X = tensor3()
    W = tensor3()
    f = function([X, W], batched_dot(X, W))

    w = arange_shaped(30, 10, 5)
    # A transposed view, so slices of it are non-contiguous from the start.
    x_container = arange_shaped(20, 40, 30).T

    def check_first_dim(inverted):
        step = -1 if inverted else 1
        x = x_container[::step, ::2, ::2]
        # Sanity-check the fixture really is strided / non-contiguous.
        assert x.shape == (30, 20, 10)
        assert x.strides[0] == step * np.dtype(config.floatX).itemsize
        assert not (x.flags["C_CONTIGUOUS"] or x.flags["F_CONTIGUOUS"])
        expected = np.asarray([np.dot(u, v) for u, v in zip(x, w)])
        utt.assert_allclose(expected, f(x, w))

    check_first_dim(False)
    check_first_dim(True)
def test_batched_tensordot():
    """Check output shapes of ``tt.batched_tensordot`` for an explicit axes
    list on 4d inputs and an integer ``axes`` on matrices."""
    # 4d case: contract axes (1, 2) of the first input against (3, 1) of
    # the second; remaining axes give shape (8, 3, 5).
    inp_a = tensor4("first")
    inp_b = tensor4("second")
    out = tt.batched_tensordot(inp_a, inp_b, [[1, 2], [3, 1]])
    val_a = np.random.rand(8, 10, 20, 3).astype(config.floatX)
    val_b = np.random.rand(8, 20, 5, 10).astype(config.floatX)
    res = theano.function([inp_a, inp_b], out)(val_a, val_b)
    assert res.shape[0] == val_a.shape[0]
    assert res.shape[1] == val_a.shape[3]
    assert res.shape[2] == val_b.shape[2]

    # Matrix case with integer axes=1: contracts the single non-batch axis,
    # leaving a vector of length batch-size.
    mat_a = dmatrix("first")
    mat_b = dmatrix("second")
    out = tt.batched_tensordot(mat_a, mat_b, 1)
    val_mat_a = np.random.rand(10, 4).astype(config.floatX)
    val_mat_b = np.random.rand(10, 4).astype(config.floatX)
    res = theano.function([mat_a, mat_b], out)(val_mat_a, val_mat_b)
    assert res.shape == (val_mat_a.shape[0],)
def test_tensor_values_eq_approx(): def test_tensor_values_eq_approx():
# test, inf, -inf and nan equal themself # test, inf, -inf and nan equal themself
a = np.asarray([-np.inf, -1, 0, 1, np.inf, np.nan]) a = np.asarray([-np.inf, -1, 0, 1, np.inf, np.nan])
...@@ -2369,7 +2229,7 @@ class TestOuter: ...@@ -2369,7 +2229,7 @@ class TestOuter:
v1 = np.asarray(np.random.rand(*s1)).astype(config.floatX) v1 = np.asarray(np.random.rand(*s1)).astype(config.floatX)
v2 = np.asarray(np.random.rand(*s2)).astype(config.floatX) v2 = np.asarray(np.random.rand(*s2)).astype(config.floatX)
o = tt.outer(x, y).eval({x: v1, y: v2}) o = tt.outer(x, y).eval({x: v1, y: v2})
assert_allclose(o, np.outer(v1, v2)) utt.assert_allclose(o, np.outer(v1, v2))
def test_grad(self): def test_grad(self):
# Test the combined graph of the graph of outer # Test the combined graph of the graph of outer
...@@ -6719,10 +6579,10 @@ class TestTensorInstanceMethods: ...@@ -6719,10 +6579,10 @@ class TestTensorInstanceMethods:
x, y = self.vals x, y = self.vals
# Use allclose comparison as a user reported on the mailing # Use allclose comparison as a user reported on the mailing
# list failure otherwise with array that print exactly the same. # list failure otherwise with array that print exactly the same.
assert_allclose(x.dot(y), X.dot(Y).eval({X: x, Y: y})) utt.assert_allclose(x.dot(y), X.dot(Y).eval({X: x, Y: y}))
Z = X.dot(Y) Z = X.dot(Y)
z = x.dot(y) z = x.dot(y)
assert_allclose(x.dot(z), X.dot(Z).eval({X: x, Z: z})) utt.assert_allclose(x.dot(z), X.dot(Z).eval({X: x, Z: z}))
def test_real_imag(self): def test_real_imag(self):
X, Y = self.vars X, Y = self.vars
...@@ -6751,7 +6611,7 @@ class TestTensorInstanceMethods: ...@@ -6751,7 +6611,7 @@ class TestTensorInstanceMethods:
# std() is implemented as theano tree and does not pass its # std() is implemented as theano tree and does not pass its
# args directly to numpy. This sometimes results in small # args directly to numpy. This sometimes results in small
# difference, so we use allclose test. # difference, so we use allclose test.
assert_allclose(X.std().eval({X: x}), x.std()) utt.assert_allclose(X.std().eval({X: x}), x.std())
def test_repeat(self): def test_repeat(self):
X, _ = self.vars X, _ = self.vars
......
差异被折叠。
...@@ -20,6 +20,7 @@ from theano.tensor import ( ...@@ -20,6 +20,7 @@ from theano.tensor import (
xlogx, xlogx,
) )
from theano.tensor.basic import * from theano.tensor.basic import *
from theano.tensor.blas import batched_dot, batched_tensordot
from theano.tensor.extra_ops import ( from theano.tensor.extra_ops import (
bartlett, bartlett,
bincount, bincount,
......
...@@ -3365,82 +3365,6 @@ def transpose(x, axes=None): ...@@ -3365,82 +3365,6 @@ def transpose(x, axes=None):
return ret return ret
def batched_dot(a, b):
    """Compute the batched dot product of two variables.

    ``batched_dot(a, b)[i] = dot(a[i], b[i])``

    The leading axis of both inputs is the batch axis.  One of three
    strategies is used, checked in order:

    1. If either input is 1d (a batch of scalars), the result is a
       broadcasted elementwise product; no ``BatchedDot`` op is built.
    2. If both inputs have 2 or 3 dimensions, Theano's ``BatchedDot`` op
       is applied directly.
    3. If either input has more than 3 dimensions, the product is
       rewritten via ``batched_tensordot``, which expresses
       high-dimensional batched dot products as batched matrix-matrix
       products, so it may be possible to further optimize it for
       performance.

    Parameters
    ----------
    a, b
        Inputs convertible to ``TensorVariable``, each with at least one
        (batch) dimension.

    Raises
    ------
    TypeError
        If either input is 0-dimensional (has no batch axis).
    """
    a = as_tensor_variable(a)
    b = as_tensor_variable(b)

    if a.ndim == 0:
        raise TypeError("a must have at least one (batch) axis")
    if b.ndim == 0:
        raise TypeError("b must have at least one (batch) axis")

    if a.ndim == 1:
        # Broadcast the batch of scalars against every non-batch axis of b.
        return a.dimshuffle(0, *(["x"] * (b.ndim - 1))) * b
    if b.ndim == 1:
        return a * b.dimshuffle(0, *(["x"] * (a.ndim - 1)))
    if a.ndim > 3 or b.ndim > 3:
        # Contract the last axis of a with the second-to-last axis of b
        # (but never b's batch axis itself).
        return batched_tensordot(a, b, [[a.ndim - 1], [np.maximum(1, b.ndim - 2)]])
    # 2d/3d operands: dispatch straight to the op.  The lazy attribute
    # access avoids a circular import with theano.tensor.blas.
    return theano.tensor.blas.BatchedDot()(a, b)
def batched_tensordot(x, y, axes=2):
    """Compute a batched tensordot product.

    A hybrid of batched_dot and tensordot: computes the tensordot product
    between the two tensors while iterating over the first (batch)
    dimension, i.e. it performs one tensordot per batch entry.

    Parameters
    ----------
    x: TensorVariable
        A tensor with sizes e.g.: for 3D (dim1, dim3, dim2)
    y: TensorVariable
        A tensor with sizes e.g.: for 3D (dim1, dim2, dim4)
    axes: int or array-like of length 2
        If an integer ``i``, the number of axes to sum over; it is
        converted to an array containing the last ``i`` dimensions of
        ``x`` and the first ``i`` non-batch dimensions of ``y``
        (the first, batch, dimension is excluded):

            axes = [list(range(x.ndim - i, x.ndim)), list(range(1, i + 1))]

        If an array, it must have two elements containing the axes to
        sum over in each tensor.  For example, [[1, 2], [2, 4]] means sum
        over the 2nd and 3rd axes of ``x`` and the 3rd and 5th axes of
        ``y``.  (Remember axes are zero-indexed!)  The 2nd axis of ``x``
        and the 3rd axis of ``y`` must have the same shape; the same is
        true for the 3rd axis of ``x`` and the 5th axis of ``y``.

    Like tensordot, this function uses a series of dimshuffles and
    reshapes to reduce the tensor dot product to a matrix or vector
    dot product.  All the real work is delegated to `_tensordot_as_dot`
    with ``batched=True``, which finally calls `batched_dot` to compute
    the result.
    """
    return _tensordot_as_dot(x, y, axes, dot=batched_dot, batched=True)
def split(x, splits_size, n_splits, axis=0): def split(x, splits_size, n_splits, axis=0):
the_split = Split(n_splits) the_split = Split(n_splits)
return the_split(x, axis, splits_size) return the_split(x, axis, splits_size)
...@@ -6771,8 +6695,6 @@ __all__ = [ ...@@ -6771,8 +6695,6 @@ __all__ = [
"unbroadcast", "unbroadcast",
"addbroadcast", "addbroadcast",
"split", "split",
"batched_tensordot",
"batched_dot",
"transpose", "transpose",
"extract_constant", "extract_constant",
"clip", "clip",
......
...@@ -2071,13 +2071,13 @@ class BatchedDot(COp): ...@@ -2071,13 +2071,13 @@ class BatchedDot(COp):
raise TypeError( raise TypeError(
"theano.tensor.blas.BatchedDot: input 0 (0-indexed)" "theano.tensor.blas.BatchedDot: input 0 (0-indexed)"
f" must have ndim of 2 or 3, {int(inputs[0].ndim)} given. Consider" f" must have ndim of 2 or 3, {int(inputs[0].ndim)} given. Consider"
" calling theano.tensor.batched_dot instead." " calling theano.tensor.blas.batched_dot instead."
) )
if inputs[1].ndim not in (2, 3): if inputs[1].ndim not in (2, 3):
raise TypeError( raise TypeError(
"theano.tensor.blas.BatchedDot: input 1 (0-indexed)" "theano.tensor.blas.BatchedDot: input 1 (0-indexed)"
f" must have ndim of 2 or 3, {int(inputs[1].ndim)} given. Consider" f" must have ndim of 2 or 3, {int(inputs[1].ndim)} given. Consider"
" calling theano.tensor.batched_dot instead." " calling theano.tensor.blas.batched_dot instead."
) )
dtype = theano.scalar.upcast(*[input.type.dtype for input in inputs]) dtype = theano.scalar.upcast(*[input.type.dtype for input in inputs])
...@@ -2424,18 +2424,18 @@ class BatchedDot(COp): ...@@ -2424,18 +2424,18 @@ class BatchedDot(COp):
# x is a matrix, y is a tensor3, grad is a matrix # x is a matrix, y is a tensor3, grad is a matrix
elif xdim == 2 and ydim == 3: elif xdim == 2 and ydim == 3:
xgrad = tt.batched_dot(gz, y.dimshuffle(0, 2, 1)) xgrad = batched_dot(gz, y.dimshuffle(0, 2, 1))
ygrad = x.dimshuffle(0, 1, "x") * gz.dimshuffle(0, "x", 1) ygrad = x.dimshuffle(0, 1, "x") * gz.dimshuffle(0, "x", 1)
# x is a tensor3, y is a matrix, grad is a matrix # x is a tensor3, y is a matrix, grad is a matrix
elif xdim == 3 and ydim == 2: elif xdim == 3 and ydim == 2:
xgrad = gz.dimshuffle(0, 1, "x") * y.dimshuffle(0, "x", 1) xgrad = gz.dimshuffle(0, 1, "x") * y.dimshuffle(0, "x", 1)
ygrad = tt.batched_dot(x.dimshuffle(0, 2, 1), gz) ygrad = batched_dot(x.dimshuffle(0, 2, 1), gz)
# x is a tensor3, y is a tensor3, grad is a tensor3 # x is a tensor3, y is a tensor3, grad is a tensor3
elif xdim == ydim == 3: elif xdim == ydim == 3:
xgrad = tt.batched_dot(gz, y.dimshuffle(0, 2, 1)) xgrad = batched_dot(gz, y.dimshuffle(0, 2, 1))
ygrad = tt.batched_dot(x.dimshuffle(0, 2, 1), gz) ygrad = batched_dot(x.dimshuffle(0, 2, 1), gz)
# If x or y contain broadcastable dimensions but only one of # If x or y contain broadcastable dimensions but only one of
# them know that a matching dimensions is broadcastable, the # them know that a matching dimensions is broadcastable, the
...@@ -2532,7 +2532,7 @@ class BatchedDot(COp): ...@@ -2532,7 +2532,7 @@ class BatchedDot(COp):
return [xshp[:-1] + yshp[2:]] return [xshp[:-1] + yshp[2:]]
batched_dot = BatchedDot() _batched_dot = BatchedDot()
# from opt import register_specialize, register_canonicalize # from opt import register_specialize, register_canonicalize
...@@ -2541,3 +2541,82 @@ batched_dot = BatchedDot() ...@@ -2541,3 +2541,82 @@ batched_dot = BatchedDot()
def local_print_as_we_go_along(fgraph, node): def local_print_as_we_go_along(fgraph, node):
if node.op in (tt.sub, tt.add): if node.op in (tt.sub, tt.add):
debugprint(node) debugprint(node)
def batched_dot(a, b):
    """Compute the batched dot product of two variables.

    ``batched_dot(a, b)[i] = dot(a[i], b[i])``

    The leading axis of both inputs is the batch axis.  Depending on the
    input ranks, one of three strategies is chosen:

    1. Either input 1d (a batch of scalars): return the broadcasted
       elementwise product without building a ``BatchedDot`` op.
    2. Both inputs 2d or 3d: apply the ``BatchedDot`` op directly.
    3. Either input with more than 3 dimensions: rewrite the product via
       ``batched_tensordot``, which expresses high-dimensional batched
       dot products as batched matrix-matrix products, so it may be
       possible to further optimize it for performance.

    Parameters
    ----------
    a, b
        Inputs convertible to ``TensorVariable``, each with at least one
        (batch) dimension.

    Raises
    ------
    TypeError
        If either input is 0-dimensional (has no batch axis).
    """
    a, b = tt.as_tensor_variable(a), tt.as_tensor_variable(b)

    if a.ndim == 0:
        raise TypeError("a must have at least one (batch) axis")
    if b.ndim == 0:
        raise TypeError("b must have at least one (batch) axis")

    if a.ndim == 1:
        # Broadcast the batch of scalars over b's non-batch axes.
        return a.dimshuffle(*([0] + ["x"] * (b.ndim - 1))) * b
    if b.ndim == 1:
        return a * b.dimshuffle(*([0] + ["x"] * (a.ndim - 1)))
    if a.ndim > 3 or b.ndim > 3:
        # Contract a's last axis with b's second-to-last (never b's batch axis).
        return batched_tensordot(a, b, [[a.ndim - 1], [np.maximum(1, b.ndim - 2)]])
    return _batched_dot(a, b)
def batched_tensordot(x, y, axes=2):
    """Compute a batched tensordot product.

    A hybrid of batched_dot and tensordot: computes the tensordot product
    between the two tensors while iterating over the first (batch)
    dimension, i.e. it performs one tensordot per batch entry.

    Parameters
    ----------
    x: TensorVariable
        A tensor with sizes e.g.: for 3D (dim1, dim3, dim2)
    y: TensorVariable
        A tensor with sizes e.g.: for 3D (dim1, dim2, dim4)
    axes: int or array-like of length 2
        If an integer ``i``, the number of axes to sum over; it is
        converted to an array containing the last ``i`` dimensions of
        ``x`` and the first ``i`` non-batch dimensions of ``y``
        (the first, batch, dimension is excluded):

            axes = [list(range(x.ndim - i, x.ndim)), list(range(1, i + 1))]

        If an array, it must have two elements containing the axes to
        sum over in each tensor.  For example, [[1, 2], [2, 4]] means sum
        over the 2nd and 3rd axes of ``x`` and the 3rd and 5th axes of
        ``y``.  (Remember axes are zero-indexed!)  The 2nd axis of ``x``
        and the 3rd axis of ``y`` must have the same shape; the same is
        true for the 3rd axis of ``x`` and the 5th axis of ``y``.

    Like tensordot, this function uses a series of dimshuffles and
    reshapes to reduce the tensor dot product to a matrix or vector
    dot product.  Finally, it calls batched_dot to compute the result.
    """
    # Imported locally, presumably to break the import cycle between this
    # module and `theano.tensor.basic` — NOTE(review): confirm.
    from theano.tensor.basic import _tensordot_as_dot

    return _tensordot_as_dot(x, y, axes, dot=batched_dot, batched=True)
...@@ -8,6 +8,7 @@ from theano.graph.basic import Apply ...@@ -8,6 +8,7 @@ from theano.graph.basic import Apply
from theano.graph.op import ExternalCOp, OpenMPOp from theano.graph.op import ExternalCOp, OpenMPOp
from theano.graph.opt import local_optimizer from theano.graph.opt import local_optimizer
from theano.link.c.cmodule import GCC_compiler from theano.link.c.cmodule import GCC_compiler
from theano.tensor.blas import batched_dot
from theano.tensor.extra_ops import cpu_contiguous from theano.tensor.extra_ops import cpu_contiguous
from theano.tensor.opt import register_canonicalize from theano.tensor.opt import register_canonicalize
from theano.tensor.type import ftensor3, fvector from theano.tensor.type import ftensor3, fvector
...@@ -200,7 +201,7 @@ class ConnectionistTemporalClassification(ExternalCOp, OpenMPOp): ...@@ -200,7 +201,7 @@ class ConnectionistTemporalClassification(ExternalCOp, OpenMPOp):
assert gradients is not None assert gradients is not None
grad_op = output_grads[0] grad_op = output_grads[0]
total_grad = tt.batched_dot(grad_op, gradients.dimshuffle(1, 0, 2)).dimshuffle( total_grad = batched_dot(grad_op, gradients.dimshuffle(1, 0, 2)).dimshuffle(
1, 0, 2 1, 0, 2
) )
return [ return [
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论