提交 b3ce3640 authored 作者: Maxim Kochurov's avatar Maxim Kochurov 提交者: Brandon T. Willard

Remove tests.gpuarray

上级 c803c67e
#section kernels
#kernel eye : *, size, size, size :
#include <cluda.h>
/* The eye name will be used to generate supporting objects. The only
one you probably need to care about is the kernel object, which will be
named 'k_' + <the name above> (k_eye in this case). This name also
has to match the kernel function name below.
*/
KERNEL void eye(GLOBAL_MEM DTYPE_OUTPUT_0 *a, ga_size a_off, ga_size n, ga_size m) {
/* Apply the byte offset to get the real start of the output buffer. */
a = (GLOBAL_MEM DTYPE_OUTPUT_0 *)(((GLOBAL_MEM char *)a) + a_off);
/* Diagonal length is min(n, m); write 1 on each diagonal element.
   Each work item strides by the local size over the diagonal. */
ga_size nb = n < m ? n : m;
for (ga_size i = LID_0; i < nb; i += LDIM_0) {
a[i*m + i] = 1;
}
}
#section support_code_struct
/* Fill *z with a freshly allocated (n x m) array that is zero everywhere
 * except for ones on the main diagonal (an "eye" matrix), computed on the
 * GPU via the eye kernel declared in the kernels section.
 *
 * n, m: 0-d arrays holding the requested output shape.
 * z:    output GPU array (any previous value is released).
 * Returns 0 on success, -1 with a Python exception set on failure. */
int APPLY_SPECIFIC(tstgpueye)(PyArrayObject *n, PyArrayObject *m,
                              PyGpuArrayObject **z, PARAMS_TYPE* params) {
  size_t dims[2] = {0, 0};
  size_t ls, gs;
  int err;
  /* Extract the scalar shape values from the 0-d input arrays. */
  dims[0] = ((DTYPE_INPUT_0 *)PyArray_DATA(n))[0];
  dims[1] = ((DTYPE_INPUT_1 *)PyArray_DATA(m))[0];
  /* Drop any previous output and allocate a zeroed C-ordered array. */
  Py_XDECREF(*z);
  *z = pygpu_zeros(2, dims,
                   params->typecode,
                   GA_C_ORDER,
                   params->context, Py_None);
  if (*z == NULL)
    return -1;
  ls = 1;
  gs = 256;
  /* The eye_call name comes from the kernel declaration above. */
  err = eye_call(1, &gs, &ls, 0, (*z)->ga.data, (*z)->ga.offset, dims[0], dims[1]);
  if (err != GA_NO_ERROR) {
    /* Bug fix: the message used to read "n%lu" (missing '='). */
    PyErr_Format(PyExc_RuntimeError,
                 "gpuarray error: kEye: %s. n=%lu, m=%lu.",
                 GpuKernel_error(&k_eye, err),
                 (unsigned long)dims[0], (unsigned long)dims[1]);
    return -1;
  }
  return 0;
}
差异被折叠。
import pytest

import aesara.gpuarray
import aesara.tensor

# pygpu is a hard requirement for every test in this package: skip the
# whole module (not just individual tests) when it is absent.
if aesara.gpuarray.pygpu is None:
    pytest.skip("pygpu not installed", allow_module_level=True)

# Try to initialize the CUDA back-end once, remembering any failure so the
# skip message below can report the original error.
init_error = None
if not aesara.gpuarray.pygpu_activated and not aesara.config.force_device:
    try:
        aesara.gpuarray.init_dev("cuda")
    except Exception as e:
        init_error = e

if not aesara.gpuarray.pygpu_activated:
    if init_error:
        pytest.skip(str(init_error), allow_module_level=True)
    else:
        pytest.skip("pygpu disabled", allow_module_level=True)

# GPU context name used by the tests; None selects the default context.
test_ctx_name = None

# Build the compilation modes shared by the GPU tests:
# - mode_with_gpu lifts graphs onto the gpuarray back-end,
# - mode_without_gpu is the matching CPU reference mode.
# FAST_COMPILE lacks the optimizations needed for GPU lifting, so fall
# back to FAST_RUN in that case.
if aesara.config.mode == "FAST_COMPILE":
    mode_with_gpu = (
        aesara.compile.mode.get_mode("FAST_RUN").including("gpuarray").excluding("gpu")
    )
    mode_without_gpu = aesara.compile.mode.get_mode("FAST_RUN").excluding("gpuarray")
else:
    mode_with_gpu = (
        aesara.compile.mode.get_default_mode().including("gpuarray").excluding("gpu")
    )
    mode_without_gpu = aesara.compile.mode.get_default_mode().excluding("gpuarray")

# Disable the Python-implementation check on the CPU reference mode.
mode_without_gpu.check_py_code = False
# If using float16, cast reference input to float32
def ref_cast(x):
    """Upcast a float16 symbolic variable to float32; pass others through.

    Used so that CPU reference computations run in float32 even when the
    GPU computation under test uses float16.
    """
    is_half = x.type.dtype == "float16"
    return aesara.tensor.cast(x, "float32") if is_half else x
import numpy as np
import aesara
from aesara.tensor.math import dot, sigmoid, tanh
class Model:
    """Container that tracks layers, their parameters, and extra updates."""

    def __init__(self, name=""):
        self.name = name
        self.layers = []
        self.params = []
        self.other_updates = {}

    def add_layer(self, layer):
        """Register *layer*, absorbing its parameters and any extra updates."""
        self.layers.append(layer)
        self.params.extend(layer.params)
        # Layers may optionally expose (variable, update) pairs.
        if hasattr(layer, "other_updates"):
            for pair in layer.other_updates:
                self.other_updates[pair[0]] = pair[1]

    def get_params(self):
        """Return the flat list of all registered parameters."""
        return self.params
def uniform(stdev, size):
    """uniform distribution with the given stdev and size"""
    # A uniform on [-a, a] has standard deviation a / sqrt(3), so pick
    # a = stdev * sqrt(3) to obtain the requested stdev.
    bound = stdev * np.sqrt(3)
    samples = np.random.uniform(low=-bound, high=bound, size=size)
    return samples.astype(aesara.config.floatX)
def linear_transform_weights(input_dim, output_dim, param_list=None, name=""):
    """Create an Aesara shared weight matrix of shape (input_dim, output_dim).

    The matrix is He-style initialized (uniform with stdev sqrt(2/input_dim))
    and appended to *param_list* so the caller's model can track it.

    Raises
    ------
    ValueError
        If *param_list* is None: the weight must be registered somewhere.
    """
    # Raise explicitly instead of `assert`: asserts are stripped under -O.
    if param_list is None:
        raise ValueError("param_list must be provided so W can be registered")
    # Fixed typo in the local name (was "weight_inialization").
    weight_initialization = uniform(np.sqrt(2.0 / input_dim), (input_dim, output_dim))
    W = aesara.shared(weight_initialization, name=name)
    param_list.append(W)
    return W
def bias_weights(length, param_list=None, name=""):
    """aesara shared variable for bias unit, given length"""
    zeros = np.zeros(length).astype(aesara.config.floatX)
    bias = aesara.shared(zeros, name=name)
    # Registration is optional for biases, unlike linear weights.
    if param_list is not None:
        param_list.append(bias)
    return bias
class Layer:
    """Generic layer template which all layers should inherit.

    Holds a display name and the list of trainable parameters.
    """

    def __init__(self, name=""):
        self.name = name
        self.params = []

    def get_params(self):
        """Return this layer's trainable parameters."""
        return self.params
class GRU(Layer):
    # Gated Recurrent Unit layer: scans `step` over the input sequence and
    # exposes the per-step hidden states via output().
    def __init__(self, input_dim, output_dim, input_layer, s0=None, name=""):
        """Layers information"""
        self.name = name
        self.input_dim = input_dim
        self.hidden_dim = output_dim
        self.output_dim = output_dim
        self.input_layer = input_layer
        self.X = input_layer.output()
        # Initial hidden state passed to scan's outputs_info (may be None).
        self.s0 = s0
        self.params = []
        """Layers weights"""
        """self.params is passed so that any parameters could be appended to it"""
        # Input-to-hidden weights W_* with biases b_w*, and hidden-to-hidden
        # (recurrent) weights R_* with biases b_r*:
        #   r = reset gate, i = update gate, h = candidate state.
        self.W_r = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_r"
        )
        self.b_wr = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wr"
        )
        self.W_i = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_i"
        )
        self.b_wi = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wi"
        )
        self.W_h = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_h"
        )
        self.b_wh = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wh"
        )
        self.R_r = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_r"
        )
        self.b_rr = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_rr"
        )
        self.R_i = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_i"
        )
        # NOTE(review): named b_ru but used below as the recurrent bias of
        # the update gate i_t — presumably a naming quirk; confirm.
        self.b_ru = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_ru"
        )
        self.R_h = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_h"
        )
        self.b_rh = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_rh"
        )
        """step through processed input to create output"""

        def step(inp, s_prev):
            # Update gate: how much of the previous state to keep.
            i_t = sigmoid(
                dot(inp, self.W_i) + dot(s_prev, self.R_i) + self.b_wi + self.b_ru
            )
            # Reset gate: how much of the previous state feeds the candidate.
            r_t = sigmoid(
                dot(inp, self.W_r) + dot(s_prev, self.R_r) + self.b_wr + self.b_rr
            )
            # Candidate state with the reset gate applied to the recurrent term.
            h_hat_t = tanh(
                dot(inp, self.W_h)
                + (r_t * (dot(s_prev, self.R_h) + self.b_rh))
                + self.b_wh
            )
            # Convex mix of candidate and previous state.
            s_curr = ((1.0 - i_t) * h_hat_t) + (i_t * s_prev)
            return s_curr

        outputs_info = self.s0
        states, updates = aesara.scan(
            fn=step, sequences=[self.X], outputs_info=outputs_info
        )
        self.Y = states

    def output(self):
        # Sequence of hidden states, one per input time step.
        return self.Y
class LSTM(Layer):
    # Long Short-Term Memory layer: scans `step` over the input sequence,
    # tracking both hidden states (self.Y) and cell states (self.C).
    def __init__(self, input_dim, output_dim, input_layer, s0=None, c0=None, name=""):
        """Layers information"""
        self.name = name
        self.input_dim = input_dim
        self.hidden_dim = output_dim
        self.output_dim = output_dim
        self.input_layer = input_layer
        self.X = input_layer.output()
        # Initial hidden (s0) and cell (c0) states for scan (may be None).
        self.s0 = s0
        self.c0 = c0
        self.params = []
        """Layers weights"""
        """self.params is passed so that any parameters could be appended to it"""
        # Input-to-hidden weights W_* with biases b_w*, and recurrent
        # weights R_* with biases b_r*:
        #   i = input gate, f = forget gate, c = candidate cell, o = output gate.
        self.W_i = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_i"
        )
        self.b_wi = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wi"
        )
        self.W_f = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_f"
        )
        self.b_wf = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wf"
        )
        self.W_c = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_c"
        )
        self.b_wc = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wc"
        )
        self.W_o = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_o"
        )
        self.b_wo = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wo"
        )
        self.R_i = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_i"
        )
        self.b_ri = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_ri"
        )
        self.R_f = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_f"
        )
        self.b_rf = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_rf"
        )
        self.R_c = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_c"
        )
        self.b_rc = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_rc"
        )
        self.R_o = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_o"
        )
        self.b_ro = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_ro"
        )
        """step through processed input to create output"""

        def step(x_t, h_tm1, c_tm1):
            # Standard LSTM gating: each gate mixes the current input with
            # the previous hidden state plus both bias terms.
            i_t = sigmoid(
                dot(x_t, self.W_i) + dot(h_tm1, self.R_i) + self.b_wi + self.b_ri
            )
            f_t = sigmoid(
                dot(x_t, self.W_f) + dot(h_tm1, self.R_f) + self.b_wf + self.b_rf
            )
            o_t = sigmoid(
                dot(x_t, self.W_o) + dot(h_tm1, self.R_o) + self.b_ro + self.b_wo
            )
            c_hat_t = tanh(
                dot(x_t, self.W_c) + dot(h_tm1, self.R_c) + self.b_wc + self.b_rc
            )
            # New cell state: forget part of the old, add the gated candidate.
            c_t = f_t * c_tm1 + i_t * c_hat_t
            h_t = o_t * tanh(c_t)
            return h_t, c_t

        outputs_info = [self.s0, self.c0]
        states, updates = aesara.scan(
            fn=step, sequences=[self.X], outputs_info=outputs_info
        )
        # scan returns [hidden states, cell states].
        self.Y = states[0]
        self.C = states[1]

    def output(self):
        # Sequence of hidden states, one per input time step.
        return self.Y
class FC(Layer):
    """Fully connected (affine) layer: output = X @ W + b."""

    def __init__(self, input_dim, output_dim, input_layer, name=""):
        self.input_layer = input_layer
        self.name = name
        self.params = []
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.X = self.input_layer.output()
        # The helpers register W and b into self.params as a side effect.
        self.W = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W"
        )
        self.b = bias_weights((output_dim,), param_list=self.params, name=name + ".b")

    def output(self):
        """Return the affine transform of the upstream layer's output."""
        projected = dot(self.X, self.W)
        return projected + self.b
class WrapperLayer(Layer):
    """Adapt a raw symbolic variable to the Layer interface.

    Lets a plain variable act as the input layer of a model; it owns no
    parameters and returns the wrapped variable unchanged.
    """

    def __init__(self, X, name=""):
        self.params = []
        self.name = name
        self.X = X

    def output(self):
        return self.X
# This script allows to run one specific cuDNN convolution test case.
# This script should not be imported, but only used as a program.
# python run_dnn_conv.py --help # Print help.
# python run_dnn_conv.py {fwd|bwd-filter|bwd-data} {2d|3d} -a <algo> -i <inputShape> -f <filterShape> ...
import argparse
import sys
import aesara
from aesara.configdefaults import SUPPORTED_DNN_CONV_ALGO_RUNTIME
from aesara.gpuarray.cudnn_defs import (
DOUBLE,
DOUBLE_CONFIG,
FLOAT,
FLOAT_CONFIG,
HALF,
PSEUDO_HALF_CONFIG,
TRUE_HALF_CONFIG,
)
from aesara.tensor.nnet.abstract_conv import get_conv_output_shape
from tests.gpuarray.check_dnn_conv import CheckDnn, TestDnnConv2D, TestDnnConv3D, cudnn
# This file is a command-line tool only; refuse to be imported as a module.
if __name__ != "__main__":
    raise ImportError("This script must not be imported.")
class TupleAction(argparse.Action):
    """Argparse action parsing a comma-separated string into an int tuple.

    E.g. ``"1,2,3"`` becomes ``(1, 2, 3)``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        parsed = tuple(map(int, values.split(",")))
        setattr(namespace, self.dest, parsed)
class BorderAction(TupleAction):
    """Like TupleAction, but lets the literal border-mode names through.

    ``"valid"``, ``"full"`` and ``"half"`` are stored as-is; anything else
    is parsed as a comma-separated tuple of ints.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        if values in ("valid", "full", "half"):
            setattr(namespace, self.dest, values)
        else:
            super().__call__(parser, namespace, values, option_string)
# Raw command-line arguments (parsed further below).
args = sys.argv[1:]
# Names of the three supported computations, with aliases FWD / BWD_FILTER /
# BWD_DATA used by the dispatch logic at the bottom of the script.
computations = FWD, BWD_FILTER, BWD_DATA = ("fwd", "gradweight", "gradinput")
# All known cuDNN algorithm names (fwd + both gradients, deduplicated),
# plus the runtime-chosen algorithm specifiers.
algorithms = (
    tuple(
        sorted(
            list(
                set(
                    cudnn.cudnnConvolutionFwdAlgo_t.get_aliases()
                    + cudnn.cudnnConvolutionBwdFilterAlgo_t.get_aliases()
                    + cudnn.cudnnConvolutionBwdDataAlgo_t.get_aliases()
                )
            )
        )
    )
    + SUPPORTED_DNN_CONV_ALGO_RUNTIME
)
# Scalar dtypes selectable with -t / -p.
types = (HALF, FLOAT, DOUBLE)
# Named (data type, precision) pairs selectable with -D.
data_type_configurations = dict(
    TRUE_HALF_CONFIG=TRUE_HALF_CONFIG,
    PSEUDO_HALF_CONFIG=PSEUDO_HALF_CONFIG,
    FLOAT_CONFIG=FLOAT_CONFIG,
    DOUBLE_CONFIG=DOUBLE_CONFIG,
)
# Command-line interface. Shapes are passed as comma-separated integer
# lists (TupleAction); the border mode additionally accepts the literal
# modes "valid", "full" and "half" (BorderAction).
parser = argparse.ArgumentParser()
parser.add_argument("computation", choices=computations, help="Computation to run.")
parser.add_argument(
    "-a",
    "--algo",
    choices=algorithms,
    required=True,
    help="Algorithm to use for computation.",
)
parser.add_argument(
    "-i",
    "--input-shape",
    action=TupleAction,
    required=True,
    help="Input shape. Comma-separated list of integers (no spaces).",
)
parser.add_argument(
    "-f",
    "--filter-shape",
    action=TupleAction,
    required=True,
    help="Filter shape. Comma-separated list of integers (no spaces).",
)
# Either -D (named configuration) or -t/-p (explicit pair), never both;
# this exclusivity is enforced after parsing.
parser.add_argument(
    "-D",
    "--dtype-config",
    choices=list(sorted(data_type_configurations.keys())),
    default=None,
    help="Data type configuration for (data type; precision). Default (aesara floatX; aesara floatX). "
    "To specify data type configuration, you can either use this option or set data type and "
    'precision separately with "-t" and "-p" options.',
)
parser.add_argument(
    "-t",
    "--dtype",
    choices=types,
    default=None,
    help="Data type (default aesara floatX).",
)
parser.add_argument(
    "-p",
    "--precision",
    choices=types,
    default=None,
    help="Precision (default aesara floatX).",
)
parser.add_argument(
    "-s",
    "--subsample",
    action=TupleAction,
    help="Subsample. Comma-separated list of integers (no spaces). "
    "Default: 1 per dimension.",
)
parser.add_argument(
    "-d",
    "--dilation",
    action=TupleAction,
    help="Dilation. Comma-separated list of integers (no spaces). "
    "Default: 1 per dimension.",
)
parser.add_argument(
    "-b",
    "--border-mode",
    default="valid",
    action=BorderAction,
    help='Border mode. "valid" (default), "full", "half" '
    "or a comma-separated list of integers (no spaces).",
)
parser.add_argument(
    "-c",
    "--conv-mode",
    choices=("conv", "cross"),
    default="conv",
    help="Conv mode (default: conv).",
)
parser.add_argument(
    "-A",
    "--alpha",
    type=float,
    default=1,
    help="alpha (floating), must not be zero. Default 1.",
)
parser.add_argument(
    "-B", "--beta", type=float, default=0, help="beta (floating). Default 0."
)
parser.add_argument(
    "-I",
    "--print-infos",
    action="store_true",
    default=False,
    help="Print some infos before testing.",
)
args = parser.parse_args(args)
test = args.computation

# --- Validate shapes: input and filter must share a rank of 4 (2D conv)
# or 5 (3D conv); ndim is the number of spatial dimensions (2 or 3).
if len(args.input_shape) != len(args.filter_shape):
    raise ValueError("Expected same length for input shape and filter shape")
if len(args.input_shape) not in (4, 5):
    raise ValueError("Expected length 4 or 5 for input shape")
ndim = len(args.input_shape) - 2
# The check above guarantees ndim is 2 or 3, so `tests` is always bound.
if ndim == 2:
    tests = TestDnnConv2D()
elif ndim == 3:
    tests = TestDnnConv3D()

# Default subsample/dilation: 1 per spatial dimension.
if args.subsample is None:
    args.subsample = (1,) * ndim
if args.dilation is None:
    args.dilation = (1,) * ndim
if not (ndim == len(args.subsample) == len(args.dilation)):
    raise ValueError(f"Expected parameters sized for {int(ndim)} dimensions.")
if isinstance(args.border_mode, tuple) and ndim != len(args.border_mode):
    raise ValueError(f"Expected borders sized for {int(ndim)} dimensions.")
if args.alpha == 0:
    raise ValueError("Nothing could be computed if alpha is 0.")

# --- Resolve the (dtype, precision) pair: either both default to floatX,
# or they come from the named configuration -D (exclusive with -t/-p).
if args.dtype_config is None:
    if args.dtype is None:
        args.dtype = aesara.config.floatX
    if args.precision is None:
        args.precision = aesara.config.floatX
else:
    if args.dtype is not None or args.precision is not None:
        raise ValueError(
            "You must specify either -D <data-type-configuration> "
            "or (-t <data-type> -p <precision>), not both."
        )
    args.dtype, args.precision = data_type_configurations[args.dtype_config]
if (args.dtype, args.precision) not in cudnn.get_supported_dtype_configs():
    raise ValueError(
        f"Unsupported data type configuration {args.dtype} {args.precision}."
    )

# Warn (but do not abort) if the chosen algorithm does not normally support
# the requested dtype/precision for this computation.
if args.algo not in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
    check_config = False
    if test == FWD:
        check_config = cudnn.fwd_algo_supports_dtype_config(
            args.algo, args.dtype, args.precision, ndim
        )
    elif test == BWD_FILTER:
        check_config = cudnn.bwd_filter_algo_supports_dtype_config(
            args.algo, args.dtype, args.precision, ndim
        )
    elif test == BWD_DATA:
        check_config = cudnn.bwd_data_algo_supports_dtype_config(
            args.algo, args.dtype, args.precision, ndim
        )
    if not check_config:
        print(
            "Warning: %s computation does not normally support configuration (%s, %s) for algo %s."
            % (test, args.dtype, args.precision, args.algo),
            file=sys.stderr,
        )

algo = args.algo
dtype = args.dtype
precision = args.precision
# Parameter tuple in the order expected by the run_conv_* test helpers.
parameters = (
    args.input_shape,
    args.filter_shape,
    args.subsample,
    args.dilation,
    args.border_mode,
    args.conv_mode,
    args.alpha,
    args.beta,
)
if args.print_infos:
    CheckDnn.print_infos(count_tests=False)
print("======================")
print("Running", test, algo, dtype, precision, *parameters)
# Dispatch to the requested computation and report the output shape.
if test == FWD:
    tests.run_conv_fwd(algo, dtype, precision, parameters)
    expected_output_shape = get_conv_output_shape(
        args.input_shape,
        args.filter_shape,
        args.border_mode,
        args.subsample,
        args.dilation,
    )
elif test == BWD_FILTER:
    tests.run_conv_gradweight(algo, dtype, precision, parameters)
    expected_output_shape = args.filter_shape
elif test == BWD_DATA:
    tests.run_conv_gradinput(algo, dtype, precision, parameters)
    expected_output_shape = args.input_shape
print("Computed shape:", expected_output_shape)
print("... OK")
差异被折叠。
差异被折叠。
import itertools
import numpy as np
import aesara
from aesara.configdefaults import config
from aesara.gpuarray import gpuarray_shared_constructor
from aesara.gpuarray.blas import (
GpuGemm,
GpuGer,
gpu_dot22,
gpugemm_inplace,
gpugemm_no_inplace,
gpugemmbatch_inplace,
gpugemv_inplace,
gpugemv_no_inplace,
gpuger_inplace,
gpuger_no_inplace,
)
from aesara.tensor.blas import (
BatchedDot,
_dot22,
batched_dot,
gemm_inplace,
gemv,
gemv_inplace,
)
from aesara.tensor.math import dot
from aesara.tensor.type import matrix, tensor, tensor3, vector
from tests import unittest_tools as utt
from tests.gpuarray.config import mode_with_gpu, test_ctx_name
from tests.gpuarray.test_basic_ops import makeTester, rand
from tests.tensor.test_blas import BaseGemv, TestGer
# Auto-generated test class comparing CPU gemv_inplace against the GPU
# implementation on the listed cases (z, alpha, A, x, beta).
TestGpuGemv = makeTester(
    "GpuGemvTester",
    op=gemv_inplace,
    gpu_op=gpugemv_inplace,
    # It doesn't support float16
    cases=dict(
        dot_vv=[rand(1), 1.0, rand(1, 2), rand(2), 0.0],
        dot_vm=[rand(3), 1.0, rand(3, 2), rand(2), 0.0],
        float32=[
            rand(3).astype("float32"),
            np.float32(1),
            rand(3, 2).astype("float32"),
            rand(2).astype("float32"),
            np.float32(0),
        ],
        float64=[
            rand(3).astype("float64"),
            np.float64(1),
            rand(3, 2).astype("float64"),
            rand(2).astype("float64"),
            np.float64(0),
        ],
        # Empty-dimension cases kept for reference but disabled:
        # test_02=[rand(0), 1, rand(0, 2), rand(2), 0],
        # test_30=[rand(3), 1, rand(3, 0), rand(0), 0],
        # test_00=[rand(0), 1, rand(0, 0), rand(0), 0],
        # Negative strides via reversed views.
        test_stride=[rand(3)[::-1], 1.0, rand(3, 2)[::-1], rand(2)[::-1], 0.0],
    ),
)
def test_float16():
    # Check the float16 paths of gemv (lifted to GpuGemm), gemm and dot22
    # against NumPy references computed on the host.

    # gemv (gemm called): data is (y, alpha, A, x, beta).
    float16_data = [
        rand(3).astype("float16"),
        np.asarray(1, dtype=np.float32),
        rand(3, 3).astype("float16"),
        rand(3).astype("float16"),
        np.asarray(0.5, dtype=np.float32),
    ]
    float16_shared = [
        gpuarray_shared_constructor(val, target=test_ctx_name) for val in float16_data
    ]
    o = gemv(*float16_shared)
    f = aesara.function([], o, mode=mode_with_gpu)
    y, alpha, A, x, beta = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)
    topo = f.maker.fgraph.toposort()
    # The float16 gemv must have been rewritten into a GpuGemm node.
    assert any(isinstance(n.op, GpuGemm) for n in topo)

    # gemm: same structure with matrix operands.
    float16_data = [
        rand(3, 3).astype("float16"),
        np.asarray(1, dtype=np.float32),
        rand(3, 3).astype("float16"),
        rand(3, 3).astype("float16"),
        np.asarray(0.5, dtype=np.float32),
    ]
    float16_shared = [
        gpuarray_shared_constructor(val, target=test_ctx_name) for val in float16_data
    ]
    o = gpugemm_no_inplace(*float16_shared)
    f = aesara.function([], o)
    y, alpha, A, x, beta = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)

    # dot22: plain matrix product, no alpha/beta.
    float16_data = [rand(3, 3).astype("float16"), rand(3, 3).astype("float16")]
    float16_shared = [gpuarray_shared_constructor(val) for val in float16_data]
    o = gpu_dot22(*float16_shared)
    f = aesara.function([], o)
    x, y = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), np.dot(x, y))
class TestGpuSgemv(BaseGemv, utt.OptimizationTestMixin):
    # Run the generic BaseGemv test suite on the GPU ops in float32.
    mode = mode_with_gpu
    dtype = "float32"
    gemv = gpugemv_no_inplace
    gemv_inplace = gpugemv_inplace

    @staticmethod
    def shared(val):
        # Prefer a GPU shared variable; fall back to a host one for values
        # the GPU constructor rejects.
        try:
            return gpuarray_shared_constructor(val)
        except TypeError:
            return aesara.shared(val)
# Auto-generated test class comparing CPU gemm_inplace against the GPU
# implementation; cases are (z, alpha, x, y, beta) covering the sign and
# zero combinations of alpha/beta.
TestGpuGemm = makeTester(
    "GpuGemmTester",
    op=gemm_inplace,
    gpu_op=gpugemm_inplace,
    # float16 tested in test_float16
    cases=dict(
        test1=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 0.0],
        test2=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 1.0],
        test3=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), -1.0],
        test4=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), 0.0],
        test5=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), 0.6],
        test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
        test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
        test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
        float32=[
            rand(3, 4).astype("float32"),
            np.float32(-1.0),
            rand(3, 5).astype("float32"),
            rand(5, 4).astype("float32"),
            np.float32(-1.1),
        ],
        float64=[
            rand(3, 4).astype("float64"),
            np.float64(-1.0),
            rand(3, 5).astype("float64"),
            rand(5, 4).astype("float64"),
            np.float64(-1.1),
        ],
        # Empty-dimension cases kept for reference but disabled:
        # test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
        # test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
        # test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
        # test13=[rand(0, 0), -1.0, rand(0, 0), rand(0, 0), -1.1],
    ),
)
# Batched-gemm cases (z, alpha, x, y, beta) over every 4-subset of distinct
# prime-ish sizes, so batch/M/K/N are all different and mismatches surface.
gemm_batched_tests = {
    "test_b%im%ik%in%i"
    % (b, m, k, n): [rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()]
    for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4)
}
# Explicit dtype cases (batch 3, 4x4 times 4x7 products).
gemm_batched_tests["float16"] = [
    rand(3, 4, 7).astype("float16"),
    rand().astype("float16"),
    rand(3, 4, 4).astype("float16"),
    rand(3, 4, 7).astype("float16"),
    rand().astype("float16"),
]
gemm_batched_tests["float32"] = [
    rand(3, 4, 7).astype("float32"),
    rand().astype("float32"),
    rand(3, 4, 4).astype("float32"),
    rand(3, 4, 7).astype("float32"),
    rand().astype("float32"),
]
gemm_batched_tests["float64"] = [
    rand(3, 4, 7).astype("float64"),
    rand().astype("float64"),
    rand(3, 4, 4).astype("float64"),
    rand(3, 4, 7).astype("float64"),
    rand().astype("float64"),
]
# Reference op is built from BatchedDot since there is no CPU batched-gemm.
TestGpuGemmBatch = makeTester(
    "GpuGemmBatchTester",
    op=lambda z, alpha, x, y, beta: alpha * BatchedDot()(x, y) + beta * z,
    gpu_op=gpugemmbatch_inplace,
    cases=gemm_batched_tests,
)
class TestGpuGemmBatchStrided:
    def test_basic(self):
        # Regression test for batched_dot on a strided (non-contiguous)
        # operand. Reported in https://github.com/Theano/Theano/issues/5730
        x = tensor3()
        y = tensor3()
        # y[:, 0, :, np.newaxis] produces a strided view of y.
        z = batched_dot(x, y[:, 0, :, np.newaxis])
        f = aesara.function([x, y], z, mode=mode_with_gpu)
        x_num = np.arange(32 * 19 * 600, dtype=config.floatX).reshape((32, 19, 600))
        y_num = np.arange(7 * 32 * 600, dtype=config.floatX).reshape((32, 7, 600))
        f(x_num, y_num)
        # The batched gemm must still have been made inplace.
        assert f.maker.fgraph.toposort()[-2].op.inplace
class TestGpuSger(TestGer):
    # Run the generic TestGer suite against the GPU ger implementation.
    def setup_method(self):
        self.mode = mode_with_gpu
        dtype = self.dtype = "float32"  # optimization isn't dtype-dependent
        self.A = tensor(dtype=dtype, broadcastable=(False, False))
        self.a = tensor(dtype=dtype, broadcastable=())
        self.x = tensor(dtype=dtype, broadcastable=(False,))
        self.y = tensor(dtype=dtype, broadcastable=(False,))
        self.ger_destructive = gpuger_inplace
        # data on the gpu make the op always inplace
        self.ger = gpuger_inplace
        self.gemm = gpugemm_inplace
        super().setup_method()
class TestGpuSgerNoTransfer(TestGpuSger):
    # Same suite, but shared values start on the GPU (no host transfer).
    shared = staticmethod(gpuarray_shared_constructor)
class TestGpuGer_OpContract(utt.OpContractTestMixin):
    # Verify the GPU ger ops honor the generic Op contract (eq/hash/clone).
    def setup_method(self):
        self.ops = [gpuger_no_inplace, gpuger_inplace]

    def clone(self, op):
        return GpuGer(inplace=op.inplace)
# Auto-generated test class comparing CPU _dot22 with the GPU version on
# matrix pairs, including unit dimensions.
TestGpuDot22 = makeTester(
    "GpuDot22Tester",
    op=_dot22,
    gpu_op=gpu_dot22,
    cases=dict(
        test1=[rand(3, 4), rand(4, 5)],
        test2=[rand(1, 4), rand(4, 5)],
        test3=[rand(3, 1), rand(1, 5)],
        test4=[rand(3, 4), rand(4, 1)],
        # Empty-dimension cases kept for reference but disabled:
        # test5=[rand(0, 4), rand(4, 5)],
        # test6=[rand(3, 0), rand(0, 5)],
        # test7=[rand(3, 4), rand(4, 0)],
        # test8=[rand(0, 4), rand(4, 0)],
        # test9=[rand(0, 0), rand(0, 0)],
    ),
)
def test_gemv_zeros():
    # A gemv with an empty inner dimension must return zeros, not crash.
    W = matrix()
    v = vector()
    f = aesara.function([W, v], W.dot(v), mode=mode_with_gpu)
    # Apply to an empty matrix of shape (1000, 0) and an empty vector of
    # shape (0,); the product is a zero vector of length 1000.
    dim = 1000
    A = np.zeros((dim, 0), dtype=aesara.config.floatX)
    b = np.zeros((0,), dtype=aesara.config.floatX)
    tmp = f(A, b)
    assert np.allclose(tmp, np.zeros((dim,)))
def test_gemv_dot_strides():
    # Regression test for gemv on negatively-strided (reversed) inputs.
    # Reported in https://github.com/Theano/Theano/issues/6142
    xv = rand(5)
    yv = rand(5, 1)
    x = gpuarray_shared_constructor(xv)
    y = gpuarray_shared_constructor(yv, broadcastable=(False, True))
    f = aesara.function([], dot(x, y[::-1]), mode=mode_with_gpu)
    out = f()
    utt.assert_allclose(out, np.dot(xv, yv[::-1]))
import numpy as np
import pytest
import aesara
import tests.unittest_tools as utt
from aesara.gpuarray.blocksparse import (
GpuSparseBlockGemv,
GpuSparseBlockOuter,
gpu_sparse_block_gemv,
gpu_sparse_block_outer,
)
from aesara.gpuarray.type import gpuarray_shared_constructor
from aesara.tensor.type import fmatrix, ftensor3, lmatrix
from tests.gpuarray.config import mode_with_gpu, test_ctx_name
from tests.tensor.nnet.test_blocksparse import TestBlockSparseGemvAndOuter
class TestBlockSparseGemvAndOuterGPUarray(TestBlockSparseGemvAndOuter):
    # Run the generic block-sparse gemv/outer suite with the GPU ops.
    def setup_method(self):
        self.mode = mode_with_gpu.excluding("constant_folding")
        self.gemv_op = gpu_sparse_block_gemv
        self.outer_op = gpu_sparse_block_outer
        self.gemv_class = GpuSparseBlockGemv
        self.outer_class = GpuSparseBlockOuter
        super().setup_method()

    @pytest.mark.skip(
        reason="""
    This test is temporarily disabled since we disabled the output_merge
    and alpha_merge optimizations for blocksparse due to brokenness.
    Re-enable when those are re-added.
    """
    )
    def test_blocksparse_grad_merge(self):
        # Check that the learning-rate update W - lr * gW is merged into a
        # single GpuSparseBlockOuter node, and that the merged and unmerged
        # graphs compute the same weights.
        b = fmatrix()
        h = ftensor3()
        iIdx = lmatrix()
        oIdx = lmatrix()
        # gemv_data comes from the parent test class.
        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()
        W = gpuarray_shared_constructor(W_val, context=test_ctx_name)
        o = gpu_sparse_block_gemv(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
        gW = aesara.grad(o.sum(), W)
        lr = np.asarray(0.05, dtype="float32")
        upd = W - lr * gW
        f1 = aesara.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode_with_gpu)
        # Make sure the lr update was merged.
        assert isinstance(f1.maker.fgraph.outputs[0].owner.op, GpuSparseBlockOuter)
        # Exclude the merge optimizations.
        mode = mode_with_gpu.excluding("local_merge_blocksparse_alpha")
        mode = mode.excluding("local_merge_blocksparse_output")
        f2 = aesara.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode)
        # Make sure the lr update is not merged.
        assert not isinstance(f2.maker.fgraph.outputs[0].owner.op, GpuSparseBlockOuter)
        f2(h_val, iIdx_val, b_val, oIdx_val)
        W_ref = W.get_value()
        # reset the var
        W.set_value(W_val)
        f1(h_val, iIdx_val, b_val, oIdx_val)
        W_opt = W.get_value()
        utt.assert_allclose(W_ref, W_opt)
import numpy as np
import pytest
import aesara
from aesara import config
from aesara import tensor as at
from aesara.gpuarray.basic_ops import CGpuKernelBase
from aesara.gpuarray.type import GpuArrayType, get_context, gpu_context_type
from aesara.gradient import grad_undefined
from aesara.graph.basic import Apply
from aesara.link.c.params_type import ParamsType
from aesara.scalar import int32 as int_t
class GpuEye(CGpuKernelBase):
    """Eye for GPU.

    This is an implementation to test that `CGpuKernelBase` works and also
    to use as an example in the docs. It is not used for user graphs.

    The C implementation lives in c_code/tstgpueye.c.
    """

    __props__ = ("dtype", "context_name")
    # Parameters forwarded to the C code: the gpuarray typecode of the
    # output and the GPU context to allocate on.
    params_type = ParamsType(typecode=int_t, context=gpu_context_type)

    def __init__(self, dtype=None, context_name=None):
        if dtype is None:
            dtype = config.floatX
        self.dtype = dtype
        self.context_name = context_name
        super().__init__(["c_code/tstgpueye.c"], "APPLY_SPECIFIC(tstgpueye)")

    def get_params(self, node):
        # pygpu is only needed here, so import lazily and skip tests if absent.
        pygpu_gpuarray = pytest.importorskip("pygpu.gpuarray")
        return self.params_type.get_params(
            typecode=pygpu_gpuarray.dtype_to_typecode(self.dtype),
            context=get_context(self.context_name),
        )

    def c_headers(self, **kwargs):
        return ["<gpuarray/types.h>", "<gpuarray/kernel.h>"]

    def make_node(self, n, m):
        # n and m are scalar (0-d) row/column counts.
        n = at.as_tensor_variable(n)
        m = at.as_tensor_variable(m)
        assert n.ndim == 0
        assert m.ndim == 0
        otype = GpuArrayType(
            dtype=self.dtype,
            broadcastable=(False, False),
            context_name=self.context_name,
        )
        return Apply(self, [n, m], [otype()])

    def infer_shape(self, fgraph, node, in_shapes):
        # Output shape is exactly (n, m).
        out_shape = [node.inputs[0], node.inputs[1]]
        return [out_shape]

    def grad(self, inp, grads):
        # Gradient w.r.t. integer shape inputs is undefined.
        return [grad_undefined(self, i, inp[i]) for i in range(2)]
def test_cgpukernelbase():
    # Import inside the function to prevent the back-end from being
    # initialized when reloading the GpuEye object from cache.
    from .config import mode_with_gpu, test_ctx_name

    # Build a 4x5 int32 identity on the GPU and compare against np.eye.
    op = GpuEye(dtype="int32", context_name=test_ctx_name)
    f = aesara.function([], op(4, 5), mode=mode_with_gpu)
    r = f()
    assert r.dtype == "int32"
    assert (np.asarray(r) == np.eye(4, 5, dtype="int32")).all()
import numpy as np
import pytest
import aesara
import aesara.gpuarray
from aesara.gpuarray.ctc import GpuConnectionistTemporalClassification, gpu_ctc
from aesara.gradient import grad
from aesara.tensor.math import mean
from aesara.tensor.nnet.ctc import (
ConnectionistTemporalClassification,
ctc,
ctc_available,
)
from tests import unittest_tools as utt
from tests.gpuarray.config import mode_with_gpu, mode_without_gpu
from tests.tensor.nnet.test_ctc import setup_ctc_case, setup_grad_case, setup_torch_case
@pytest.mark.skipif(
    not ctc_available(), reason="Optional library warp-ctc not available"
)
class TestCTC:
    # End-to-end tests of the GPU warp-ctc binding: expected values, CPU/GPU
    # agreement, gradient disabling, GPU lifting, and verify_grad.
    def check_ctc(
        self, activations, labels, input_length, expected_costs, expected_grads
    ):
        # Create symbolic variables
        t_activations = aesara.shared(activations, name="activations")
        t_activation_times = aesara.shared(input_length, name="activation_times")
        t_labels = aesara.shared(labels, name="labels")
        inputs = [t_activations, t_labels, t_activation_times]
        # Execute several tests for each test case
        self.check_expected_values(
            t_activations, t_labels, t_activation_times, expected_costs, expected_grads
        )
        self.compare_gpu_and_cpu_values(*inputs)
        self.check_grads_disabled(*inputs)
        self.run_gpu_optimization_with_grad(*inputs)
        self.run_gpu_optimization_no_grad(*inputs)

    def setup_cpu_op(
        self,
        activations,
        labels,
        input_length,
        compute_grad=True,
        mode=mode_without_gpu,
    ):
        # Compile a CPU CTC function; optionally also return the gradient
        # of the mean cost w.r.t. the activations.
        cpu_ctc_cost = ctc(activations, labels, input_length)
        outputs = [cpu_ctc_cost]
        if compute_grad:
            # Symbolic gradient of CTC cost
            cpu_ctc_grad = grad(mean(cpu_ctc_cost), activations)
            outputs += [cpu_ctc_grad]
        return aesara.function([], outputs, mode=mode)

    def setup_gpu_op(self, activations, labels, input_length, compute_grad=True):
        # GPU counterpart of setup_cpu_op, built directly on gpu_ctc.
        gpu_ctc_cost = gpu_ctc(activations, labels, input_length)
        outputs = [gpu_ctc_cost]
        if compute_grad:
            # Symbolic gradient of CTC cost
            gpu_ctc_grad = grad(mean(gpu_ctc_cost), activations)
            outputs += [gpu_ctc_grad]
        return aesara.function([], outputs, mode=mode_with_gpu)

    def check_expected_values(
        self, activations, labels, input_length, expected_costs, expected_grads
    ):
        gpu_train = self.setup_gpu_op(activations, labels, input_length)
        gpu_cost, gpu_grad = gpu_train()
        # Transfer costs from GPU memory to host
        cost_from_gpu = np.asarray(gpu_cost)
        # Transfer gradients from GPU memory to host
        grad_from_gpu = np.asarray(gpu_grad)
        # Check that results are in conformance with expected values.
        # The gradient of the mean divides the expected per-example grads
        # by the batch size (cost_from_gpu.shape[0]).
        utt.assert_allclose(expected_grads / cost_from_gpu.shape[0], grad_from_gpu)
        utt.assert_allclose(expected_costs, cost_from_gpu)

    def compare_gpu_and_cpu_values(self, activations, labels, input_length):
        cpu_train = self.setup_cpu_op(activations, labels, input_length)
        cpu_cost, cpu_grad = cpu_train()
        gpu_train = self.setup_gpu_op(activations, labels, input_length)
        gpu_cost, gpu_grad = gpu_train()
        # Transfer costs from GPU memory to host
        cost_from_gpu = np.asarray(gpu_cost)
        # Transfer gradients from GPU memory to host
        grad_from_gpu = np.asarray(gpu_grad)
        # Check that results are in conformance with expected values
        utt.assert_allclose(cpu_grad, grad_from_gpu)
        utt.assert_allclose(cpu_cost, cost_from_gpu)

    def check_grads_disabled(self, activations, labels, input_length):
        """
        Check if optimization to disable gradients is working
        """
        gpu_ctc_cost = gpu_ctc(activations, labels, input_length)
        gpu_ctc_function = aesara.function([], [gpu_ctc_cost])
        # When the gradient output is unused, compute_grad must be False.
        for node in gpu_ctc_function.maker.fgraph.apply_nodes:
            if isinstance(node.op, GpuConnectionistTemporalClassification):
                assert node.op.compute_grad is False

    def run_gpu_optimization_with_grad(self, activations, labels, input_length):
        # Compile CPU function with optimization
        cpu_lifted_train = self.setup_cpu_op(
            activations, labels, input_length, mode=mode_with_gpu
        )
        # Check whether Op is lifted to the GPU
        assert self.has_only_gpu_op(cpu_lifted_train)

    def run_gpu_optimization_no_grad(self, activations, labels, input_length):
        cpu_train = self.setup_cpu_op(
            activations, labels, input_length, compute_grad=False
        )
        cpu_cost = cpu_train()
        # Compile CPU function with optimization
        cpu_lifted_test = self.setup_cpu_op(
            activations, labels, input_length, compute_grad=False, mode=mode_with_gpu
        )
        # Check whether Op is lifted to the GPU
        assert self.has_only_gpu_op(cpu_lifted_test)
        gpu_cost = cpu_lifted_test()
        # Transfer costs from GPU memory to host
        cost_from_gpu = np.asarray(gpu_cost)
        # Compare values from CPU and GPU Ops
        utt.assert_allclose(cpu_cost, cost_from_gpu)

    def has_only_gpu_op(self, function):
        # True iff the compiled graph contains the GPU CTC op and no CPU one.
        has_cpu_instance = False
        has_gpu_instance = False
        for node in function.maker.fgraph.apply_nodes:
            if isinstance(node.op, ConnectionistTemporalClassification):
                has_cpu_instance = True
            if isinstance(node.op, GpuConnectionistTemporalClassification):
                has_gpu_instance = True
        return has_gpu_instance and (not has_cpu_instance)

    # Test obtained from Torch tutorial at:
    # https://github.com/baidu-research/warp-ctc/blob/master/torch_binding/TUTORIAL.md
    def test_torch_case(self):
        (
            activations,
            labels,
            activation_times,
            expected_costs,
            expected_grads,
        ) = setup_torch_case()
        self.check_ctc(
            activations, labels, activation_times, expected_costs, expected_grads
        )

    def test_ctc(self):
        (
            activations,
            labels,
            input_length,
            expected_costs,
            expected_grads,
        ) = setup_ctc_case()
        self.check_ctc(
            activations, labels, input_length, expected_costs, expected_grads
        )

    def test_verify_grad(self):
        # Numerically verify the gradient of gpu_ctc w.r.t. the activations.
        def ctc_op_functor(labels, in_lengths):
            def wrapper(acts):
                # Create auxiliary symbolic variables
                t_activation_times = aesara.shared(in_lengths, name="activation_times")
                t_labels = aesara.shared(labels, name="labels")
                return gpu_ctc(acts, t_labels, t_activation_times)

            return wrapper

        activations, labels, activation_times = setup_grad_case()
        ctc_op = ctc_op_functor(labels, activation_times)
        utt.verify_grad(ctc_op, [activations], mode=mode_with_gpu)
This source diff could not be displayed because it is too large. You can view the blob instead.
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论