提交 b3ce3640 authored 作者: Maxim Kochurov's avatar Maxim Kochurov 提交者: Brandon T. Willard

Remove tests.gpuarray

上级 c803c67e
#section kernels
#kernel eye : *, size, size, size :
#include <cluda.h>
/* The eye name will be used to generate supporting objects. The only
one you probably need to care about is the kernel object, which will be
named 'k_' + <the name above> (k_eye in this case). This name also
has to match the kernel function name below.
*/
KERNEL void eye(GLOBAL_MEM DTYPE_OUTPUT_0 *a, ga_size a_off, ga_size n, ga_size m) {
/* Apply the byte offset to get the real start of the output buffer. */
a = (GLOBAL_MEM DTYPE_OUTPUT_0 *)(((GLOBAL_MEM char *)a) + a_off);
/* Diagonal length is min(n, m); write 1 on each diagonal element.
   Each work item strides by the local size over the diagonal. */
ga_size nb = n < m ? n : m;
for (ga_size i = LID_0; i < nb; i += LDIM_0) {
a[i*m + i] = 1;
}
}
#section support_code_struct
/* Fill *z with a freshly allocated (n x m) array that is zero everywhere
 * except for ones on the main diagonal (an "eye" matrix), computed on the
 * GPU via the eye kernel declared in the kernels section.
 *
 * n, m: 0-d arrays holding the requested output shape.
 * z:    output GPU array (any previous value is released).
 * Returns 0 on success, -1 with a Python exception set on failure. */
int APPLY_SPECIFIC(tstgpueye)(PyArrayObject *n, PyArrayObject *m,
                              PyGpuArrayObject **z, PARAMS_TYPE* params) {
  size_t dims[2] = {0, 0};
  size_t ls, gs;
  int err;
  /* Extract the scalar shape values from the 0-d input arrays. */
  dims[0] = ((DTYPE_INPUT_0 *)PyArray_DATA(n))[0];
  dims[1] = ((DTYPE_INPUT_1 *)PyArray_DATA(m))[0];
  /* Drop any previous output and allocate a zeroed C-ordered array. */
  Py_XDECREF(*z);
  *z = pygpu_zeros(2, dims,
                   params->typecode,
                   GA_C_ORDER,
                   params->context, Py_None);
  if (*z == NULL)
    return -1;
  ls = 1;
  gs = 256;
  /* The eye_call name comes from the kernel declaration above. */
  err = eye_call(1, &gs, &ls, 0, (*z)->ga.data, (*z)->ga.offset, dims[0], dims[1]);
  if (err != GA_NO_ERROR) {
    /* Bug fix: the message used to read "n%lu" (missing '='). */
    PyErr_Format(PyExc_RuntimeError,
                 "gpuarray error: kEye: %s. n=%lu, m=%lu.",
                 GpuKernel_error(&k_eye, err),
                 (unsigned long)dims[0], (unsigned long)dims[1]);
    return -1;
  }
  return 0;
}
差异被折叠。
import pytest

import aesara.gpuarray
import aesara.tensor

# pygpu is a hard requirement for every test in this package: skip the
# whole module (not just individual tests) when it is absent.
if aesara.gpuarray.pygpu is None:
    pytest.skip("pygpu not installed", allow_module_level=True)

# Try to initialize the CUDA back-end once, remembering any failure so the
# skip message below can report the original error.
init_error = None
if not aesara.gpuarray.pygpu_activated and not aesara.config.force_device:
    try:
        aesara.gpuarray.init_dev("cuda")
    except Exception as e:
        init_error = e

if not aesara.gpuarray.pygpu_activated:
    if init_error:
        pytest.skip(str(init_error), allow_module_level=True)
    else:
        pytest.skip("pygpu disabled", allow_module_level=True)

# GPU context name used by the tests; None selects the default context.
test_ctx_name = None

# Build the compilation modes shared by the GPU tests:
# - mode_with_gpu lifts graphs onto the gpuarray back-end,
# - mode_without_gpu is the matching CPU reference mode.
# FAST_COMPILE lacks the optimizations needed for GPU lifting, so fall
# back to FAST_RUN in that case.
if aesara.config.mode == "FAST_COMPILE":
    mode_with_gpu = (
        aesara.compile.mode.get_mode("FAST_RUN").including("gpuarray").excluding("gpu")
    )
    mode_without_gpu = aesara.compile.mode.get_mode("FAST_RUN").excluding("gpuarray")
else:
    mode_with_gpu = (
        aesara.compile.mode.get_default_mode().including("gpuarray").excluding("gpu")
    )
    mode_without_gpu = aesara.compile.mode.get_default_mode().excluding("gpuarray")

# Disable the Python-implementation check on the CPU reference mode.
mode_without_gpu.check_py_code = False
# If using float16, cast reference input to float32
def ref_cast(x):
    """Upcast a float16 symbolic variable to float32; pass others through.

    Used so that CPU reference computations run in float32 even when the
    GPU computation under test uses float16.
    """
    is_half = x.type.dtype == "float16"
    return aesara.tensor.cast(x, "float32") if is_half else x
import numpy as np
import aesara
from aesara.tensor.math import dot, sigmoid, tanh
class Model:
    """Container that tracks layers, their parameters, and extra updates."""

    def __init__(self, name=""):
        self.name = name
        self.layers = []
        self.params = []
        self.other_updates = {}

    def add_layer(self, layer):
        """Register *layer*, absorbing its parameters and any extra updates."""
        self.layers.append(layer)
        self.params.extend(layer.params)
        # Layers may optionally expose (variable, update) pairs.
        if hasattr(layer, "other_updates"):
            for pair in layer.other_updates:
                self.other_updates[pair[0]] = pair[1]

    def get_params(self):
        """Return the flat list of all registered parameters."""
        return self.params
def uniform(stdev, size):
    """uniform distribution with the given stdev and size"""
    # A uniform on [-a, a] has standard deviation a / sqrt(3), so pick
    # a = stdev * sqrt(3) to obtain the requested stdev.
    bound = stdev * np.sqrt(3)
    samples = np.random.uniform(low=-bound, high=bound, size=size)
    return samples.astype(aesara.config.floatX)
def linear_transform_weights(input_dim, output_dim, param_list=None, name=""):
    """Create an Aesara shared weight matrix of shape (input_dim, output_dim).

    The matrix is He-style initialized (uniform with stdev sqrt(2/input_dim))
    and appended to *param_list* so the caller's model can track it.

    Raises
    ------
    ValueError
        If *param_list* is None: the weight must be registered somewhere.
    """
    # Raise explicitly instead of `assert`: asserts are stripped under -O.
    if param_list is None:
        raise ValueError("param_list must be provided so W can be registered")
    # Fixed typo in the local name (was "weight_inialization").
    weight_initialization = uniform(np.sqrt(2.0 / input_dim), (input_dim, output_dim))
    W = aesara.shared(weight_initialization, name=name)
    param_list.append(W)
    return W
def bias_weights(length, param_list=None, name=""):
    """aesara shared variable for bias unit, given length"""
    zeros = np.zeros(length).astype(aesara.config.floatX)
    bias = aesara.shared(zeros, name=name)
    # Registration is optional for biases, unlike linear weights.
    if param_list is not None:
        param_list.append(bias)
    return bias
class Layer:
    """Generic layer template which all layers should inherit.

    Holds a display name and the list of trainable parameters.
    """

    def __init__(self, name=""):
        self.name = name
        self.params = []

    def get_params(self):
        """Return this layer's trainable parameters."""
        return self.params
class GRU(Layer):
    # Gated Recurrent Unit layer: scans `step` over the input sequence and
    # exposes the per-step hidden states via output().
    def __init__(self, input_dim, output_dim, input_layer, s0=None, name=""):
        """Layers information"""
        self.name = name
        self.input_dim = input_dim
        self.hidden_dim = output_dim
        self.output_dim = output_dim
        self.input_layer = input_layer
        self.X = input_layer.output()
        # Initial hidden state passed to scan's outputs_info (may be None).
        self.s0 = s0
        self.params = []
        """Layers weights"""
        """self.params is passed so that any parameters could be appended to it"""
        # Input-to-hidden weights W_* with biases b_w*, and hidden-to-hidden
        # (recurrent) weights R_* with biases b_r*:
        #   r = reset gate, i = update gate, h = candidate state.
        self.W_r = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_r"
        )
        self.b_wr = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wr"
        )
        self.W_i = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_i"
        )
        self.b_wi = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wi"
        )
        self.W_h = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_h"
        )
        self.b_wh = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wh"
        )
        self.R_r = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_r"
        )
        self.b_rr = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_rr"
        )
        self.R_i = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_i"
        )
        # NOTE(review): named b_ru but used below as the recurrent bias of
        # the update gate i_t — presumably a naming quirk; confirm.
        self.b_ru = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_ru"
        )
        self.R_h = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_h"
        )
        self.b_rh = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_rh"
        )
        """step through processed input to create output"""

        def step(inp, s_prev):
            # Update gate: how much of the previous state to keep.
            i_t = sigmoid(
                dot(inp, self.W_i) + dot(s_prev, self.R_i) + self.b_wi + self.b_ru
            )
            # Reset gate: how much of the previous state feeds the candidate.
            r_t = sigmoid(
                dot(inp, self.W_r) + dot(s_prev, self.R_r) + self.b_wr + self.b_rr
            )
            # Candidate state with the reset gate applied to the recurrent term.
            h_hat_t = tanh(
                dot(inp, self.W_h)
                + (r_t * (dot(s_prev, self.R_h) + self.b_rh))
                + self.b_wh
            )
            # Convex mix of candidate and previous state.
            s_curr = ((1.0 - i_t) * h_hat_t) + (i_t * s_prev)
            return s_curr

        outputs_info = self.s0
        states, updates = aesara.scan(
            fn=step, sequences=[self.X], outputs_info=outputs_info
        )
        self.Y = states

    def output(self):
        # Sequence of hidden states, one per input time step.
        return self.Y
class LSTM(Layer):
    # Long Short-Term Memory layer: scans `step` over the input sequence,
    # tracking both hidden states (self.Y) and cell states (self.C).
    def __init__(self, input_dim, output_dim, input_layer, s0=None, c0=None, name=""):
        """Layers information"""
        self.name = name
        self.input_dim = input_dim
        self.hidden_dim = output_dim
        self.output_dim = output_dim
        self.input_layer = input_layer
        self.X = input_layer.output()
        # Initial hidden (s0) and cell (c0) states for scan (may be None).
        self.s0 = s0
        self.c0 = c0
        self.params = []
        """Layers weights"""
        """self.params is passed so that any parameters could be appended to it"""
        # Input-to-hidden weights W_* with biases b_w*, and recurrent
        # weights R_* with biases b_r*:
        #   i = input gate, f = forget gate, c = candidate cell, o = output gate.
        self.W_i = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_i"
        )
        self.b_wi = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wi"
        )
        self.W_f = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_f"
        )
        self.b_wf = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wf"
        )
        self.W_c = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_c"
        )
        self.b_wc = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wc"
        )
        self.W_o = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W_o"
        )
        self.b_wo = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_wo"
        )
        self.R_i = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_i"
        )
        self.b_ri = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_ri"
        )
        self.R_f = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_f"
        )
        self.b_rf = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_rf"
        )
        self.R_c = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_c"
        )
        self.b_rc = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_rc"
        )
        self.R_o = linear_transform_weights(
            output_dim, output_dim, param_list=self.params, name=name + ".R_o"
        )
        self.b_ro = bias_weights(
            (output_dim,), param_list=self.params, name=name + ".b_ro"
        )
        """step through processed input to create output"""

        def step(x_t, h_tm1, c_tm1):
            # Standard LSTM gating: each gate mixes the current input with
            # the previous hidden state plus both bias terms.
            i_t = sigmoid(
                dot(x_t, self.W_i) + dot(h_tm1, self.R_i) + self.b_wi + self.b_ri
            )
            f_t = sigmoid(
                dot(x_t, self.W_f) + dot(h_tm1, self.R_f) + self.b_wf + self.b_rf
            )
            o_t = sigmoid(
                dot(x_t, self.W_o) + dot(h_tm1, self.R_o) + self.b_ro + self.b_wo
            )
            c_hat_t = tanh(
                dot(x_t, self.W_c) + dot(h_tm1, self.R_c) + self.b_wc + self.b_rc
            )
            # New cell state: forget part of the old, add the gated candidate.
            c_t = f_t * c_tm1 + i_t * c_hat_t
            h_t = o_t * tanh(c_t)
            return h_t, c_t

        outputs_info = [self.s0, self.c0]
        states, updates = aesara.scan(
            fn=step, sequences=[self.X], outputs_info=outputs_info
        )
        # scan returns [hidden states, cell states].
        self.Y = states[0]
        self.C = states[1]

    def output(self):
        # Sequence of hidden states, one per input time step.
        return self.Y
class FC(Layer):
    """Fully connected (affine) layer: output = X @ W + b."""

    def __init__(self, input_dim, output_dim, input_layer, name=""):
        self.input_layer = input_layer
        self.name = name
        self.params = []
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.X = self.input_layer.output()
        # The helpers register W and b into self.params as a side effect.
        self.W = linear_transform_weights(
            input_dim, output_dim, param_list=self.params, name=name + ".W"
        )
        self.b = bias_weights((output_dim,), param_list=self.params, name=name + ".b")

    def output(self):
        """Return the affine transform of the upstream layer's output."""
        projected = dot(self.X, self.W)
        return projected + self.b
class WrapperLayer(Layer):
    """Adapt a raw symbolic variable to the Layer interface.

    Lets a plain variable act as the input layer of a model; it owns no
    parameters and returns the wrapped variable unchanged.
    """

    def __init__(self, X, name=""):
        self.params = []
        self.name = name
        self.X = X

    def output(self):
        return self.X
# This script allows to run one specific cuDNN convolution test case.
# This script should not be imported, but only used as a program.
# python run_dnn_conv.py --help # Print help.
# python run_dnn_conv.py {fwd|bwd-filter|bwd-data} {2d|3d} -a <algo> -i <inputShape> -f <filterShape> ...
import argparse
import sys
import aesara
from aesara.configdefaults import SUPPORTED_DNN_CONV_ALGO_RUNTIME
from aesara.gpuarray.cudnn_defs import (
DOUBLE,
DOUBLE_CONFIG,
FLOAT,
FLOAT_CONFIG,
HALF,
PSEUDO_HALF_CONFIG,
TRUE_HALF_CONFIG,
)
from aesara.tensor.nnet.abstract_conv import get_conv_output_shape
from tests.gpuarray.check_dnn_conv import CheckDnn, TestDnnConv2D, TestDnnConv3D, cudnn
# This file is a command-line tool only; refuse to be imported as a module.
if __name__ != "__main__":
    raise ImportError("This script must not be imported.")
class TupleAction(argparse.Action):
    """Argparse action parsing a comma-separated string into an int tuple.

    E.g. ``"1,2,3"`` becomes ``(1, 2, 3)``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        parsed = tuple(map(int, values.split(",")))
        setattr(namespace, self.dest, parsed)
class BorderAction(TupleAction):
    """Like TupleAction, but lets the literal border-mode names through.

    ``"valid"``, ``"full"`` and ``"half"`` are stored as-is; anything else
    is parsed as a comma-separated tuple of ints.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        if values in ("valid", "full", "half"):
            setattr(namespace, self.dest, values)
        else:
            super().__call__(parser, namespace, values, option_string)
# Raw command-line arguments (parsed further below).
args = sys.argv[1:]
# Names of the three supported computations, with aliases FWD / BWD_FILTER /
# BWD_DATA used by the dispatch logic at the bottom of the script.
computations = FWD, BWD_FILTER, BWD_DATA = ("fwd", "gradweight", "gradinput")
# All known cuDNN algorithm names (fwd + both gradients, deduplicated),
# plus the runtime-chosen algorithm specifiers.
algorithms = (
    tuple(
        sorted(
            list(
                set(
                    cudnn.cudnnConvolutionFwdAlgo_t.get_aliases()
                    + cudnn.cudnnConvolutionBwdFilterAlgo_t.get_aliases()
                    + cudnn.cudnnConvolutionBwdDataAlgo_t.get_aliases()
                )
            )
        )
    )
    + SUPPORTED_DNN_CONV_ALGO_RUNTIME
)
# Scalar dtypes selectable with -t / -p.
types = (HALF, FLOAT, DOUBLE)
# Named (data type, precision) pairs selectable with -D.
data_type_configurations = dict(
    TRUE_HALF_CONFIG=TRUE_HALF_CONFIG,
    PSEUDO_HALF_CONFIG=PSEUDO_HALF_CONFIG,
    FLOAT_CONFIG=FLOAT_CONFIG,
    DOUBLE_CONFIG=DOUBLE_CONFIG,
)
# Command-line interface. Shapes are passed as comma-separated integer
# lists (TupleAction); the border mode additionally accepts the literal
# modes "valid", "full" and "half" (BorderAction).
parser = argparse.ArgumentParser()
parser.add_argument("computation", choices=computations, help="Computation to run.")
parser.add_argument(
    "-a",
    "--algo",
    choices=algorithms,
    required=True,
    help="Algorithm to use for computation.",
)
parser.add_argument(
    "-i",
    "--input-shape",
    action=TupleAction,
    required=True,
    help="Input shape. Comma-separated list of integers (no spaces).",
)
parser.add_argument(
    "-f",
    "--filter-shape",
    action=TupleAction,
    required=True,
    help="Filter shape. Comma-separated list of integers (no spaces).",
)
# Either -D (named configuration) or -t/-p (explicit pair), never both;
# this exclusivity is enforced after parsing.
parser.add_argument(
    "-D",
    "--dtype-config",
    choices=list(sorted(data_type_configurations.keys())),
    default=None,
    help="Data type configuration for (data type; precision). Default (aesara floatX; aesara floatX). "
    "To specify data type configuration, you can either use this option or set data type and "
    'precision separately with "-t" and "-p" options.',
)
parser.add_argument(
    "-t",
    "--dtype",
    choices=types,
    default=None,
    help="Data type (default aesara floatX).",
)
parser.add_argument(
    "-p",
    "--precision",
    choices=types,
    default=None,
    help="Precision (default aesara floatX).",
)
parser.add_argument(
    "-s",
    "--subsample",
    action=TupleAction,
    help="Subsample. Comma-separated list of integers (no spaces). "
    "Default: 1 per dimension.",
)
parser.add_argument(
    "-d",
    "--dilation",
    action=TupleAction,
    help="Dilation. Comma-separated list of integers (no spaces). "
    "Default: 1 per dimension.",
)
parser.add_argument(
    "-b",
    "--border-mode",
    default="valid",
    action=BorderAction,
    help='Border mode. "valid" (default), "full", "half" '
    "or a comma-separated list of integers (no spaces).",
)
parser.add_argument(
    "-c",
    "--conv-mode",
    choices=("conv", "cross"),
    default="conv",
    help="Conv mode (default: conv).",
)
parser.add_argument(
    "-A",
    "--alpha",
    type=float,
    default=1,
    help="alpha (floating), must not be zero. Default 1.",
)
parser.add_argument(
    "-B", "--beta", type=float, default=0, help="beta (floating). Default 0."
)
parser.add_argument(
    "-I",
    "--print-infos",
    action="store_true",
    default=False,
    help="Print some infos before testing.",
)
args = parser.parse_args(args)
test = args.computation

# --- Validate shapes: input and filter must share a rank of 4 (2D conv)
# or 5 (3D conv); ndim is the number of spatial dimensions (2 or 3).
if len(args.input_shape) != len(args.filter_shape):
    raise ValueError("Expected same length for input shape and filter shape")
if len(args.input_shape) not in (4, 5):
    raise ValueError("Expected length 4 or 5 for input shape")
ndim = len(args.input_shape) - 2
# The check above guarantees ndim is 2 or 3, so `tests` is always bound.
if ndim == 2:
    tests = TestDnnConv2D()
elif ndim == 3:
    tests = TestDnnConv3D()

# Default subsample/dilation: 1 per spatial dimension.
if args.subsample is None:
    args.subsample = (1,) * ndim
if args.dilation is None:
    args.dilation = (1,) * ndim
if not (ndim == len(args.subsample) == len(args.dilation)):
    raise ValueError(f"Expected parameters sized for {int(ndim)} dimensions.")
if isinstance(args.border_mode, tuple) and ndim != len(args.border_mode):
    raise ValueError(f"Expected borders sized for {int(ndim)} dimensions.")
if args.alpha == 0:
    raise ValueError("Nothing could be computed if alpha is 0.")

# --- Resolve the (dtype, precision) pair: either both default to floatX,
# or they come from the named configuration -D (exclusive with -t/-p).
if args.dtype_config is None:
    if args.dtype is None:
        args.dtype = aesara.config.floatX
    if args.precision is None:
        args.precision = aesara.config.floatX
else:
    if args.dtype is not None or args.precision is not None:
        raise ValueError(
            "You must specify either -D <data-type-configuration> "
            "or (-t <data-type> -p <precision>), not both."
        )
    args.dtype, args.precision = data_type_configurations[args.dtype_config]
if (args.dtype, args.precision) not in cudnn.get_supported_dtype_configs():
    raise ValueError(
        f"Unsupported data type configuration {args.dtype} {args.precision}."
    )

# Warn (but do not abort) if the chosen algorithm does not normally support
# the requested dtype/precision for this computation.
if args.algo not in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
    check_config = False
    if test == FWD:
        check_config = cudnn.fwd_algo_supports_dtype_config(
            args.algo, args.dtype, args.precision, ndim
        )
    elif test == BWD_FILTER:
        check_config = cudnn.bwd_filter_algo_supports_dtype_config(
            args.algo, args.dtype, args.precision, ndim
        )
    elif test == BWD_DATA:
        check_config = cudnn.bwd_data_algo_supports_dtype_config(
            args.algo, args.dtype, args.precision, ndim
        )
    if not check_config:
        print(
            "Warning: %s computation does not normally support configuration (%s, %s) for algo %s."
            % (test, args.dtype, args.precision, args.algo),
            file=sys.stderr,
        )

algo = args.algo
dtype = args.dtype
precision = args.precision
# Parameter tuple in the order expected by the run_conv_* test helpers.
parameters = (
    args.input_shape,
    args.filter_shape,
    args.subsample,
    args.dilation,
    args.border_mode,
    args.conv_mode,
    args.alpha,
    args.beta,
)
if args.print_infos:
    CheckDnn.print_infos(count_tests=False)
print("======================")
print("Running", test, algo, dtype, precision, *parameters)
# Dispatch to the requested computation and report the output shape.
if test == FWD:
    tests.run_conv_fwd(algo, dtype, precision, parameters)
    expected_output_shape = get_conv_output_shape(
        args.input_shape,
        args.filter_shape,
        args.border_mode,
        args.subsample,
        args.dilation,
    )
elif test == BWD_FILTER:
    tests.run_conv_gradweight(algo, dtype, precision, parameters)
    expected_output_shape = args.filter_shape
elif test == BWD_DATA:
    tests.run_conv_gradinput(algo, dtype, precision, parameters)
    expected_output_shape = args.input_shape
print("Computed shape:", expected_output_shape)
print("... OK")
差异被折叠。
差异被折叠。
import itertools
import numpy as np
import aesara
from aesara.configdefaults import config
from aesara.gpuarray import gpuarray_shared_constructor
from aesara.gpuarray.blas import (
GpuGemm,
GpuGer,
gpu_dot22,
gpugemm_inplace,
gpugemm_no_inplace,
gpugemmbatch_inplace,
gpugemv_inplace,
gpugemv_no_inplace,
gpuger_inplace,
gpuger_no_inplace,
)
from aesara.tensor.blas import (
BatchedDot,
_dot22,
batched_dot,
gemm_inplace,
gemv,
gemv_inplace,
)
from aesara.tensor.math import dot
from aesara.tensor.type import matrix, tensor, tensor3, vector
from tests import unittest_tools as utt
from tests.gpuarray.config import mode_with_gpu, test_ctx_name
from tests.gpuarray.test_basic_ops import makeTester, rand
from tests.tensor.test_blas import BaseGemv, TestGer
# Auto-generated test class comparing CPU gemv_inplace against the GPU
# implementation on the listed cases (z, alpha, A, x, beta).
TestGpuGemv = makeTester(
    "GpuGemvTester",
    op=gemv_inplace,
    gpu_op=gpugemv_inplace,
    # It doesn't support float16
    cases=dict(
        dot_vv=[rand(1), 1.0, rand(1, 2), rand(2), 0.0],
        dot_vm=[rand(3), 1.0, rand(3, 2), rand(2), 0.0],
        float32=[
            rand(3).astype("float32"),
            np.float32(1),
            rand(3, 2).astype("float32"),
            rand(2).astype("float32"),
            np.float32(0),
        ],
        float64=[
            rand(3).astype("float64"),
            np.float64(1),
            rand(3, 2).astype("float64"),
            rand(2).astype("float64"),
            np.float64(0),
        ],
        # Empty-dimension cases kept for reference but disabled:
        # test_02=[rand(0), 1, rand(0, 2), rand(2), 0],
        # test_30=[rand(3), 1, rand(3, 0), rand(0), 0],
        # test_00=[rand(0), 1, rand(0, 0), rand(0), 0],
        # Negative strides via reversed views.
        test_stride=[rand(3)[::-1], 1.0, rand(3, 2)[::-1], rand(2)[::-1], 0.0],
    ),
)
def test_float16():
    # Check the float16 paths of gemv (lifted to GpuGemm), gemm and dot22
    # against NumPy references computed on the host.

    # gemv (gemm called): data is (y, alpha, A, x, beta).
    float16_data = [
        rand(3).astype("float16"),
        np.asarray(1, dtype=np.float32),
        rand(3, 3).astype("float16"),
        rand(3).astype("float16"),
        np.asarray(0.5, dtype=np.float32),
    ]
    float16_shared = [
        gpuarray_shared_constructor(val, target=test_ctx_name) for val in float16_data
    ]
    o = gemv(*float16_shared)
    f = aesara.function([], o, mode=mode_with_gpu)
    y, alpha, A, x, beta = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)
    topo = f.maker.fgraph.toposort()
    # The float16 gemv must have been rewritten into a GpuGemm node.
    assert any(isinstance(n.op, GpuGemm) for n in topo)

    # gemm: same structure with matrix operands.
    float16_data = [
        rand(3, 3).astype("float16"),
        np.asarray(1, dtype=np.float32),
        rand(3, 3).astype("float16"),
        rand(3, 3).astype("float16"),
        np.asarray(0.5, dtype=np.float32),
    ]
    float16_shared = [
        gpuarray_shared_constructor(val, target=test_ctx_name) for val in float16_data
    ]
    o = gpugemm_no_inplace(*float16_shared)
    f = aesara.function([], o)
    y, alpha, A, x, beta = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)

    # dot22: plain matrix product, no alpha/beta.
    float16_data = [rand(3, 3).astype("float16"), rand(3, 3).astype("float16")]
    float16_shared = [gpuarray_shared_constructor(val) for val in float16_data]
    o = gpu_dot22(*float16_shared)
    f = aesara.function([], o)
    x, y = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), np.dot(x, y))
class TestGpuSgemv(BaseGemv, utt.OptimizationTestMixin):
    # Run the generic BaseGemv test suite on the GPU ops in float32.
    mode = mode_with_gpu
    dtype = "float32"
    gemv = gpugemv_no_inplace
    gemv_inplace = gpugemv_inplace

    @staticmethod
    def shared(val):
        # Prefer a GPU shared variable; fall back to a host one for values
        # the GPU constructor rejects.
        try:
            return gpuarray_shared_constructor(val)
        except TypeError:
            return aesara.shared(val)
# Auto-generated test class comparing CPU gemm_inplace against the GPU
# implementation; cases are (z, alpha, x, y, beta) covering the sign and
# zero combinations of alpha/beta.
TestGpuGemm = makeTester(
    "GpuGemmTester",
    op=gemm_inplace,
    gpu_op=gpugemm_inplace,
    # float16 tested in test_float16
    cases=dict(
        test1=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 0.0],
        test2=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 1.0],
        test3=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), -1.0],
        test4=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), 0.0],
        test5=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), 0.6],
        test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
        test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
        test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
        float32=[
            rand(3, 4).astype("float32"),
            np.float32(-1.0),
            rand(3, 5).astype("float32"),
            rand(5, 4).astype("float32"),
            np.float32(-1.1),
        ],
        float64=[
            rand(3, 4).astype("float64"),
            np.float64(-1.0),
            rand(3, 5).astype("float64"),
            rand(5, 4).astype("float64"),
            np.float64(-1.1),
        ],
        # Empty-dimension cases kept for reference but disabled:
        # test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
        # test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
        # test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
        # test13=[rand(0, 0), -1.0, rand(0, 0), rand(0, 0), -1.1],
    ),
)
# Batched-gemm cases (z, alpha, x, y, beta) over every 4-subset of distinct
# prime-ish sizes, so batch/M/K/N are all different and mismatches surface.
gemm_batched_tests = {
    "test_b%im%ik%in%i"
    % (b, m, k, n): [rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()]
    for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4)
}
# Explicit dtype cases (batch 3, 4x4 times 4x7 products).
gemm_batched_tests["float16"] = [
    rand(3, 4, 7).astype("float16"),
    rand().astype("float16"),
    rand(3, 4, 4).astype("float16"),
    rand(3, 4, 7).astype("float16"),
    rand().astype("float16"),
]
gemm_batched_tests["float32"] = [
    rand(3, 4, 7).astype("float32"),
    rand().astype("float32"),
    rand(3, 4, 4).astype("float32"),
    rand(3, 4, 7).astype("float32"),
    rand().astype("float32"),
]
gemm_batched_tests["float64"] = [
    rand(3, 4, 7).astype("float64"),
    rand().astype("float64"),
    rand(3, 4, 4).astype("float64"),
    rand(3, 4, 7).astype("float64"),
    rand().astype("float64"),
]
# Reference op is built from BatchedDot since there is no CPU batched-gemm.
TestGpuGemmBatch = makeTester(
    "GpuGemmBatchTester",
    op=lambda z, alpha, x, y, beta: alpha * BatchedDot()(x, y) + beta * z,
    gpu_op=gpugemmbatch_inplace,
    cases=gemm_batched_tests,
)
class TestGpuGemmBatchStrided:
    def test_basic(self):
        # Regression test for batched_dot on a strided (non-contiguous)
        # operand. Reported in https://github.com/Theano/Theano/issues/5730
        x = tensor3()
        y = tensor3()
        # y[:, 0, :, np.newaxis] produces a strided view of y.
        z = batched_dot(x, y[:, 0, :, np.newaxis])
        f = aesara.function([x, y], z, mode=mode_with_gpu)
        x_num = np.arange(32 * 19 * 600, dtype=config.floatX).reshape((32, 19, 600))
        y_num = np.arange(7 * 32 * 600, dtype=config.floatX).reshape((32, 7, 600))
        f(x_num, y_num)
        # The batched gemm must still have been made inplace.
        assert f.maker.fgraph.toposort()[-2].op.inplace
class TestGpuSger(TestGer):
    # Run the generic TestGer suite against the GPU ger implementation.
    def setup_method(self):
        self.mode = mode_with_gpu
        dtype = self.dtype = "float32"  # optimization isn't dtype-dependent
        self.A = tensor(dtype=dtype, broadcastable=(False, False))
        self.a = tensor(dtype=dtype, broadcastable=())
        self.x = tensor(dtype=dtype, broadcastable=(False,))
        self.y = tensor(dtype=dtype, broadcastable=(False,))
        self.ger_destructive = gpuger_inplace
        # data on the gpu make the op always inplace
        self.ger = gpuger_inplace
        self.gemm = gpugemm_inplace
        super().setup_method()
class TestGpuSgerNoTransfer(TestGpuSger):
    # Same suite, but shared values start on the GPU (no host transfer).
    shared = staticmethod(gpuarray_shared_constructor)
class TestGpuGer_OpContract(utt.OpContractTestMixin):
    # Verify the GPU ger ops honor the generic Op contract (eq/hash/clone).
    def setup_method(self):
        self.ops = [gpuger_no_inplace, gpuger_inplace]

    def clone(self, op):
        return GpuGer(inplace=op.inplace)
# Auto-generated test class comparing CPU _dot22 with the GPU version on
# matrix pairs, including unit dimensions.
TestGpuDot22 = makeTester(
    "GpuDot22Tester",
    op=_dot22,
    gpu_op=gpu_dot22,
    cases=dict(
        test1=[rand(3, 4), rand(4, 5)],
        test2=[rand(1, 4), rand(4, 5)],
        test3=[rand(3, 1), rand(1, 5)],
        test4=[rand(3, 4), rand(4, 1)],
        # Empty-dimension cases kept for reference but disabled:
        # test5=[rand(0, 4), rand(4, 5)],
        # test6=[rand(3, 0), rand(0, 5)],
        # test7=[rand(3, 4), rand(4, 0)],
        # test8=[rand(0, 4), rand(4, 0)],
        # test9=[rand(0, 0), rand(0, 0)],
    ),
)
def test_gemv_zeros():
    # A gemv with an empty inner dimension must return zeros, not crash.
    W = matrix()
    v = vector()
    f = aesara.function([W, v], W.dot(v), mode=mode_with_gpu)
    # Apply to an empty matrix of shape (1000, 0) and an empty vector of
    # shape (0,); the product is a zero vector of length 1000.
    dim = 1000
    A = np.zeros((dim, 0), dtype=aesara.config.floatX)
    b = np.zeros((0,), dtype=aesara.config.floatX)
    tmp = f(A, b)
    assert np.allclose(tmp, np.zeros((dim,)))
def test_gemv_dot_strides():
    # Regression test for gemv on negatively-strided (reversed) inputs.
    # Reported in https://github.com/Theano/Theano/issues/6142
    xv = rand(5)
    yv = rand(5, 1)
    x = gpuarray_shared_constructor(xv)
    y = gpuarray_shared_constructor(yv, broadcastable=(False, True))
    f = aesara.function([], dot(x, y[::-1]), mode=mode_with_gpu)
    out = f()
    utt.assert_allclose(out, np.dot(xv, yv[::-1]))
import numpy as np
import pytest
import aesara
import tests.unittest_tools as utt
from aesara.gpuarray.blocksparse import (
GpuSparseBlockGemv,
GpuSparseBlockOuter,
gpu_sparse_block_gemv,
gpu_sparse_block_outer,
)
from aesara.gpuarray.type import gpuarray_shared_constructor
from aesara.tensor.type import fmatrix, ftensor3, lmatrix
from tests.gpuarray.config import mode_with_gpu, test_ctx_name
from tests.tensor.nnet.test_blocksparse import TestBlockSparseGemvAndOuter
class TestBlockSparseGemvAndOuterGPUarray(TestBlockSparseGemvAndOuter):
    # Run the generic block-sparse gemv/outer suite with the GPU ops.
    def setup_method(self):
        self.mode = mode_with_gpu.excluding("constant_folding")
        self.gemv_op = gpu_sparse_block_gemv
        self.outer_op = gpu_sparse_block_outer
        self.gemv_class = GpuSparseBlockGemv
        self.outer_class = GpuSparseBlockOuter
        super().setup_method()

    @pytest.mark.skip(
        reason="""
    This test is temporarily disabled since we disabled the output_merge
    and alpha_merge optimizations for blocksparse due to brokenness.
    Re-enable when those are re-added.
    """
    )
    def test_blocksparse_grad_merge(self):
        # Check that the learning-rate update W - lr * gW is merged into a
        # single GpuSparseBlockOuter node, and that the merged and unmerged
        # graphs compute the same weights.
        b = fmatrix()
        h = ftensor3()
        iIdx = lmatrix()
        oIdx = lmatrix()
        # gemv_data comes from the parent test class.
        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()
        W = gpuarray_shared_constructor(W_val, context=test_ctx_name)
        o = gpu_sparse_block_gemv(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
        gW = aesara.grad(o.sum(), W)
        lr = np.asarray(0.05, dtype="float32")
        upd = W - lr * gW
        f1 = aesara.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode_with_gpu)
        # Make sure the lr update was merged.
        assert isinstance(f1.maker.fgraph.outputs[0].owner.op, GpuSparseBlockOuter)
        # Exclude the merge optimizations.
        mode = mode_with_gpu.excluding("local_merge_blocksparse_alpha")
        mode = mode.excluding("local_merge_blocksparse_output")
        f2 = aesara.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode)
        # Make sure the lr update is not merged.
        assert not isinstance(f2.maker.fgraph.outputs[0].owner.op, GpuSparseBlockOuter)
        f2(h_val, iIdx_val, b_val, oIdx_val)
        W_ref = W.get_value()
        # reset the var
        W.set_value(W_val)
        f1(h_val, iIdx_val, b_val, oIdx_val)
        W_opt = W.get_value()
        utt.assert_allclose(W_ref, W_opt)
import numpy as np
import pytest
import aesara
from aesara import config
from aesara import tensor as at
from aesara.gpuarray.basic_ops import CGpuKernelBase
from aesara.gpuarray.type import GpuArrayType, get_context, gpu_context_type
from aesara.gradient import grad_undefined
from aesara.graph.basic import Apply
from aesara.link.c.params_type import ParamsType
from aesara.scalar import int32 as int_t
class GpuEye(CGpuKernelBase):
    """Eye for GPU.

    This is an implementation to test that `CGpuKernelBase` works and also
    to use as an example in the docs. It is not used for user graphs.

    The C implementation lives in c_code/tstgpueye.c.
    """

    __props__ = ("dtype", "context_name")
    # Parameters forwarded to the C code: the gpuarray typecode of the
    # output and the GPU context to allocate on.
    params_type = ParamsType(typecode=int_t, context=gpu_context_type)

    def __init__(self, dtype=None, context_name=None):
        if dtype is None:
            dtype = config.floatX
        self.dtype = dtype
        self.context_name = context_name
        super().__init__(["c_code/tstgpueye.c"], "APPLY_SPECIFIC(tstgpueye)")

    def get_params(self, node):
        # pygpu is only needed here, so import lazily and skip tests if absent.
        pygpu_gpuarray = pytest.importorskip("pygpu.gpuarray")
        return self.params_type.get_params(
            typecode=pygpu_gpuarray.dtype_to_typecode(self.dtype),
            context=get_context(self.context_name),
        )

    def c_headers(self, **kwargs):
        return ["<gpuarray/types.h>", "<gpuarray/kernel.h>"]

    def make_node(self, n, m):
        # n and m are scalar (0-d) row/column counts.
        n = at.as_tensor_variable(n)
        m = at.as_tensor_variable(m)
        assert n.ndim == 0
        assert m.ndim == 0
        otype = GpuArrayType(
            dtype=self.dtype,
            broadcastable=(False, False),
            context_name=self.context_name,
        )
        return Apply(self, [n, m], [otype()])

    def infer_shape(self, fgraph, node, in_shapes):
        # Output shape is exactly (n, m).
        out_shape = [node.inputs[0], node.inputs[1]]
        return [out_shape]

    def grad(self, inp, grads):
        # Gradient w.r.t. integer shape inputs is undefined.
        return [grad_undefined(self, i, inp[i]) for i in range(2)]
def test_cgpukernelbase():
    # Import inside the function to prevent the back-end from being
    # initialized when reloading the GpuEye object from cache.
    from .config import mode_with_gpu, test_ctx_name

    # Build a 4x5 int32 identity on the GPU and compare against np.eye.
    op = GpuEye(dtype="int32", context_name=test_ctx_name)
    f = aesara.function([], op(4, 5), mode=mode_with_gpu)
    r = f()
    assert r.dtype == "int32"
    assert (np.asarray(r) == np.eye(4, 5, dtype="int32")).all()
import numpy as np
import pytest
import aesara
import aesara.gpuarray
from aesara.gpuarray.ctc import GpuConnectionistTemporalClassification, gpu_ctc
from aesara.gradient import grad
from aesara.tensor.math import mean
from aesara.tensor.nnet.ctc import (
ConnectionistTemporalClassification,
ctc,
ctc_available,
)
from tests import unittest_tools as utt
from tests.gpuarray.config import mode_with_gpu, mode_without_gpu
from tests.tensor.nnet.test_ctc import setup_ctc_case, setup_grad_case, setup_torch_case
@pytest.mark.skipif(
    not ctc_available(), reason="Optional library warp-ctc not available"
)
class TestCTC:
    # End-to-end tests of the GPU warp-ctc binding: expected values, CPU/GPU
    # agreement, gradient disabling, GPU lifting, and verify_grad.
    def check_ctc(
        self, activations, labels, input_length, expected_costs, expected_grads
    ):
        # Create symbolic variables
        t_activations = aesara.shared(activations, name="activations")
        t_activation_times = aesara.shared(input_length, name="activation_times")
        t_labels = aesara.shared(labels, name="labels")
        inputs = [t_activations, t_labels, t_activation_times]
        # Execute several tests for each test case
        self.check_expected_values(
            t_activations, t_labels, t_activation_times, expected_costs, expected_grads
        )
        self.compare_gpu_and_cpu_values(*inputs)
        self.check_grads_disabled(*inputs)
        self.run_gpu_optimization_with_grad(*inputs)
        self.run_gpu_optimization_no_grad(*inputs)

    def setup_cpu_op(
        self,
        activations,
        labels,
        input_length,
        compute_grad=True,
        mode=mode_without_gpu,
    ):
        # Compile a CPU CTC function; optionally also return the gradient
        # of the mean cost w.r.t. the activations.
        cpu_ctc_cost = ctc(activations, labels, input_length)
        outputs = [cpu_ctc_cost]
        if compute_grad:
            # Symbolic gradient of CTC cost
            cpu_ctc_grad = grad(mean(cpu_ctc_cost), activations)
            outputs += [cpu_ctc_grad]
        return aesara.function([], outputs, mode=mode)

    def setup_gpu_op(self, activations, labels, input_length, compute_grad=True):
        # GPU counterpart of setup_cpu_op, built directly on gpu_ctc.
        gpu_ctc_cost = gpu_ctc(activations, labels, input_length)
        outputs = [gpu_ctc_cost]
        if compute_grad:
            # Symbolic gradient of CTC cost
            gpu_ctc_grad = grad(mean(gpu_ctc_cost), activations)
            outputs += [gpu_ctc_grad]
        return aesara.function([], outputs, mode=mode_with_gpu)

    def check_expected_values(
        self, activations, labels, input_length, expected_costs, expected_grads
    ):
        gpu_train = self.setup_gpu_op(activations, labels, input_length)
        gpu_cost, gpu_grad = gpu_train()
        # Transfer costs from GPU memory to host
        cost_from_gpu = np.asarray(gpu_cost)
        # Transfer gradients from GPU memory to host
        grad_from_gpu = np.asarray(gpu_grad)
        # Check that results are in conformance with expected values.
        # The gradient of the mean divides the expected per-example grads
        # by the batch size (cost_from_gpu.shape[0]).
        utt.assert_allclose(expected_grads / cost_from_gpu.shape[0], grad_from_gpu)
        utt.assert_allclose(expected_costs, cost_from_gpu)

    def compare_gpu_and_cpu_values(self, activations, labels, input_length):
        cpu_train = self.setup_cpu_op(activations, labels, input_length)
        cpu_cost, cpu_grad = cpu_train()
        gpu_train = self.setup_gpu_op(activations, labels, input_length)
        gpu_cost, gpu_grad = gpu_train()
        # Transfer costs from GPU memory to host
        cost_from_gpu = np.asarray(gpu_cost)
        # Transfer gradients from GPU memory to host
        grad_from_gpu = np.asarray(gpu_grad)
        # Check that results are in conformance with expected values
        utt.assert_allclose(cpu_grad, grad_from_gpu)
        utt.assert_allclose(cpu_cost, cost_from_gpu)

    def check_grads_disabled(self, activations, labels, input_length):
        """
        Check if optimization to disable gradients is working
        """
        gpu_ctc_cost = gpu_ctc(activations, labels, input_length)
        gpu_ctc_function = aesara.function([], [gpu_ctc_cost])
        # When the gradient output is unused, compute_grad must be False.
        for node in gpu_ctc_function.maker.fgraph.apply_nodes:
            if isinstance(node.op, GpuConnectionistTemporalClassification):
                assert node.op.compute_grad is False

    def run_gpu_optimization_with_grad(self, activations, labels, input_length):
        # Compile CPU function with optimization
        cpu_lifted_train = self.setup_cpu_op(
            activations, labels, input_length, mode=mode_with_gpu
        )
        # Check whether Op is lifted to the GPU
        assert self.has_only_gpu_op(cpu_lifted_train)

    def run_gpu_optimization_no_grad(self, activations, labels, input_length):
        cpu_train = self.setup_cpu_op(
            activations, labels, input_length, compute_grad=False
        )
        cpu_cost = cpu_train()
        # Compile CPU function with optimization
        cpu_lifted_test = self.setup_cpu_op(
            activations, labels, input_length, compute_grad=False, mode=mode_with_gpu
        )
        # Check whether Op is lifted to the GPU
        assert self.has_only_gpu_op(cpu_lifted_test)
        gpu_cost = cpu_lifted_test()
        # Transfer costs from GPU memory to host
        cost_from_gpu = np.asarray(gpu_cost)
        # Compare values from CPU and GPU Ops
        utt.assert_allclose(cpu_cost, cost_from_gpu)

    def has_only_gpu_op(self, function):
        # True iff the compiled graph contains the GPU CTC op and no CPU one.
        has_cpu_instance = False
        has_gpu_instance = False
        for node in function.maker.fgraph.apply_nodes:
            if isinstance(node.op, ConnectionistTemporalClassification):
                has_cpu_instance = True
            if isinstance(node.op, GpuConnectionistTemporalClassification):
                has_gpu_instance = True
        return has_gpu_instance and (not has_cpu_instance)

    # Test obtained from Torch tutorial at:
    # https://github.com/baidu-research/warp-ctc/blob/master/torch_binding/TUTORIAL.md
    def test_torch_case(self):
        (
            activations,
            labels,
            activation_times,
            expected_costs,
            expected_grads,
        ) = setup_torch_case()
        self.check_ctc(
            activations, labels, activation_times, expected_costs, expected_grads
        )

    def test_ctc(self):
        (
            activations,
            labels,
            input_length,
            expected_costs,
            expected_grads,
        ) = setup_ctc_case()
        self.check_ctc(
            activations, labels, input_length, expected_costs, expected_grads
        )

    def test_verify_grad(self):
        # Numerically verify the gradient of gpu_ctc w.r.t. the activations.
        def ctc_op_functor(labels, in_lengths):
            def wrapper(acts):
                # Create auxiliary symbolic variables
                t_activation_times = aesara.shared(in_lengths, name="activation_times")
                t_labels = aesara.shared(labels, name="labels")
                return gpu_ctc(acts, t_labels, t_activation_times)

            return wrapper

        activations, labels, activation_times = setup_grad_case()
        ctc_op = ctc_op_functor(labels, activation_times)
        utt.verify_grad(ctc_op, [activations], mode=mode_with_gpu)
This source diff could not be displayed because it is too large. You can view the blob instead.
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论