提交 aca733c8 作者: Pascal Lamblin 提交者: GitHub

Merge pull request #5774 from nouiz/less_gpuelemwise

Don't move scalar float* elemwise unless the result is needed on the GPU.
...@@ -50,9 +50,8 @@ class GpuGemv(BlasOp): ...@@ -50,9 +50,8 @@ class GpuGemv(BlasOp):
A = as_gpuarray_variable(A, ctx_name) A = as_gpuarray_variable(A, ctx_name)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name) y = as_gpuarray_variable(y, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'): alpha = as_tensor_variable(alpha)
alpha = as_tensor_variable(alpha).astype('float64') beta = as_tensor_variable(beta)
beta = as_tensor_variable(beta).astype('float64')
assert alpha.ndim == 0 assert alpha.ndim == 0
assert beta.ndim == 0 assert beta.ndim == 0
...@@ -60,6 +59,13 @@ class GpuGemv(BlasOp): ...@@ -60,6 +59,13 @@ class GpuGemv(BlasOp):
assert x.ndim == 1 assert x.ndim == 1
assert y.ndim == 1 assert y.ndim == 1
assert A.dtype == x.dtype == y.dtype assert A.dtype == x.dtype == y.dtype
# float16 not supported
expected = A.dtype
assert theano.scalar.upcast(alpha.dtype,
beta.dtype, expected) == expected
alpha = alpha.astype(expected)
beta = beta.astype(expected)
return Apply(self, [y, alpha, A, x, beta], [y.type()]) return Apply(self, [y, alpha, A, x, beta], [y.type()])
def perform(self, node, inputs, out_storage): def perform(self, node, inputs, out_storage):
...@@ -163,15 +169,30 @@ class GpuGemm(BlasOp): ...@@ -163,15 +169,30 @@ class GpuGemm(BlasOp):
A = as_gpuarray_variable(A, ctx_name) A = as_gpuarray_variable(A, ctx_name)
B = as_gpuarray_variable(B, ctx_name) B = as_gpuarray_variable(B, ctx_name)
C = as_gpuarray_variable(C, ctx_name) C = as_gpuarray_variable(C, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'): alpha = as_tensor_variable(alpha)
alpha = as_tensor_variable(alpha).astype('float64') beta = as_tensor_variable(beta)
beta = as_tensor_variable(beta).astype('float64')
if not (A.dtype == B.dtype == C.dtype):
raise TypeError(theano.tensor.blas.Gemm.E_mixed,
(A.dtype, B.dtype, C.dtype,
alpha.dtype, beta.dtype))
if not A.dtype.startswith('float'):
raise TypeError(theano.tensor.blas.Gemm.E_float, (A.dtype))
if A.dtype == 'float16':
expected = 'float32'
else:
expected = A.dtype
assert theano.scalar.upcast(alpha.dtype,
beta.dtype, expected) == expected
alpha = alpha.astype(expected)
beta = beta.astype(expected)
assert alpha.ndim == 0 assert alpha.ndim == 0
assert beta.ndim == 0 assert beta.ndim == 0
assert A.ndim == 2 assert A.ndim == 2
assert B.ndim == 2 assert B.ndim == 2
assert C.ndim == 2 assert C.ndim == 2
assert A.dtype == B.dtype == C.dtype
return Apply(self, [C, alpha, A, B, beta], [C.type()]) return Apply(self, [C, alpha, A, B, beta], [C.type()])
def perform(self, node, inputs, outputs): def perform(self, node, inputs, outputs):
...@@ -244,13 +265,17 @@ class GpuGer(BlasOp): ...@@ -244,13 +265,17 @@ class GpuGer(BlasOp):
A = as_gpuarray_variable(A, ctx_name) A = as_gpuarray_variable(A, ctx_name)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name) y = as_gpuarray_variable(y, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'): alpha = as_tensor_variable(alpha)
alpha = as_tensor_variable(alpha).astype('float64') if not(A.dtype == x.dtype == y.dtype):
raise TypeError('ger requires matching dtypes',
(A.dtype, alpha.dtype, x.dtype, y.dtype))
assert theano.scalar.upcast(alpha.dtype, A.dtype) == A.dtype
alpha = alpha.astype(A.dtype)
assert alpha.ndim == 0 assert alpha.ndim == 0
assert A.ndim == 2 assert A.ndim == 2
assert x.ndim == 1 assert x.ndim == 1
assert y.ndim == 1 assert y.ndim == 1
assert A.dtype == x.dtype == y.dtype
return Apply(self, [A, alpha, x, y], [A.type()]) return Apply(self, [A, alpha, x, y], [A.type()])
def perform(self, node, inp, out): def perform(self, node, inp, out):
...@@ -383,15 +408,14 @@ class GpuGemmBatch(BlasOp): ...@@ -383,15 +408,14 @@ class GpuGemmBatch(BlasOp):
A = as_gpuarray_variable(A, ctx_name) A = as_gpuarray_variable(A, ctx_name)
B = as_gpuarray_variable(B, ctx_name) B = as_gpuarray_variable(B, ctx_name)
C = as_gpuarray_variable(C, ctx_name) C = as_gpuarray_variable(C, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'): alpha = as_tensor_variable(alpha)
alpha = as_tensor_variable(alpha).astype('float64') beta = as_tensor_variable(beta)
beta = as_tensor_variable(beta).astype('float64')
assert alpha.ndim == 0 assert alpha.ndim == 0
assert beta.ndim == 0 assert beta.ndim == 0
assert A.ndim == 3 assert A.ndim == 3
assert B.ndim == 3 assert B.ndim == 3
assert C.ndim == 3 assert C.ndim == 3
assert A.dtype == B.dtype == C.dtype assert A.dtype == B.dtype == C.dtype == alpha.dtype == beta.dtype
return Apply(self, [C, alpha, A, B, beta], [C.type()]) return Apply(self, [C, alpha, A, B, beta], [C.type()])
def c_headers(self): def c_headers(self):
......
...@@ -702,6 +702,7 @@ def local_gpua_elemwise(op, context_name, inputs, outputs): ...@@ -702,6 +702,7 @@ def local_gpua_elemwise(op, context_name, inputs, outputs):
name = 'Gpu' + name name = 'Gpu' + name
if len(outputs) > 1: if len(outputs) > 1:
return return
have_cuda = False have_cuda = False
have_opencl = False have_opencl = False
if inputs and isinstance(inputs[0].type, GpuArrayType): if inputs and isinstance(inputs[0].type, GpuArrayType):
...@@ -1162,6 +1163,8 @@ def local_gpua_careduce(op, context_name, inputs, outputs): ...@@ -1162,6 +1163,8 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
@op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv]) @op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv])
@register_opt2([tensor.blas.Gemv], 'fast_compile') @register_opt2([tensor.blas.Gemv], 'fast_compile')
def local_gpua_gemv(op, context_name, inputs, outputs): def local_gpua_gemv(op, context_name, inputs, outputs):
if inputs[0].dtype not in ['float32', 'float64']:
return
if op.inplace: if op.inplace:
return gpugemv_inplace return gpugemv_inplace
else: else:
...@@ -1172,6 +1175,8 @@ def local_gpua_gemv(op, context_name, inputs, outputs): ...@@ -1172,6 +1175,8 @@ def local_gpua_gemv(op, context_name, inputs, outputs):
@op_lifter([tensor.blas.Gemm]) @op_lifter([tensor.blas.Gemm])
@register_opt2([tensor.blas.Gemm], 'fast_compile') @register_opt2([tensor.blas.Gemm], 'fast_compile')
def local_gpua_gemm(op, context_name, inputs, outputs): def local_gpua_gemm(op, context_name, inputs, outputs):
if inputs[0].dtype not in ['float16', 'float32', 'float64']:
return
if op.inplace: if op.inplace:
return gpugemm_inplace return gpugemm_inplace
else: else:
...@@ -1182,9 +1187,12 @@ def local_gpua_gemm(op, context_name, inputs, outputs): ...@@ -1182,9 +1187,12 @@ def local_gpua_gemm(op, context_name, inputs, outputs):
@op_lifter([tensor.blas.BatchedDot]) @op_lifter([tensor.blas.BatchedDot])
@register_opt2([tensor.blas.BatchedDot], 'fast_compile') @register_opt2([tensor.blas.BatchedDot], 'fast_compile')
def local_gpua_gemmbatch(op, context_name, inputs, outputs): def local_gpua_gemmbatch(op, context_name, inputs, outputs):
if inputs[0].dtype not in ['float32', 'float64']:
return
a, b = inputs a, b = inputs
c = tensor.AllocEmpty(a.dtype)(a.shape[0], a.shape[1], b.shape[2]) c = tensor.AllocEmpty(a.dtype)(a.shape[0], a.shape[1], b.shape[2])
return gpugemmbatch_no_inplace(c, 1.0, a, b, 0.0) return gpugemmbatch_no_inplace(c, np.asarray(1.0, dtype=a.dtype),
a, b, np.asarray(0.0, dtype=a.dtype))
@register_opt() @register_opt()
...@@ -1215,6 +1223,8 @@ def local_gpua_gemmbatch_output_merge(node, *inputs): ...@@ -1215,6 +1223,8 @@ def local_gpua_gemmbatch_output_merge(node, *inputs):
@op_lifter([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer]) @op_lifter([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer])
@register_opt2([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer], 'fast_compile') @register_opt2([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer], 'fast_compile')
def local_gpua_ger(op, context_name, inputs, outputs): def local_gpua_ger(op, context_name, inputs, outputs):
if inputs[0].dtype not in ['float32', 'float64']:
return
return GpuGer(inplace=op.destructive) return GpuGer(inplace=op.destructive)
......
...@@ -234,7 +234,8 @@ def gpu_alloc_expected(x, *shp): ...@@ -234,7 +234,8 @@ def gpu_alloc_expected(x, *shp):
GpuAllocTester = makeTester( GpuAllocTester = makeTester(
name="GpuAllocTester", name="GpuAllocTester",
op=alloc, # The +1 is there to allow the lift to the GPU.
op=lambda *args: alloc(*args) + 1,
gpu_op=GpuAlloc(test_ctx_name), gpu_op=GpuAlloc(test_ctx_name),
cases=dict( cases=dict(
correct01=(rand(), np.int32(7)), correct01=(rand(), np.int32(7)),
......
...@@ -15,7 +15,8 @@ from .config import mode_with_gpu ...@@ -15,7 +15,8 @@ from .config import mode_with_gpu
from .test_basic_ops import makeTester, rand from .test_basic_ops import makeTester, rand
from ..blas import (gpugemv_inplace, gpugemv_no_inplace, from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
gpugemm_inplace, gpugemmbatch_no_inplace, gpugemm_inplace, gpugemm_no_inplace,
gpugemmbatch_no_inplace,
gpuger_inplace, gpuger_no_inplace, gpuger_inplace, gpuger_no_inplace,
GpuGer, gpu_dot22) GpuGer, gpu_dot22)
...@@ -23,16 +24,51 @@ from ..blas import (gpugemv_inplace, gpugemv_no_inplace, ...@@ -23,16 +24,51 @@ from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
GpuGemvTester = makeTester( GpuGemvTester = makeTester(
'GpuGemvTester', 'GpuGemvTester',
op=gemv_inplace, gpu_op=gpugemv_inplace, op=gemv_inplace, gpu_op=gpugemv_inplace,
cases=dict(dot_vv=[rand(1), 1, rand(1, 2), rand(2), 0], # It doesn't support float16
dot_vm=[rand(3), 1, rand(3, 2), rand(2), 0], cases=dict(dot_vv=[rand(1), 1., rand(1, 2), rand(2), 0.],
dot_vm=[rand(3), 1., rand(3, 2), rand(2), 0.],
float32=[rand(3).astype('float32'), np.float32(1),
rand(3, 2).astype('float32'),
rand(2).astype('float32'), np.float32(0)],
float64=[rand(3).astype('float64'), np.float64(1),
rand(3, 2).astype('float64'),
rand(2).astype('float64'), np.float64(0)],
# test_02=[rand(0), 1, rand(0, 2), rand(2), 0], # test_02=[rand(0), 1, rand(0, 2), rand(2), 0],
# test_30=[rand(3), 1, rand(3, 0), rand(0), 0], # test_30=[rand(3), 1, rand(3, 0), rand(0), 0],
# test_00=[rand(0), 1, rand(0, 0), rand(0), 0], # test_00=[rand(0), 1, rand(0, 0), rand(0), 0],
test_stride=[rand(3)[::-1], 1, rand(3, 2)[::-1], rand(2)[::-1], 0], test_stride=[rand(3)[::-1], 1., rand(3, 2)[::-1], rand(2)[::-1], 0.],
) )
) )
def test_float16():
    """Run gpugemm_no_inplace and gpu_dot22 on float16 matrices.

    Each GPU result is compared with the same expression computed by
    NumPy on the original (non-shared) values.
    """
    # gemm: float16 matrices combined with float32 scalar coefficients.
    gemm_values = [rand(3, 3).astype('float16'),
                   np.asarray(1, dtype=np.float32),
                   rand(3, 3).astype('float16'),
                   rand(3, 3).astype('float16'),
                   np.asarray(0.5, dtype=np.float32)]
    gemm_inputs = []
    for value in gemm_values:
        gemm_inputs.append(gpuarray_shared_constructor(value))
    gemm_fn = theano.function([], gpugemm_no_inplace(*gemm_inputs))
    c_val, alpha, a_val, b_val, beta = gemm_values
    gemm_result = gemm_fn()
    utt.assert_allclose(np.asarray(gemm_result),
                        alpha * np.dot(a_val, b_val) + beta * c_val)

    # dot22: plain product of two float16 matrices.
    dot_values = [rand(3, 3).astype('float16'),
                  rand(3, 3).astype('float16')]
    dot_inputs = []
    for value in dot_values:
        dot_inputs.append(gpuarray_shared_constructor(value))
    dot_fn = theano.function([], gpu_dot22(*dot_inputs))
    left, right = dot_values
    dot_result = dot_fn()
    utt.assert_allclose(np.asarray(dot_result), np.dot(left, right))
class TestGpuSgemv(TestCase, BaseGemv, utt.TestOptimizationMixin): class TestGpuSgemv(TestCase, BaseGemv, utt.TestOptimizationMixin):
mode = mode_with_gpu mode = mode_with_gpu
dtype = 'float32' dtype = 'float32'
...@@ -51,6 +87,7 @@ class TestGpuSgemv(TestCase, BaseGemv, utt.TestOptimizationMixin): ...@@ -51,6 +87,7 @@ class TestGpuSgemv(TestCase, BaseGemv, utt.TestOptimizationMixin):
GpuGemmTester = makeTester( GpuGemmTester = makeTester(
'GpuGemmTester', 'GpuGemmTester',
op=gemm_inplace, gpu_op=gpugemm_inplace, op=gemm_inplace, gpu_op=gpugemm_inplace,
# float16 tested in test_float16
cases=dict(test1=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 0.0], cases=dict(test1=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 0.0],
test2=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 1.0], test2=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 1.0],
test3=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), -1.0], test3=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), -1.0],
...@@ -59,7 +96,12 @@ GpuGemmTester = makeTester( ...@@ -59,7 +96,12 @@ GpuGemmTester = makeTester(
test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0], test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0], test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1], test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
test9=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), -1.1], float32=[rand(3, 4).astype('float32'), np.float32(-1.0),
rand(3, 5).astype('float32'),
rand(5, 4).astype('float32'), np.float32(-1.1)],
float64=[rand(3, 4).astype('float64'), np.float64(-1.0),
rand(3, 5).astype('float64'),
rand(5, 4).astype('float64'), np.float64(-1.1)],
# test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0], # test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
# test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1], # test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
# test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1], # test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
...@@ -68,14 +110,29 @@ GpuGemmTester = makeTester( ...@@ -68,14 +110,29 @@ GpuGemmTester = makeTester(
) )
# Batched gemm cases, each a list [z, alpha, x, y, beta]. One case is built
# for every 4-element combination of the sizes below, read as (b, m, k, n).
gemm_batched_tests = {
    "test_b%im%ik%in%i" % (b, m, k, n): [
        rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()]
    for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4)
}
# float16 not supported
# One extra case per supported dtype; every operand, including the two
# scalars, is cast to that dtype.
gemm_batched_tests.update({
    dtype: [rand(3, 4, 7).astype(dtype),
            rand().astype(dtype),
            rand(3, 4, 4).astype(dtype),
            rand(3, 4, 7).astype(dtype),
            rand().astype(dtype)]
    for dtype in ('float32', 'float64')
})
GpuGemmBatchTester = makeTester( GpuGemmBatchTester = makeTester(
'GpuGemmBatchTester', 'GpuGemmBatchTester',
op=lambda z, alpha, x, y, beta: alpha * batched_dot(x, y) + beta * z, op=lambda z, alpha, x, y, beta: alpha * batched_dot(x, y) + beta * z,
gpu_op=gpugemmbatch_no_inplace, gpu_op=gpugemmbatch_no_inplace,
cases=dict( cases=gemm_batched_tests
("test_b%im%ik%in%i" % (b, m, k, n), )
[rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()])
for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4)))
class TestGpuSger(TestGer): class TestGpuSger(TestGer):
......
...@@ -493,6 +493,27 @@ def test_many_arg_elemwise(): ...@@ -493,6 +493,27 @@ def test_many_arg_elemwise():
utt.assert_allclose(results_gpu, results_cpu) utt.assert_allclose(results_gpu, results_cpu)
def test_not_useless_scalar_gpuelemwise():
    """Check that a scalar elemwise is not lifted to the GPU needlessly.

    Compiles a small training function with ``mode_with_gpu``, runs it
    once, and checks that the graph holds exactly two GpuGemm nodes and
    that input 1 of the second one is produced by a ``tensor.Elemwise``
    op.
    """
    # We don't want to move elemwise on scalar on the GPU when the
    # result will not be used on the GPU!

    # NOTE(review): indentation was not recoverable here; the whole body is
    # assumed to run under change_flags -- confirm against upstream.
    with theano.configparser.change_flags(warn_float64='ignore'):
        X = tensor.fmatrix()
        batch = np.random.randn(32, 32).astype(np.float32)
        weights = theano.shared(np.random.randn(32, 32).astype(np.float32))
        residual = X - tensor.dot(X, weights)
        loss = residual.norm(L=2)
        lr = theano.shared(np.asarray(.001, dtype=np.float32))
        grad = tensor.grad(loss, weights)

        sgd_updates = [(weights, weights - lr * grad)]
        train = theano.function(inputs=[X], updates=sgd_updates,
                                mode=mode_with_gpu)
        train(batch)

        gemm_nodes = []
        for node in train.maker.fgraph.toposort():
            if isinstance(node.op, GpuGemm):
                gemm_nodes.append(node)
        assert len(gemm_nodes) == 2
        scalar_input = gemm_nodes[1].inputs[1]
        assert isinstance(scalar_input.owner.op, tensor.Elemwise)
def test_local_lift_abstractconv_gpu_shape(): def test_local_lift_abstractconv_gpu_shape():
prev = theano.config.on_opt_error prev = theano.config.on_opt_error
try: try:
......
...@@ -24,11 +24,25 @@ except ImportError: ...@@ -24,11 +24,25 @@ except ImportError:
_context_reg = {} _context_reg = {}
def gpu_supported(data):
    """
    Tell whether the dtype of `data` is supported on the GPU.

    Currently, complex dtypes are the only ones that are not supported.

    Parameters
    ----------
    data : numpy.ndarray or TensorVariable
        Only its ``dtype`` attribute is read here.

    Returns
    -------
    bool
        False when ``str(data.dtype)`` is listed in
        ``tensor.basic.complex_dtypes``, True otherwise.

    """
    dtype_name = str(data.dtype)
    if dtype_name in tensor.basic.complex_dtypes:
        return False
    return True
def move_to_gpu(data): def move_to_gpu(data):
""" """
Do we want to move this computation to the GPU? Do we want to move this computation to the GPU?
Currently, we don't move complex and scalar int. Currently, we don't move complex and scalar.
Parameters Parameters
---------- ----------
...@@ -36,10 +50,10 @@ def move_to_gpu(data): ...@@ -36,10 +50,10 @@ def move_to_gpu(data):
(it must have dtype and ndim parameter) (it must have dtype and ndim parameter)
""" """
# We don't support complex on the GPU # We don't support complex on the GPU
if str(data.dtype) in tensor.basic.complex_dtypes: if not gpu_supported(data):
return False return False
# We don't want scalar int on the GPU. # We don't want scalars on the GPU.
if data.ndim == 0 and str(data.dtype) in tensor.basic.discrete_dtypes: if data.ndim == 0:
return False return False
return True return True
...@@ -637,7 +651,7 @@ def gpuarray_shared_constructor(value, name=None, strict=False, ...@@ -637,7 +651,7 @@ def gpuarray_shared_constructor(value, name=None, strict=False,
if target is notset: if target is notset:
target = None target = None
if not move_to_gpu(value): if not gpu_supported(value):
raise TypeError('We do not move that data by default to the GPU') raise TypeError('We do not move that data by default to the GPU')
try: try:
get_context(target) get_context(target)
......
...@@ -317,7 +317,7 @@ class Ger(Op): ...@@ -317,7 +317,7 @@ class Ger(Op):
y = T.as_tensor_variable(y) y = T.as_tensor_variable(y)
x = T.as_tensor_variable(x) x = T.as_tensor_variable(x)
alpha = T.as_tensor_variable(alpha) alpha = T.as_tensor_variable(alpha)
if len(set([A.dtype, alpha.dtype, x.dtype, y.dtype])) != 1: if not(A.dtype == x.dtype == y.dtype == alpha.dtype):
raise TypeError('ger requires matching dtypes', raise TypeError('ger requires matching dtypes',
(A.dtype, alpha.dtype, x.dtype, y.dtype)) (A.dtype, alpha.dtype, x.dtype, y.dtype))
if alpha.ndim != 0: if alpha.ndim != 0:
...@@ -852,9 +852,6 @@ class Gemm(GemmRelated): ...@@ -852,9 +852,6 @@ class Gemm(GemmRelated):
(self, len(inputs))) (self, len(inputs)))
z, a, x, y, b = inputs z, a, x, y, b = inputs
# For the consistency check we don't want z to be a cached constant.
if getattr(z, 'cached', False):
z = copy.copy(z)
zr, xr, yr = [set(view_roots(i)) for i in (z, x, y)] zr, xr, yr = [set(view_roots(i)) for i in (z, x, y)]
# We want the gemm to be inplace. When this op is inplace, it # We want the gemm to be inplace. When this op is inplace, it
...@@ -867,10 +864,11 @@ class Gemm(GemmRelated): ...@@ -867,10 +864,11 @@ class Gemm(GemmRelated):
# think there is another mechanism that would prevent this, # think there is another mechanism that would prevent this,
# but I don't want to modify old code and have chance to break # but I don't want to modify old code and have chance to break
# something. # something.
if zr.intersection(xr): if self.inplace:
raise InconsistencyError(Gemm.E_z_uniq, (z, x)) if zr.intersection(xr):
if zr.intersection(yr): raise InconsistencyError(Gemm.E_z_uniq, (z, x))
raise InconsistencyError(Gemm.E_z_uniq, (z, y)) if zr.intersection(yr):
raise InconsistencyError(Gemm.E_z_uniq, (z, y))
if z.ndim != 2: if z.ndim != 2:
raise TypeError(Gemm.E_rank, z) raise TypeError(Gemm.E_rank, z)
......
...@@ -105,7 +105,7 @@ class t_gemm(TestCase): ...@@ -105,7 +105,7 @@ class t_gemm(TestCase):
def test0a(self): def test0a(self):
Gemm.debug = True Gemm.debug = True
try: try:
g = gemm_inplace([1.], 1., [1.], [1.], 1.) g = gemm_no_inplace([1.], 1., [1.], [1.], 1.)
except TypeError as e: except TypeError as e:
if exc_message(e) is Gemm.E_rank: if exc_message(e) is Gemm.E_rank:
return return
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论