提交 10a674a0 — 作者: Frederic Bastien

Do not force float64 in the graph when it isn't needed. Do the same dtype requirement as on the CPU.

Do not force float64 in the graph when it isn't needed. Do the same dtype requirement as on the CPU.
上级 fbe25e32
......@@ -50,16 +50,15 @@ class GpuGemv(BlasOp):
A = as_gpuarray_variable(A, ctx_name)
x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'):
alpha = as_tensor_variable(alpha).astype('float64')
beta = as_tensor_variable(beta).astype('float64')
alpha = as_tensor_variable(alpha)
beta = as_tensor_variable(beta)
assert alpha.ndim == 0
assert beta.ndim == 0
assert A.ndim == 2
assert x.ndim == 1
assert y.ndim == 1
assert A.dtype == x.dtype == y.dtype
assert A.dtype == x.dtype == y.dtype == alpha.dtype == beta.dtype
return Apply(self, [y, alpha, A, x, beta], [y.type()])
def perform(self, node, inputs, out_storage):
......@@ -163,9 +162,15 @@ class GpuGemm(BlasOp):
A = as_gpuarray_variable(A, ctx_name)
B = as_gpuarray_variable(B, ctx_name)
C = as_gpuarray_variable(C, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'):
alpha = as_tensor_variable(alpha).astype('float64')
beta = as_tensor_variable(beta).astype('float64')
alpha = as_tensor_variable(alpha)
beta = as_tensor_variable(beta)
if not (A.dtype == B.dtype == C.dtype == alpha.dtype == beta.dtype):
raise TypeError(Gemm.E_mixed,
(A.dtype, B.dtype, C.dtype,
alpha.dtype, beta.dtype))
if not A.dtype.startswith('float'):
raise TypeError(Gemm.E_float, (A.dtype))
assert alpha.ndim == 0
assert beta.ndim == 0
assert A.ndim == 2
......@@ -244,8 +249,11 @@ class GpuGer(BlasOp):
A = as_gpuarray_variable(A, ctx_name)
x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'):
alpha = as_tensor_variable(alpha).astype('float64')
alpha = as_tensor_variable(alpha)
if len(set([A.dtype, alpha.dtype, x.dtype, y.dtype])) != 1:
raise TypeError('ger requires matching dtypes',
(A.dtype, alpha.dtype, x.dtype, y.dtype))
assert alpha.ndim == 0
assert A.ndim == 2
assert x.ndim == 1
......@@ -383,15 +391,14 @@ class GpuGemmBatch(BlasOp):
A = as_gpuarray_variable(A, ctx_name)
B = as_gpuarray_variable(B, ctx_name)
C = as_gpuarray_variable(C, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'):
alpha = as_tensor_variable(alpha).astype('float64')
beta = as_tensor_variable(beta).astype('float64')
alpha = as_tensor_variable(alpha)
beta = as_tensor_variable(beta)
assert alpha.ndim == 0
assert beta.ndim == 0
assert A.ndim == 3
assert B.ndim == 3
assert C.ndim == 3
assert A.dtype == B.dtype == C.dtype
assert A.dtype == B.dtype == C.dtype == alpha.dtype == beta.dtype
return Apply(self, [C, alpha, A, B, beta], [C.type()])
def c_headers(self):
......
......@@ -1185,7 +1185,8 @@ def local_gpua_gemm(op, context_name, inputs, outputs):
def local_gpua_gemmbatch(op, context_name, inputs, outputs):
a, b = inputs
c = tensor.AllocEmpty(a.dtype)(a.shape[0], a.shape[1], b.shape[2])
return gpugemmbatch_no_inplace(c, 1.0, a, b, 0.0)
return gpugemmbatch_no_inplace(c, np.asarray(1.0, dtype=a.dtype),
a, b, np.asarray(0.0, dtype=a.dtype))
@register_opt()
......
......@@ -59,7 +59,9 @@ GpuGemmTester = makeTester(
test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
test9=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), -1.1],
test9=[rand(3, 4).astype('float32'), np.float32(-1.0),
rand(3, 5).astype('float32'),
rand(5, 4).astype('float32'), np.float32(-1.1)],
# test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
# test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
# test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
......@@ -68,14 +70,23 @@ GpuGemmTester = makeTester(
)
gemm_batched_tests = dict(
("test_b%im%ik%in%i" % (b, m, k, n),
[rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()])
for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4))
gemm_batched_tests['float32'] = [rand(3, 4, 7).astype('float32'),
rand().astype('float32'),
rand(3, 4, 4).astype('float32'),
rand(3, 4, 7).astype('float32'),
rand().astype('float32')]
GpuGemmBatchTester = makeTester(
'GpuGemmBatchTester',
op=lambda z, alpha, x, y, beta: alpha * batched_dot(x, y) + beta * z,
gpu_op=gpugemmbatch_no_inplace,
cases=dict(
("test_b%im%ik%in%i" % (b, m, k, n),
[rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()])
for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4)))
cases=gemm_batched_tests
)
class TestGpuSger(TestGer):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论