提交 10a674a0 作者: Frederic Bastien

Do not force float64 in the graph when it isn't needed. Do the same dtype…

Do not force float64 in the graph when it isn't needed. Apply the same dtype requirements as on the CPU.
上级 fbe25e32
...@@ -50,16 +50,15 @@ class GpuGemv(BlasOp): ...@@ -50,16 +50,15 @@ class GpuGemv(BlasOp):
A = as_gpuarray_variable(A, ctx_name) A = as_gpuarray_variable(A, ctx_name)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name) y = as_gpuarray_variable(y, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'): alpha = as_tensor_variable(alpha)
alpha = as_tensor_variable(alpha).astype('float64') beta = as_tensor_variable(beta)
beta = as_tensor_variable(beta).astype('float64')
assert alpha.ndim == 0 assert alpha.ndim == 0
assert beta.ndim == 0 assert beta.ndim == 0
assert A.ndim == 2 assert A.ndim == 2
assert x.ndim == 1 assert x.ndim == 1
assert y.ndim == 1 assert y.ndim == 1
assert A.dtype == x.dtype == y.dtype assert A.dtype == x.dtype == y.dtype == alpha.dtype == beta.dtype
return Apply(self, [y, alpha, A, x, beta], [y.type()]) return Apply(self, [y, alpha, A, x, beta], [y.type()])
def perform(self, node, inputs, out_storage): def perform(self, node, inputs, out_storage):
...@@ -163,9 +162,15 @@ class GpuGemm(BlasOp): ...@@ -163,9 +162,15 @@ class GpuGemm(BlasOp):
A = as_gpuarray_variable(A, ctx_name) A = as_gpuarray_variable(A, ctx_name)
B = as_gpuarray_variable(B, ctx_name) B = as_gpuarray_variable(B, ctx_name)
C = as_gpuarray_variable(C, ctx_name) C = as_gpuarray_variable(C, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'): alpha = as_tensor_variable(alpha)
alpha = as_tensor_variable(alpha).astype('float64') beta = as_tensor_variable(beta)
beta = as_tensor_variable(beta).astype('float64')
if not (A.dtype == B.dtype == C.dtype == alpha.dtype == beta.dtype):
raise TypeError(Gemm.E_mixed,
(A.dtype, B.dtype, C.dtype,
alpha.dtype, beta.dtype))
if not A.dtype.startswith('float'):
raise TypeError(Gemm.E_float, (A.dtype))
assert alpha.ndim == 0 assert alpha.ndim == 0
assert beta.ndim == 0 assert beta.ndim == 0
assert A.ndim == 2 assert A.ndim == 2
...@@ -244,8 +249,11 @@ class GpuGer(BlasOp): ...@@ -244,8 +249,11 @@ class GpuGer(BlasOp):
A = as_gpuarray_variable(A, ctx_name) A = as_gpuarray_variable(A, ctx_name)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name) y = as_gpuarray_variable(y, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'): alpha = as_tensor_variable(alpha)
alpha = as_tensor_variable(alpha).astype('float64') if len(set([A.dtype, alpha.dtype, x.dtype, y.dtype])) != 1:
raise TypeError('ger requires matching dtypes',
(A.dtype, alpha.dtype, x.dtype, y.dtype))
assert alpha.ndim == 0 assert alpha.ndim == 0
assert A.ndim == 2 assert A.ndim == 2
assert x.ndim == 1 assert x.ndim == 1
...@@ -383,15 +391,14 @@ class GpuGemmBatch(BlasOp): ...@@ -383,15 +391,14 @@ class GpuGemmBatch(BlasOp):
A = as_gpuarray_variable(A, ctx_name) A = as_gpuarray_variable(A, ctx_name)
B = as_gpuarray_variable(B, ctx_name) B = as_gpuarray_variable(B, ctx_name)
C = as_gpuarray_variable(C, ctx_name) C = as_gpuarray_variable(C, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'): alpha = as_tensor_variable(alpha)
alpha = as_tensor_variable(alpha).astype('float64') beta = as_tensor_variable(beta)
beta = as_tensor_variable(beta).astype('float64')
assert alpha.ndim == 0 assert alpha.ndim == 0
assert beta.ndim == 0 assert beta.ndim == 0
assert A.ndim == 3 assert A.ndim == 3
assert B.ndim == 3 assert B.ndim == 3
assert C.ndim == 3 assert C.ndim == 3
assert A.dtype == B.dtype == C.dtype assert A.dtype == B.dtype == C.dtype == alpha.dtype == beta.dtype
return Apply(self, [C, alpha, A, B, beta], [C.type()]) return Apply(self, [C, alpha, A, B, beta], [C.type()])
def c_headers(self): def c_headers(self):
......
...@@ -1185,7 +1185,8 @@ def local_gpua_gemm(op, context_name, inputs, outputs): ...@@ -1185,7 +1185,8 @@ def local_gpua_gemm(op, context_name, inputs, outputs):
def local_gpua_gemmbatch(op, context_name, inputs, outputs): def local_gpua_gemmbatch(op, context_name, inputs, outputs):
a, b = inputs a, b = inputs
c = tensor.AllocEmpty(a.dtype)(a.shape[0], a.shape[1], b.shape[2]) c = tensor.AllocEmpty(a.dtype)(a.shape[0], a.shape[1], b.shape[2])
return gpugemmbatch_no_inplace(c, 1.0, a, b, 0.0) return gpugemmbatch_no_inplace(c, np.asarray(1.0, dtype=a.dtype),
a, b, np.asarray(0.0, dtype=a.dtype))
@register_opt() @register_opt()
......
...@@ -59,7 +59,9 @@ GpuGemmTester = makeTester( ...@@ -59,7 +59,9 @@ GpuGemmTester = makeTester(
test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0], test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0], test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1], test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
test9=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), -1.1], test9=[rand(3, 4).astype('float32'), np.float32(-1.0),
rand(3, 5).astype('float32'),
rand(5, 4).astype('float32'), np.float32(-1.1)],
# test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0], # test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
# test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1], # test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
# test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1], # test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
...@@ -68,14 +70,23 @@ GpuGemmTester = makeTester( ...@@ -68,14 +70,23 @@ GpuGemmTester = makeTester(
) )
gemm_batched_tests = dict(
("test_b%im%ik%in%i" % (b, m, k, n),
[rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()])
for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4))
gemm_batched_tests['float32'] = [rand(3, 4, 7).astype('float32'),
rand().astype('float32'),
rand(3, 4, 4).astype('float32'),
rand(3, 4, 7).astype('float32'),
rand().astype('float32')]
GpuGemmBatchTester = makeTester( GpuGemmBatchTester = makeTester(
'GpuGemmBatchTester', 'GpuGemmBatchTester',
op=lambda z, alpha, x, y, beta: alpha * batched_dot(x, y) + beta * z, op=lambda z, alpha, x, y, beta: alpha * batched_dot(x, y) + beta * z,
gpu_op=gpugemmbatch_no_inplace, gpu_op=gpugemmbatch_no_inplace,
cases=dict( cases=gemm_batched_tests
("test_b%im%ik%in%i" % (b, m, k, n), )
[rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()])
for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4)))
class TestGpuSger(TestGer): class TestGpuSger(TestGer):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论