提交 10a674a0 — 作者: Frederic Bastien

Do not force float64 in the graph when it isn't needed. Do the same dtype requirement as on the CPU.

Do not force float64 in the graph when it isn't needed. Do the same dtype requirement as on the CPU.
上级 fbe25e32
......@@ -50,16 +50,15 @@ class GpuGemv(BlasOp):
A = as_gpuarray_variable(A, ctx_name)
x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'):
alpha = as_tensor_variable(alpha).astype('float64')
beta = as_tensor_variable(beta).astype('float64')
alpha = as_tensor_variable(alpha)
beta = as_tensor_variable(beta)
assert alpha.ndim == 0
assert beta.ndim == 0
assert A.ndim == 2
assert x.ndim == 1
assert y.ndim == 1
assert A.dtype == x.dtype == y.dtype
assert A.dtype == x.dtype == y.dtype == alpha.dtype == beta.dtype
return Apply(self, [y, alpha, A, x, beta], [y.type()])
def perform(self, node, inputs, out_storage):
......@@ -163,9 +162,15 @@ class GpuGemm(BlasOp):
A = as_gpuarray_variable(A, ctx_name)
B = as_gpuarray_variable(B, ctx_name)
C = as_gpuarray_variable(C, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'):
alpha = as_tensor_variable(alpha).astype('float64')
beta = as_tensor_variable(beta).astype('float64')
alpha = as_tensor_variable(alpha)
beta = as_tensor_variable(beta)
if not (A.dtype == B.dtype == C.dtype == alpha.dtype == beta.dtype):
raise TypeError(Gemm.E_mixed,
(A.dtype, B.dtype, C.dtype,
alpha.dtype, beta.dtype))
if not A.dtype.startswith('float'):
raise TypeError(Gemm.E_float, (A.dtype))
assert alpha.ndim == 0
assert beta.ndim == 0
assert A.ndim == 2
......@@ -244,8 +249,11 @@ class GpuGer(BlasOp):
A = as_gpuarray_variable(A, ctx_name)
x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'):
alpha = as_tensor_variable(alpha).astype('float64')
alpha = as_tensor_variable(alpha)
if len(set([A.dtype, alpha.dtype, x.dtype, y.dtype])) != 1:
raise TypeError('ger requires matching dtypes',
(A.dtype, alpha.dtype, x.dtype, y.dtype))
assert alpha.ndim == 0
assert A.ndim == 2
assert x.ndim == 1
......@@ -383,15 +391,14 @@ class GpuGemmBatch(BlasOp):
A = as_gpuarray_variable(A, ctx_name)
B = as_gpuarray_variable(B, ctx_name)
C = as_gpuarray_variable(C, ctx_name)
with theano.configparser.change_flags(warn_float64='ignore'):
alpha = as_tensor_variable(alpha).astype('float64')
beta = as_tensor_variable(beta).astype('float64')
alpha = as_tensor_variable(alpha)
beta = as_tensor_variable(beta)
assert alpha.ndim == 0
assert beta.ndim == 0
assert A.ndim == 3
assert B.ndim == 3
assert C.ndim == 3
assert A.dtype == B.dtype == C.dtype
assert A.dtype == B.dtype == C.dtype == alpha.dtype == beta.dtype
return Apply(self, [C, alpha, A, B, beta], [C.type()])
def c_headers(self):
......
......@@ -1185,7 +1185,8 @@ def local_gpua_gemm(op, context_name, inputs, outputs):
def local_gpua_gemmbatch(op, context_name, inputs, outputs):
a, b = inputs
c = tensor.AllocEmpty(a.dtype)(a.shape[0], a.shape[1], b.shape[2])
return gpugemmbatch_no_inplace(c, 1.0, a, b, 0.0)
return gpugemmbatch_no_inplace(c, np.asarray(1.0, dtype=a.dtype),
a, b, np.asarray(0.0, dtype=a.dtype))
@register_opt()
......
......@@ -59,7 +59,9 @@ GpuGemmTester = makeTester(
test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
test9=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), -1.1],
test9=[rand(3, 4).astype('float32'), np.float32(-1.0),
rand(3, 5).astype('float32'),
rand(5, 4).astype('float32'), np.float32(-1.1)],
# test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
# test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
# test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
......@@ -68,14 +70,23 @@ GpuGemmTester = makeTester(
)
gemm_batched_tests = dict(
("test_b%im%ik%in%i" % (b, m, k, n),
[rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()])
for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4))
gemm_batched_tests['float32'] = [rand(3, 4, 7).astype('float32'),
rand().astype('float32'),
rand(3, 4, 4).astype('float32'),
rand(3, 4, 7).astype('float32'),
rand().astype('float32')]
GpuGemmBatchTester = makeTester(
'GpuGemmBatchTester',
op=lambda z, alpha, x, y, beta: alpha * batched_dot(x, y) + beta * z,
gpu_op=gpugemmbatch_no_inplace,
cases=dict(
("test_b%im%ik%in%i" % (b, m, k, n),
[rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()])
for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4)))
cases=gemm_batched_tests
)
class TestGpuSger(TestGer):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论