Add explicit tests for float32/float64. float16 tests don't pass, so they are commented for now.

a7b4a79e · Frederic Bastien · 10a674a0 · a7b4a79e · a7b4a79e · a7b4a79e
--- a/theano/gpuarray/blas.py
+++ b/theano/gpuarray/blas.py
@@ -58,7 +58,11 @@ class GpuGemv(BlasOp):
        assert A.ndim == 2
        assert x.ndim == 1
        assert y.ndim == 1
-        assert A.dtype == x.dtype == y.dtype == alpha.dtype == beta.dtype
+        assert A.dtype == x.dtype == y.dtype
+        if A.dtype == 'float16':
+            assert alpha.dtype == beta.dtype == 'float32'
+        else:
+            assert alpha.dtype == beta.dtype == A.dtype
        return Apply(self, [y, alpha, A, x, beta], [y.type()])
    def perform(self, node, inputs, out_storage):
@@ -165,10 +169,14 @@ class GpuGemm(BlasOp):
        alpha = as_tensor_variable(alpha)
        beta = as_tensor_variable(beta)
-        if not (A.dtype == B.dtype == C.dtype == alpha.dtype == beta.dtype):
+        if not (A.dtype == B.dtype == C.dtype):
            raise TypeError(Gemm.E_mixed,
                            (A.dtype, B.dtype, C.dtype,
                             alpha.dtype, beta.dtype))
+        if A.dtype == 'float16':
+            assert alpha.dtype == beta.dtype == 'float32'
+        else:
+            assert alpha.dtype == beta.dtype == A.dtype
        if not A.dtype.startswith('float'):
            raise TypeError(Gemm.E_float, (A.dtype))
        assert alpha.ndim == 0
@@ -176,7 +184,6 @@ class GpuGemm(BlasOp):
        assert A.ndim == 2
        assert B.ndim == 2
        assert C.ndim == 2
-        assert A.dtype == B.dtype == C.dtype
        return Apply(self, [C, alpha, A, B, beta], [C.type()])
    def perform(self, node, inputs, outputs):

--- a/theano/gpuarray/tests/test_blas.py
+++ b/theano/gpuarray/tests/test_blas.py
@@ -25,6 +25,15 @@ GpuGemvTester = makeTester(
    op=gemv_inplace, gpu_op=gpugemv_inplace,
    cases=dict(dot_vv=[rand(1), 1, rand(1, 2), rand(2), 0],
               dot_vm=[rand(3), 1, rand(3, 2), rand(2), 0],
+#               float16=[rand(3).astype('float16'), np.float32(1),
+#                        rand(3, 2).astype('float16'),
+#                        rand(2).astype('float16'), np.float32(0)],
+               float32=[rand(3).astype('float32'), np.float32(1),
+                        rand(3, 2).astype('float32'),
+                        rand(2).astype('float32'), np.float32(0)],
+               float64=[rand(3).astype('float64'), np.float64(1),
+                        rand(3, 2).astype('float64'),
+                        rand(2).astype('float64'), np.float64(0)],
               # test_02=[rand(0), 1, rand(0, 2), rand(2), 0],
               # test_30=[rand(3), 1, rand(3, 0), rand(0), 0],
               # test_00=[rand(0), 1, rand(0, 0), rand(0), 0],
@@ -59,9 +68,15 @@ GpuGemmTester = makeTester(
               test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
               test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
               test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
-               test9=[rand(3, 4).astype('float32'), np.float32(-1.0),
+#               float16=[rand(3, 4).astype('float16'), np.float32(-1.0),
-                      rand(3, 5).astype('float32'),
+#                        rand(3, 5).astype('float16'),
-                      rand(5, 4).astype('float32'), np.float32(-1.1)],
+#                        rand(5, 4).astype('float16'), np.float32(-1.1)],
+               float32=[rand(3, 4).astype('float32'), np.float32(-1.0),
+                        rand(3, 5).astype('float32'),
+                        rand(5, 4).astype('float32'), np.float32(-1.1)],
+               float64=[rand(3, 4).astype('float64'), np.float64(-1.0),
+                        rand(3, 5).astype('float64'),
+                        rand(5, 4).astype('float64'), np.float64(-1.1)],
               # test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
               # test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
               # test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
@@ -74,11 +89,21 @@ gemm_batched_tests = dict(
    ("test_b%im%ik%in%i" % (b, m, k, n),
     [rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()])
    for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4))
+#gemm_batched_tests['float16'] = [rand(3, 4, 7).astype('float16'),
+#                                 rand().astype('float32'),
+#                                 rand(3, 4, 4).astype('float16'),
+#                                 rand(3, 4, 7).astype('float16'),
+#                                 rand().astype('float32')]
 gemm_batched_tests['float32'] = [rand(3, 4, 7).astype('float32'),
                                 rand().astype('float32'),
                                 rand(3, 4, 4).astype('float32'),
                                 rand(3, 4, 7).astype('float32'),
                                 rand().astype('float32')]
+gemm_batched_tests['float64'] = [rand(3, 4, 7).astype('float64'),
+                                 rand().astype('float64'),
+                                 rand(3, 4, 4).astype('float64'),
+                                 rand(3, 4, 7).astype('float64'),
+                                 rand().astype('float64')]
 GpuGemmBatchTester = makeTester(

--- a/theano/gpuarray/type.py
+++ b/theano/gpuarray/type.py
@@ -24,6 +24,9 @@ except ImportError:
 _context_reg = {}
+def do_gpu_support(data):
+    return str(data.dtype) not in tensor.basic.complex_dtypes
 def move_to_gpu(data):
    """
    Do we want to move this computation to the GPU?
@@ -36,7 +39,7 @@ def move_to_gpu(data):
           (it must have dtype and ndim parameter)
    """
    # We don't support complex on the GPU
-    if str(data.dtype) in tensor.basic.complex_dtypes:
+    if not do_gpu_support(data):
        return False
    # We don't want scalars on the GPU.
    if data.ndim == 0:
@@ -637,7 +640,7 @@ def gpuarray_shared_constructor(value, name=None, strict=False,
    if target is notset:
        target = None
-        if not move_to_gpu(value):
+        if not do_gpu_support(value):
            raise TypeError('We do not move that data by default to the GPU')
    try:
        get_context(target)