Commit 3173f02c authored by Frederic Bastien

Make float16 support correct: only test ops that support it, and disable the GPU optimization when an op does not support it.

Parent 05c424e7
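The pattern common to the three hunks below is a dtype guard at the top of each GPU op lifter: when the input dtype is not float32 or float64, the lifter returns early (returns None), which tells the optimizer to make no replacement and leave the original CPU op in place. Below is a minimal sketch of the complete gemv lifter, assuming the decorators and gpugemv ops visible in the diff; the else branch is filled in for illustration and is not part of the hunk shown.

def local_gpua_gemv(op, context_name, inputs, outputs):
    # GpuGemv has no float16 kernel; returning None means "no replacement",
    # so a float16 gemv stays on the CPU instead of being lifted to the GPU.
    if inputs[0].dtype not in ['float32', 'float64']:
        return
    if op.inplace:
        return gpugemv_inplace
    else:
        return gpugemv_no_inplace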
@@ -1163,6 +1163,8 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
 @op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv])
 @register_opt2([tensor.blas.Gemv], 'fast_compile')
 def local_gpua_gemv(op, context_name, inputs, outputs):
+    if inputs[0].dtype not in ['float32', 'float64']:
+        return
     if op.inplace:
         return gpugemv_inplace
     else:
@@ -1183,6 +1185,8 @@ def local_gpua_gemm(op, context_name, inputs, outputs):
 @op_lifter([tensor.blas.BatchedDot])
 @register_opt2([tensor.blas.BatchedDot], 'fast_compile')
 def local_gpua_gemmbatch(op, context_name, inputs, outputs):
+    if inputs[0].dtype not in ['float32', 'float64']:
+        return
     a, b = inputs
     c = tensor.AllocEmpty(a.dtype)(a.shape[0], a.shape[1], b.shape[2])
     return gpugemmbatch_no_inplace(c, np.asarray(1.0, dtype=a.dtype),
@@ -1217,6 +1221,8 @@ def local_gpua_gemmbatch_output_merge(node, *inputs):
 @op_lifter([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer])
 @register_opt2([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer], 'fast_compile')
 def local_gpua_ger(op, context_name, inputs, outputs):
+    if inputs[0].dtype not in ['float32', 'float64']:
+        return
     return GpuGer(inplace=op.destructive)
@@ -15,7 +15,8 @@ from .config import mode_with_gpu
 from .test_basic_ops import makeTester, rand
 from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
-                    gpugemm_inplace, gpugemmbatch_no_inplace,
+                    gpugemm_inplace, gpugemm_no_inplace,
+                    gpugemmbatch_no_inplace,
                     gpuger_inplace, gpuger_no_inplace,
                     GpuGer, gpu_dot22)
@@ -23,11 +24,9 @@ from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
 GpuGemvTester = makeTester(
     'GpuGemvTester',
     op=gemv_inplace, gpu_op=gpugemv_inplace,
+    # It don't support float16
     cases=dict(dot_vv=[rand(1), 1, rand(1, 2), rand(2), 0],
                dot_vm=[rand(3), 1, rand(3, 2), rand(2), 0],
-               # float16=[rand(3).astype('float16'), np.float32(1),
-               #          rand(3, 2).astype('float16'),
-               #          rand(2).astype('float16'), np.float32(0)],
                float32=[rand(3).astype('float32'), np.float32(1),
                         rand(3, 2).astype('float32'),
                         rand(2).astype('float32'), np.float32(0)],
@@ -42,19 +41,32 @@ GpuGemvTester = makeTester(
 )
-def test_gemv_float16():
-    float16 = [rand(3).astype('float16'),
-               np.asarray(1, dtype=np.float32),
-               rand(3, 2).astype('float16'),
-               rand(2).astype('float16'),
-               np.asarray(0.5, dtype=np.float32)]
-    float16 = [gpuarray_shared_constructor(val)
-               for val in float16]
-    o = gpugemv_no_inplace(*float16)
+def test_float16():
+    # gemm
+    float16_data = [rand(3, 3).astype('float16'),
+                    np.asarray(1, dtype=np.float32),
+                    rand(3, 3).astype('float16'),
+                    rand(3, 3).astype('float16'),
+                    np.asarray(0.5, dtype=np.float32)]
+    float16_shared = [gpuarray_shared_constructor(val)
+                      for val in float16_data]
+    o = gpugemm_no_inplace(*float16_shared)
     f = theano.function([], o)
-    y, alpha, A, x, beta = float16
+    y, alpha, A, x, beta = float16_data
     out = f()
-    utt.assert_asclose(out, alpha * np.dot(A, x) + beta * y)
+    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)
+    # dot22
+    float16_data = [rand(3, 3).astype('float16'),
+                    rand(3, 3).astype('float16')]
+    float16_shared = [gpuarray_shared_constructor(val)
+                      for val in float16_data]
+    o = gpu_dot22(*float16_shared)
+    f = theano.function([], o)
+    x, y = float16_data
+    out = f()
+    utt.assert_allclose(np.asarray(out), np.dot(x, y))
 class TestGpuSgemv(TestCase, BaseGemv, utt.TestOptimizationMixin):
@@ -75,6 +87,7 @@ class TestGpuSgemv(TestCase, BaseGemv, utt.TestOptimizationMixin):
 GpuGemmTester = makeTester(
     'GpuGemmTester',
     op=gemm_inplace, gpu_op=gpugemm_inplace,
+    # float16 tested in test_float16
     cases=dict(test1=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 0.0],
                test2=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 1.0],
                test3=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), -1.0],
@@ -83,9 +96,6 @@ GpuGemmTester = makeTester(
                test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
                test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
                test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
-               # float16=[rand(3, 4).astype('float16'), np.float32(-1.0),
-               #          rand(3, 5).astype('float16'),
-               #          rand(5, 4).astype('float16'), np.float32(-1.1)],
                float32=[rand(3, 4).astype('float32'), np.float32(-1.0),
                         rand(3, 5).astype('float32'),
                         rand(5, 4).astype('float32'), np.float32(-1.1)],
@@ -104,11 +114,7 @@ gemm_batched_tests = dict(
     ("test_b%im%ik%in%i" % (b, m, k, n),
      [rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()])
     for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4))
-#gemm_batched_tests['float16'] = [rand(3, 4, 7).astype('float16'),
-#                                 rand().astype('float32'),
-#                                 rand(3, 4, 4).astype('float16'),
-#                                 rand(3, 4, 7).astype('float16'),
-#                                 rand().astype('float32')]
+# float16 not supported
 gemm_batched_tests['float32'] = [rand(3, 4, 7).astype('float32'),
                                  rand().astype('float32'),
                                  rand(3, 4, 4).astype('float32'),
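For reference, the gemm half of the new test_float16 can be reproduced as a standalone script. This is a sketch under assumptions: the theano.gpuarray.* import paths depend on where the gpuarray backend lives in the branch being used (it may be theano.sandbox.gpuarray instead), and a GPU context must already be configured (e.g. device=cuda).

import numpy as np
import theano
from theano.gpuarray.type import gpuarray_shared_constructor  # assumed path
from theano.gpuarray.blas import gpugemm_no_inplace           # assumed path

# Same layout as test_float16: (y, alpha, A, x, beta) with float16 matrices
# and float32 scalars, all wrapped as GPU shared variables.
data = [np.random.rand(3, 3).astype('float16'),
        np.asarray(1, dtype=np.float32),
        np.random.rand(3, 3).astype('float16'),
        np.random.rand(3, 3).astype('float16'),
        np.asarray(0.5, dtype=np.float32)]
shared = [gpuarray_shared_constructor(v) for v in data]

f = theano.function([], gpugemm_no_inplace(*shared))
y, alpha, A, x, beta = data
out = np.asarray(f())
# Loose tolerance: float16 accumulation is far less precise than float64.
assert np.allclose(out, alpha * np.dot(A, x) + beta * y, atol=1e-2)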