Commit 0e595433 authored by Frederic Bastien

Fix float16/cast handling and add/fix tests

Parent 12c78b33
@@ -108,9 +108,8 @@ class GpuElemwise(HideC, Elemwise):
                # load in float16 and cast to float32 and do the reverse for
                # the output.
                scalar_op = self.scalar_op
                if isinstance(scalar_op, (scalar.Cast, Composite)):
                    scalar_op = scalar_op.clone_float32()
                fake_node = scalar_op.make_node(*scal_v_ins)
                scal_v_out = fake_node.outputs
                assert len(scal_v_out) == len(node.outputs)
@@ -119,9 +118,9 @@ class GpuElemwise(HideC, Elemwise):
            inps, outs,
            dict(fail='return;'))
        # If the following assert fail, then we need to update the
        # code handler above.
        assert 'npy_float16' not in kop
        support_code = ""
        try:
...
@@ -52,27 +52,48 @@ def test_elemwise_pow():
    assert_allclose(out, expected_out)
class test_float16():
    def test_composite_elemwise_float16(self):
        w = theano.tensor.bvector()
        x = theano.tensor.vector(dtype='float16')
        y = theano.tensor.fvector()

        cz = tensor.tanh(x + tensor.cast(y, 'float16'))
        o = (cz - cz**2 +
             tensor.cast(x, 'int16') + tensor.cast(x, 'float32') +
             tensor.cast(w, 'float16') -
             tensor.constant(numpy.float16(1.0)))

        theano.function([w, x, y], o, mode=mode_with_gpu)

        v = theano.tensor.vector(dtype='uint8')
        w = theano.tensor.vector(dtype='float16')
        x = theano.tensor.vector(dtype='float16')
        y = theano.tensor.vector(dtype='float16')
        z = theano.tensor.vector(dtype='float16')

        o = tensor.switch(v, tensor.mul(w, x, y), z)
        theano.function([v, w, x, y, z], o, mode=mode_with_gpu)
def test_cast_float16(self):
f16 = theano.tensor.vector(dtype='float16')
f32 = theano.tensor.fvector()
f = theano.function([f16, f32],
[f16.astype('float32'),
f32.astype('float16'),
f32.astype('float64')],
mode=mode_with_gpu)
d1 = numpy.random.rand(4).astype('float16')
d2 = numpy.random.rand(5).astype('float32')
res = f(d1, d2)
assert res[0].dtype == "float32"
assert res[1].dtype == "float16"
assert res[2].dtype == "float64"
assert_allclose(d1, res[0])
assert_allclose(d2, res[1])
assert_allclose(d2, res[2])
class test_GpuDimShuffle(test_elemwise.test_DimShuffle):
...
@@ -2220,6 +2220,11 @@ class Cast(UnaryScalarOp):
    def __str__(self):
        return '%s{%s}' % (self.__class__.__name__, self.o_type.dtype)
def clone_float32(self):
if self.o_type == float16:
return identity
return self
    def make_new_inplace(self, output_types_preference=None, name=None):
        """
        This op.__init__ fct don't have the same parameter as other scalar op.
...
@@ -93,7 +93,7 @@ class test_composite(unittest.TestCase):
        y = float16()
        z = float16()
        c = Composite([v, w, x, y, z], [switch(v, mul(w, x, y), z)])
        assert has_f16(c)
        nc = c.clone_float32()
...
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment