Commit 8d0b398d authored by Arnaud Bergeron

Add a check at C code generation time for ops that are not explicitly tagged for float16 support, and disable their C code.
Parent 708efc22
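The mechanism is a plain class attribute: ops whose C code has been audited for float16 set `_f16_ok = True`, and every other op is treated as unsafe by default via `getattr`. A minimal standalone sketch of that convention (the classes below are illustrative stand-ins, not the real `theano.gof.Op` hierarchy):

```python
# Standalone sketch of the opt-in convention; UntaggedOp/TaggedOp are
# hypothetical stand-ins, not classes touched by this commit.
class UntaggedOp(object):
    """An op that never declared float16 support."""

class TaggedOp(object):
    """An op whose C code was audited for float16."""
    _f16_ok = True

# The check never requires the attribute to exist: getattr with a
# False default makes "C code unsafe on float16" the status quo.
print(getattr(TaggedOp(), '_f16_ok', False))    # True  -> C code kept
print(getattr(UntaggedOp(), '_f16_ok', False))  # False -> C code disabled
```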
......@@ -215,6 +215,8 @@ class Shape(gof.Op):
@note: Non-differentiable.
"""
_f16_ok = True
# Mapping from Type to C code (and version) to use.
# In the C code, the name of the input variable is %(iname)s,
# the output variable is %(oname)s.
......@@ -308,6 +310,8 @@ class Shape_i(gof.Op):
@note: Non-differentiable.
"""
_f16_ok = True
# Mapping from Type to C code (and version) to use.
# In the C code, the name of the input variable is %(iname)s,
# the output variable is %(oname)s.
......
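The `%(iname)s` / `%(oname)s` placeholders mentioned in the comments above are ordinary Python %-style dict formatting: the C code template is expanded with the generated variable names at compile time. A trivial sketch of that substitution (the template string and names are made up for illustration):

```python
# %-dict substitution as used for the Type -> C code mapping above;
# the template and variable names here are hypothetical examples.
template = "%(oname)s = PyArray_SHAPE(%(iname)s);"
print(template % {'iname': 'V3', 'oname': 'V5'})
# -> V5 = PyArray_SHAPE(V3);
```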
......@@ -726,9 +726,20 @@ class Op(utils.object2, PureOp, CLinkerOp):
node_output_storage = [storage_map[r] for r in node.outputs]
node_input_compute = [compute_map[r] for r in node.inputs]
node_output_compute = [compute_map[r] for r in node.outputs]
#logger.debug('Compiling node %i of graph' % node_idx)
if self._op_use_c_code:
try:
# float16 gets special treatment since running
# unprepared C code on it will produce bad results.
if not getattr(self, '_f16_ok', False):
def is_f16(t):
return getattr(t, 'dtype', '') == 'float16'
if (any(is_f16(i.type) for i in node.inputs) or
any(is_f16(o.type) for o in node.outputs)):
print ("Disabling C code for %s due to unsupported "
"float16" % (self,))
raise NotImplementedError("float16")
e = FunctionGraph(node.inputs, node.outputs)
e_no_recycling = [new_o
......
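Read in isolation, the guard added above is a small predicate: keep the C code when the op is tagged, otherwise refuse as soon as any input or output type carries a float16 dtype and fall back to the Python `perform`. A runnable sketch under that reading (`FakeType` and `c_code_allowed` are illustrative helpers, not Theano APIs):

```python
# Illustrative restatement of the float16 guard in Op.make_thunk;
# FakeType and c_code_allowed are hypothetical helpers.
class FakeType(object):
    def __init__(self, dtype):
        self.dtype = dtype

def c_code_allowed(op, in_types, out_types):
    if getattr(op, '_f16_ok', False):
        return True  # op opted in: its C code handles float16
    def is_f16(t):
        return getattr(t, 'dtype', '') == 'float16'
    # Untagged op: any float16 input or output disables the C path,
    # mirroring the NotImplementedError("float16") raised above.
    return not (any(is_f16(t) for t in in_types) or
                any(is_f16(t) for t in out_types))

class Untagged(object):
    pass

class Tagged(object):
    _f16_ok = True

f16 = [FakeType('float16')]
f32 = [FakeType('float32')]
assert c_code_allowed(Untagged(), f32, f32)      # no float16: C code stays
assert not c_code_allowed(Untagged(), f16, f32)  # falls back to Python
assert c_code_allowed(Tagged(), f16, f16)        # tagged op keeps C code
```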
......@@ -187,11 +187,8 @@ class GpuKernelBase(object):
class HostFromGpu(Op):
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
__props__ = ()
_f16_ok = True
def __str__(self):
return 'HostFromGpu(gpuarray)'
......@@ -270,11 +267,8 @@ host_from_gpu = HostFromGpu()
class GpuFromHost(Op):
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
__props__ = ()
_f16_ok = True
def __str__(self):
return 'GpuFromHost(gpuarray)'
......@@ -574,18 +568,15 @@ cuda_from_gpu = CudaFromGpu()
class GpuAlloc(HideC, Alloc):
__props__ = ('memset_0',)
_f16_ok = True
def __init__(self, memset_0=False):
"""memset_0 is only an optimized version. True, it mean the
value is always 0, so the c code call memset as it is faster.
"""
self.memset_0 = memset_0
def __eq__(self, other):
return type(self) == type(other) and self.memset_0 == other.memset_0
def __hash__(self):
return hash(type(self)) ^ hash(self.memset_0)
def __str__(self):
# Hide the memset parameter when not used to prevent confusion.
if self.memset_0:
......@@ -729,25 +720,17 @@ class GpuContiguous(Op):
Always return a c contiguous output. Copy the input only if it is
not already c contiguous.
"""
__props__ = ()
view_map = {0: [0]}
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
_f16_ok = True
def grad(self, inputs, dout):
x, = inputs
dout, = dout
dout = as_gpuarray_variable(dout)
return [dout]
def __str__(self):
return self.__class__.__name__
def make_node(self, input):
input = as_gpuarray_variable(input)
return Apply(self, [input], [input.type()])
......@@ -795,6 +778,8 @@ class GpuReshape(HideC, tensor.Reshape):
"""
Implement Reshape on the gpu.
"""
_f16_ok = True
# __hash__, __eq__, __str__ come from tensor.Reshape
def make_node(self, x, shp):
x = as_gpuarray_variable(x)
......@@ -832,6 +817,8 @@ class GpuReshape(HideC, tensor.Reshape):
class GpuJoin(HideC, Join):
_f16_ok = True
def make_node(self, axis, *tensors):
node = Join.make_node(self, axis, *tensors)
......@@ -890,6 +877,7 @@ class GpuSplit(HideC, Split):
class GpuEye(GpuKernelBase, Op):
__props__ = ('dtype',)
_f16_ok = True
def __init__(self, dtype=None):
if dtype is None:
......
......@@ -61,6 +61,7 @@ def as_C_string_const(s):
class GpuElemwise(HideC, Elemwise):
nin = property(lambda self: self.scalar_op.nin)
nout = property(lambda self: self.scalar_op.nout)
_f16_ok = True
def __str__(self):
if self.name is not None:
......@@ -475,6 +476,8 @@ class SupportCodeError(Exception):
class GpuDimShuffle(HideC, DimShuffle):
_f16_ok = True
def make_node(self, input):
res = DimShuffle.make_node(self, input)
otype = GpuArrayType(dtype=res.outputs[0].type.dtype,
......@@ -602,8 +605,8 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
pre_scalar_op: if present, must be a scalar op with only 1
input. It is applied to each input value before the reduction.
"""
_f16_ok = True
def __init__(self, scalar_op, axis=None,
reduce_mask=None, dtype=None, acc_dtype=None,
......
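The `pre_scalar_op` described in the `GpuCAReduceCuda` docstring composes an elementwise transform with the reduction, so e.g. a sum of squares is an `add` reduction whose pre_scalar_op squares each element. A plain-Python model of that behaviour (conceptual only, not the CUDA kernel):

```python
# Conceptual model of pre_scalar_op (plain Python, not the CUDA code):
# the scalar op is applied to every element before values are folded.
def ca_reduce(values, reduce_fn, pre_scalar_op=None):
    if pre_scalar_op is not None:
        values = [pre_scalar_op(v) for v in values]
    acc = values[0]
    for v in values[1:]:
        acc = reduce_fn(acc, v)
    return acc

# Sum of squares: add-reduce after squaring each input element.
assert ca_reduce([1.0, 2.0, 3.0], lambda a, b: a + b,
                 pre_scalar_op=lambda v: v * v) == 14.0
```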
......@@ -25,15 +25,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
"""
nin = 3
nout = 3
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def __str__(self):
return self.__class__.__name__
__props__ = ()
_f16_ok = True
def make_node(self, x, b, y_idx):
# N.B. won't work when we don't cast y_idx to float anymore
......@@ -282,21 +275,12 @@ gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1Ho
class GpuCrossentropySoftmax1HotWithBiasDx(Op):
"""
Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
Gradient wrt x of the CrossentropySoftmax1Hot Op
"""
nin = 3
nout = 1
"""Gradient wrt x of the CrossentropySoftmax1Hot Op"""
def __init__(self, **kwargs):
Op.__init__(self, **kwargs)
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def __str__(self):
return self.__class__.__name__
__props__ = ()
def make_node(self, dnll, sm, y_idx):
dnll = as_gpuarray_variable(dnll)
......
......@@ -21,6 +21,8 @@ from .comp import NVCC_compiler
class GpuSubtensor(HideC, Subtensor):
_f16_ok = True
def make_node(self, x, *inputs):
rval = tensor.Subtensor.make_node(self, x, *inputs)
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
......
......@@ -95,6 +95,7 @@ class DimShuffle(Op):
transpose function.
Adding, subtracting dimensions can be done with reshape.
"""
_f16_ok = True
check_input = False
......