Merge pull request #5865 from nouiz/ifelse

fix opt warning in the ifelse lifter

Merge pull request #5865 from nouiz/ifelse
d2c753f8 · Pascal Lamblin · GitHub · e5d09827 · 11bc72a6 · d2c753f8
--- a/theano/gpuarray/basic_ops.py
+++ b/theano/gpuarray/basic_ops.py
@@ -1341,6 +1341,8 @@ class GpuSplit(HideC, Split):
    Split for GPU.
    """
+    _f16_ok = True
    def __init__(self, len_splits):
        super(GpuSplit, self).__init__(len_splits)
        # The GPU version of Split returns splits as views of the input.

--- a/theano/gpuarray/opt.py
+++ b/theano/gpuarray/opt.py
@@ -931,11 +931,20 @@ def local_gpua_lazy_ifelse(op, context_name, inputs, outputs):
        return
    c = inputs[0]
    inps = []
-    for v in inputs[1:]:
+    falses = []
-        if isinstance(v.type, tensor.TensorType) and move_to_gpu(v):
+    # ifelse needs corresponding true/false input variables to be of the same type.
-            inps.append(as_gpuarray_variable(v, context_name))
+    # But we can't rely on the inputs to respect that, as GraphToGPU doesn't enforce it.
+    # So we need to take care of this here.
+    for v1, v2 in zip(inputs[1:1 + op.n_outs], inputs[1 + op.n_outs:]):
+        if ((isinstance(v1.type, tensor.TensorType) and move_to_gpu(v1)) or
+                isinstance(v1.type, GpuArrayType) or
+                isinstance(v2.type, GpuArrayType)):
+            inps.append(as_gpuarray_variable(v1, context_name))
+            falses.append(as_gpuarray_variable(v2, context_name))
        else:
-            inps.append(v)
+            inps.append(v1)
+            falses.append(v2)
+    inps.extend(falses)
    return IfElse(op.n_outs, gpu=True)(c, *inps, return_list=True)

--- a/theano/gpuarray/tests/test_basic_ops.py
+++ b/theano/gpuarray/tests/test_basic_ops.py
@@ -351,12 +351,19 @@ class G_Join_and_Split(test_basic.T_Join_and_Split):
        # this is to avoid errors with limited devices
        self.floatX = 'float32'
        self.hide_error = theano.config.mode not in ['DebugMode', 'DEBUG_MODE']
-        self.shared = gpuarray_shared_constructor
+        def shared(x, **kwargs):
+            return gpuarray_shared_constructor(x, target=test_ctx_name,
+                                               **kwargs)
+        self.shared = shared
    def test_gpusplit_opt(self):
+        # Test that we move the node to the GPU
+        # Also test float16 computation at the same time.
        rng = np.random.RandomState(seed=utt.fetch_seed())
-        m = self.shared(rng.rand(4, 6).astype(self.floatX))
+        m = self.shared(rng.rand(4, 6).astype('float16'))
        o = T.Split(2)(m, 0, [2, 2])
+        assert o[0].dtype == 'float16'
        f = theano.function([], o, mode=self.mode)
        assert any([isinstance(node.op, self.split_op_class)
                    for node in f.maker.fgraph.toposort()])

--- a/theano/gpuarray/tests/test_blas.py
+++ b/theano/gpuarray/tests/test_blas.py
@@ -11,7 +11,7 @@ from theano.tensor.blas import gemv_inplace, gemm_inplace, _dot22, batched_dot
 from theano.tensor.tests.test_blas import TestGer, BaseGemv
 from .. import gpuarray_shared_constructor
-from .config import mode_with_gpu
+from .config import mode_with_gpu, test_ctx_name
 from .test_basic_ops import makeTester, rand
 from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
@@ -48,7 +48,7 @@ def test_float16():
                    rand(3, 3).astype('float16'),
                    rand(3, 3).astype('float16'),
                    np.asarray(0.5, dtype=np.float32)]
-    float16_shared = [gpuarray_shared_constructor(val)
+    float16_shared = [gpuarray_shared_constructor(val, target=test_ctx_name)
                      for val in float16_data]
    o = gpugemm_no_inplace(*float16_shared)
    f = theano.function([], o)

--- a/theano/gpuarray/tests/test_opt.py
+++ b/theano/gpuarray/tests/test_opt.py
@@ -267,6 +267,16 @@ class test_gpu_ifelse(test_ifelse.test_ifelse):
                            mode=mode_with_gpu)
        assert f(np.float32([1, 2, 3]), 0) == 6
+    def test_lifter_with_shared_var(self):
+        x = tensor.lscalar('x')
+        y = gpuarray_shared_constructor(np.asarray(1, dtype='float32'),
+                                        target=test_ctx_name)
+        z = tensor.constant(2.)
+        a = theano.ifelse.ifelse(x, y, z)
+        with theano.configparser.change_flags(on_opt_error='raise'):
+            theano.function([x], [a], mode=mode_with_gpu)
 def test_print_op():
    """ Test that print ops don't block gpu optimization"""

--- a/theano/gpuarray/tests/test_subtensor.py
+++ b/theano/gpuarray/tests/test_subtensor.py
@@ -19,7 +19,7 @@ from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
                         GpuAllocDiag)
 from ..type import gpuarray_shared_constructor
-from .config import mode_with_gpu
+from .config import mode_with_gpu, test_ctx_name
 class G_subtensor(test_subtensor.T_subtensor):
@@ -27,9 +27,13 @@ class G_subtensor(test_subtensor.T_subtensor):
        return None
    def __init__(self, name):
+        def shared(x, **kwargs):
+            return gpuarray_shared_constructor(x, target=test_ctx_name,
+                                               **kwargs)
        test_subtensor.T_subtensor.__init__(
            self, name,
-            shared=gpuarray_shared_constructor,
+            shared=shared,
            sub=GpuSubtensor,
            inc_sub=GpuIncSubtensor,
            adv_sub1=GpuAdvancedSubtensor1,

--- a/theano/gpuarray/tests/test_type.py
+++ b/theano/gpuarray/tests/test_type.py
 from __future__ import absolute_import, print_function, division
 import os
+import nose
 import numpy as np
 import theano
@@ -60,6 +61,16 @@ def test_filter_float():
        del theano.compile.sharedvalue.shared.constructors[-1]
+def test_gpuarray_shared_scalar():
+    # By default, we don't put scalars on the GPU as shared variables
+    nose.tools.assert_raises(
+        TypeError, gpuarray_shared_constructor, np.asarray(1, dtype='float32'))
+    # But we can force that
+    gpuarray_shared_constructor(np.asarray(1, dtype='float32'),
+                                target=test_ctx_name)
 def test_unpickle_gpuarray_as_numpy_ndarray_flag0():
    """ Test when pygpu isn't there for unpickle are in test_pickle.py"""
    oldflag = config.experimental.unpickle_gpu_on_cpu

--- a/theano/gpuarray/type.py
+++ b/theano/gpuarray/type.py
@@ -658,7 +658,7 @@ def gpuarray_shared_constructor(value, name=None, strict=False,
        notset object.
    """
-    if target == 'gpu' or target == 'cpu':
+    if target == 'cpu':
        raise TypeError('not for me')
    if not isinstance(value, (np.ndarray, pygpu.gpuarray.GpuArray)):
@@ -667,6 +667,8 @@ def gpuarray_shared_constructor(value, name=None, strict=False,
    if target is notset:
        target = None
        if not gpu_supported(value):
+            raise TypeError('The GPU do not support that value.')
+        if not move_to_gpu(value):
            raise TypeError('We do not move that data by default to the GPU')
    try:
        get_context(target)

--- a/theano/tensor/nnet/opt.py
+++ b/theano/tensor/nnet/opt.py
@@ -74,7 +74,7 @@ compile.optdb.register('local_inplace_sparse_block_outer',
 def local_abstractconv_gemm(node):
    # If theano.config.blas.ldflags is empty, Theano will use
    # a NumPy C implementation of [sd]gemm_.
-    if theano.config.cxx == "":
+    if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
        return
    if not isinstance(node.op, AbstractConv2d):
        return None
@@ -98,7 +98,7 @@ def local_abstractconv_gemm(node):
 def local_abstractconv3d_gemm(node):
    # If theano.config.blas.ldflags is empty, Theano will use
    # a NumPy C implementation of [sd]gemm_.
-    if theano.config.cxx == "":
+    if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
        return
    if not isinstance(node.op, AbstractConv3d):
        return None
@@ -122,7 +122,7 @@ def local_abstractconv3d_gemm(node):
 def local_abstractconv_gradweight_gemm(node):
    # If theano.config.blas.ldflags is empty, Theano will use
    # a NumPy C implementation of [sd]gemm_.
-    if theano.config.cxx == "":
+    if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
        return
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None
@@ -149,7 +149,7 @@ def local_abstractconv_gradweight_gemm(node):
 def local_abstractconv3d_gradweight_gemm(node):
    # If theano.config.blas.ldflags is empty, Theano will use
    # a NumPy C implementation of [sd]gemm_.
-    if theano.config.cxx == "":
+    if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
        return
    if not isinstance(node.op, AbstractConv3d_gradWeights):
        return None
@@ -176,7 +176,7 @@ def local_abstractconv3d_gradweight_gemm(node):
 def local_abstractconv_gradinputs_gemm(node):
    # If theano.config.blas.ldflags is empty, Theano will use
    # a NumPy C implementation of [sd]gemm_.
-    if theano.config.cxx == "":
+    if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
        return
    if not isinstance(node.op, AbstractConv2d_gradInputs):
        return None
@@ -201,7 +201,7 @@ def local_abstractconv_gradinputs_gemm(node):
 def local_abstractconv3d_gradinputs_gemm(node):
    # If theano.config.blas.ldflags is empty, Theano will use
    # a NumPy C implementation of [sd]gemm_.
-    if theano.config.cxx == "":
+    if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
        return
    if not isinstance(node.op, AbstractConv3d_gradInputs):
        return None
@@ -225,7 +225,8 @@ def local_abstractconv3d_gradinputs_gemm(node):
 @local_optimizer([AbstractConv2d])
 def local_conv2d_cpu(node):
-    if not isinstance(node.op, AbstractConv2d):
+    if (not isinstance(node.op, AbstractConv2d) or
+            node.inputs[0].dtype == 'float16'):
        return None
    img, kern = node.inputs
@@ -280,7 +281,8 @@ def local_conv3d_cpu(node):
 @local_optimizer([AbstractConv2d_gradWeights])
 def local_conv2d_gradweight_cpu(node):
-    if not isinstance(node.op, AbstractConv2d_gradWeights):
+    if (not isinstance(node.op, AbstractConv2d_gradWeights) or
+            node.inputs[0].dtype == 'float16'):
        return None
    img, topgrad, shape = node.inputs
@@ -431,7 +433,8 @@ def local_conv3d_gradweight_cpu(node):
 @local_optimizer([AbstractConv2d_gradInputs])
 def local_conv2d_gradinputs_cpu(node):
-    if not isinstance(node.op, AbstractConv2d_gradInputs):
+    if (not isinstance(node.op, AbstractConv2d_gradInputs) or
+            node.inputs[0].dtype == 'float16'):
        return None
    kern, topgrad, shape = node.inputs
@@ -611,7 +614,8 @@ def local_abstractconv_check(node):
            'available supporting the requested options. Did you exclude '
            'both "conv_dnn" and "conv_gemm" from the optimizer? If on GPU, '
            'is cuDNN available and does the GPU support it? If on CPU, '
-            'do you have a BLAS library installed Theano can link against?' %
+            'do you have a BLAS library installed Theano can link against? '
+            'On the CPU we do not support float16.' %
            node.op.__class__.__name__)
 optdb.register('AbstractConvCheck',