提交 21789731 authored 作者: nouiz's avatar nouiz

Merge pull request #360 from lamblin/filter_variable

Add "filter_variable" mechanism in Type
......@@ -103,7 +103,7 @@ def rebuild_collect_shared( outputs
# Do not use default_update if a "real" update was
# provided
if v not in update_d:
v_update = v.filter_update(v.default_update)
v_update = v.type.filter_variable(v.default_update)
if v_update.type != v.type:
raise TypeError(
( 'an update must have the same type as '
......@@ -188,8 +188,8 @@ def rebuild_collect_shared( outputs
'expression'),
(store_into, update_d[store_into]))
update_val = store_into.filter_update(update_val)
# typically this might be a cast()
# filter_variable ensure smooth conversion of cpu/gpu Types
update_val = store_into.type.filter_variable(update_val)
if update_val.type != store_into.type:
err_msg = ( 'an update must have the same type as the '
'original shared variable(dest, dest.type, '
......
......@@ -118,29 +118,6 @@ class SharedVariable(Variable):
cp.tag = copy.copy(self.tag)
return cp
def filter_update(self, update):
    """Hook run by pfunc on each update expression for this variable.

    Gives the shared variable a chance to cast or otherwise adapt the
    update expression.  The base behaviour leaves a Variable untouched
    and wraps any non-Variable value in a shared variable via
    ``shared(update)``.

    :param update: the new value for this shared variable when updated
        by a pfunc.
    :returns: a Variable whose value will be assigned to this
        SharedVariable by a pfunc.
    :note: The return value of this function must match self.type, or
        else pfunc() will raise a TypeError.
    """
    if isinstance(update, Variable):
        return update
    # The update value is not symbolic: wrap it in a shared variable so
    # that 'function' can use it.  Because the wrapper aliases the
    # original object, the update value may change if it is mutable and
    # modified after the function is created.
    return shared(update)
def __getitem__(self, *args):
# __getitem__ is not available for generic SharedVariable objects.
# We raise a TypeError like Python would do if __getitem__ was not
......
......@@ -152,28 +152,33 @@ class Apply(utils.object2):
:type strict: Bool
:param strict:
If True, the type fields of all the inputs must be equal to the current ones, and
returned outputs are guaranteed to have the same types as self.outputs. If False,
then there's no guarantee that the clone's outputs will have the same types as
self.outputs, and cloning may not even be possible (it depends on the Op).
If True, the type fields of all the inputs must be equal
to the current ones (or compatible, for instance Tensor /
CudaNdarray of the same dtype and broadcastable patterns,
in which case they will be converted into current Type), and
returned outputs are guaranteed to have the same types as
self.outputs. If False, then there's no guarantee that the
clone's outputs will have the same types as self.outputs,
and cloning may not even be possible (it depends on the Op).
:returns: an Apply instance with the same op but different outputs.
"""
remake_node = False
for curr, new in zip(self.inputs, inputs):
new_inputs = inputs[:]
for i, (curr, new) in enumerate(zip(self.inputs, new_inputs)):
if not curr.type == new.type:
if strict:
raise TypeError("Cannot change the type of this input.", ((curr, curr.type),
(new, new.type)))
# If compatible, casts new into curr.type
new_inputs[i] = curr.type.filter_variable(new)
else:
remake_node = True
if remake_node:
new_node = self.op.make_node(*inputs)
new_node = self.op.make_node(*new_inputs)
new_node.tag = copy(self.tag).__update__(new_node.tag)
else:
new_node = self.clone()
new_node.inputs = inputs
new_node.inputs = new_inputs
return new_node
#convenience properties
......
......@@ -228,9 +228,35 @@ class PureType(object):
# filter() This is to allow reusing the old allocated memory. As
# of this writing this is used only when we transfer new data to a
# shared variable on the gpu.
#def filter_inplace(value, storage, strict=False, allow_downcast=None)
def filter_variable(self, other):
    """Convert the symbolic variable `other` into this Type, if compatible.

    For the moment, the only Types compatible with one another are
    TensorType and CudaNdarrayType, provided they have the same
    number of dimensions, same broadcasting pattern, and same dtype.

    If the Types are not compatible, a TypeError is raised.
    """
    if not isinstance(other, graph.Variable):
        # A raw value rather than a Variable: wrap it in a Constant of
        # this Type so it can take part in symbolic computation.
        other = self.Constant(type=self, data=other)

    if other.type == self:
        return other

    raise TypeError(
        'Cannot convert Type %(othertype)s '
        '(of Variable %(other)s) into Type %(self)s. '
        'You can try to manually convert %(other)s into a %(self)s.'
        % dict(othertype=other.type, other=other, self=self))
def is_valid_value(self, a):
"""Required: Return True for any python object `a` that would be a legal value for a Variable of this Type"""
try:
......
......@@ -26,6 +26,7 @@ import logging
from theano.gof import PureOp, Apply
import theano.tensor
from theano.tensor import TensorType
import gof
from compile import optdb
......@@ -312,7 +313,10 @@ def ifelse(condition, then_branch, else_branch, name=None):
if type(else_branch) not in (list, tuple):
else_branch = [else_branch]
# Some of the elements might be converted into another type,
# we will store them in these new_... lists.
new_then_branch = []
new_else_branch = []
for then_branch_elem, else_branch_elem in zip(then_branch, else_branch):
if not isinstance(then_branch_elem, theano.Variable):
then_branch_elem = theano.tensor.as_tensor_variable(then_branch_elem)
......@@ -320,14 +324,32 @@ def ifelse(condition, then_branch, else_branch, name=None):
else_branch_elem = theano.tensor.as_tensor_variable(else_branch_elem)
if then_branch_elem.type != else_branch_elem.type:
raise ValueError(('The two branches should have identical types, '
# If one of them is a TensorType, and the other one can be
# converted into one, then we try to do that.
# This case happens when one of the elements has a GPU type,
# for instance a shared variable that was silently moved to GPU.
if (isinstance(then_branch_elem.type, TensorType)
and not isinstance(else_branch_elem.type, TensorType)):
else_branch_elem = then_branch_elem.type.filter_variable(
else_branch_elem)
elif (isinstance(else_branch_elem.type, TensorType)
and not isinstance(then_branch_elem.type, TensorType)):
then_branch_elem = else_branch_elem.type.filter_variable(
then_branch_elem)
if then_branch_elem.type != else_branch_elem.type:
# If the types still don't match, there is a problem.
raise ValueError(
('The two branches should have identical types, '
' but they are '+str(then_branch_elem.type)+' and '+
str(else_branch_elem.type)+' respectively. '
'This error could be raised if for example '
' you provided a one element list on the then '
' branch but a tensor on the else branch'))
new_then_branch.append(then_branch_elem)
new_else_branch.append(else_branch_elem)
if len(then_branch) != len(else_branch):
raise ValueError(('The number of values on the `then` branch'
......@@ -341,7 +363,7 @@ def ifelse(condition, then_branch, else_branch, name=None):
gpu=False,
name=name)
ins = [condition] + list(then_branch) + list(else_branch)
ins = [condition] + list(new_then_branch) + list(new_else_branch)
rval = new_ifelse.make_node(*ins).outputs
if rval_type is None:
......
import unittest
import numpy
from nose.plugins.skip import SkipTest
import theano
from theano import tensor
from theano.ifelse import ifelse
from theano import sparse
from theano.tensor import TensorType
from theano.tests import unittest_tools as utt
from theano.sandbox.cuda.var import float32_shared_constructor as f32sc
from theano.sandbox.cuda import CudaNdarrayType, cuda_available
......@@ -34,3 +42,110 @@ def test_float32_shared_constructor():
assert eq(
f32sc(numpy.zeros((2,3,4,5), dtype='float32')).type,
CudaNdarrayType((False,)*4))
def test_givens():
    # Test that you can use a TensorType expression to replace a
    # CudaNdarrayType in the givens dictionary.
    # Regression test for the code mentioned in ticket #757: `x` comes
    # from the CUDA float32 shared constructor (f32sc), while the
    # replacement `x + 1` is a plain tensor expression.  The test
    # passes if compilation does not raise.
    data = numpy.float32([1,2,3,4])
    x = f32sc(data)
    y = x**2
    f = theano.function([x], y, givens={x:x+1})
class T_updates(unittest.TestCase):
    """Check that a TensorType expression can update a CudaNdarrayType
    shared variable through the `updates` dict of theano.function."""

    def test_1(self):
        # A GPU shared variable updated by a tensor expression: the
        # test passes if compilation does not raise.
        values = numpy.float32([1, 2, 3, 4])
        gpu_var = f32sc(values)
        squared = gpu_var ** 2
        theano.function([], squared, updates={gpu_var: gpu_var + 1})

    def test_2(self):
        # Code from ticket #698: the update expression is built from a
        # host-side fmatrix that is substituted via `givens`.
        inp = numpy.random.rand(10, 10).astype('float32')
        out_shared = f32sc(name="output",
                           value=numpy.zeros((10, 10), 'float32'))
        host_in = tensor.fmatrix('x')
        fn = theano.function(inputs=[], outputs=[],
                             updates={out_shared: host_in ** 2},
                             givens={host_in: inp})
        fn()
class T_ifelse(unittest.TestCase):
    """Tests for `ifelse` when the two branches mix CudaNdarrayType and
    TensorType variables, or have genuinely incompatible types.

    Fixes over the previous version: removed leftover debug ``print``
    statements in test_broadcast_mismatch (they polluted test output
    and, being Python-2 print statements, break parsing under
    Python 3), and fixed a comment typo.
    """

    def setUp(self):
        utt.seed_rng()
        self.rng = numpy.random.RandomState(seed=utt.fetch_seed())

    def test_cuda_tensor(self):
        # A GPU shared variable and a host tensor expression with the
        # same dtype and broadcastable pattern can be mixed; the result
        # is a TensorType and both orderings compute correctly.
        data = self.rng.rand(4).astype('float32')
        x = f32sc(data)
        y = x + 1
        cond = theano.tensor.iscalar('cond')

        assert isinstance(x.type, CudaNdarrayType)
        assert isinstance(y.type, TensorType)

        out1 = ifelse(cond, x, y)
        out2 = ifelse(cond, y, x)
        assert isinstance(out1.type, TensorType)
        assert isinstance(out2.type, TensorType)

        f = theano.function([cond], out1)
        g = theano.function([cond], out2)

        assert numpy.all(f(0) == data+1)
        assert numpy.all(f(1) == data)
        assert numpy.all(g(0) == data)
        assert numpy.all(g(1) == data+1)

    def test_dtype_mismatch(self):
        # Branches with different dtypes cannot be unified.
        data = self.rng.rand(5).astype('float32')
        x = f32sc(data)
        y = tensor.cast(x, 'float64')
        cond = theano.tensor.iscalar('cond')
        self.assertRaises(TypeError, ifelse, cond, x, y)
        self.assertRaises(TypeError, ifelse, cond, y, x)

    def test_ndim_mismatch(self):
        # Branches with different numbers of dimensions cannot be unified.
        data = self.rng.rand(5).astype('float32')
        x = f32sc(data)
        y = tensor.fcol('y')
        cond = theano.tensor.iscalar('cond')
        self.assertRaises(TypeError, ifelse, cond, x, y)
        self.assertRaises(TypeError, ifelse, cond, y, x)

    def test_broadcast_mismatch(self):
        # Branches with different broadcastable patterns cannot be
        # unified (x is a plain matrix, y is a row).
        data = self.rng.rand(2,3).astype('float32')
        x = f32sc(data)
        y = tensor.frow('y')
        cond = theano.tensor.iscalar('cond')
        self.assertRaises(TypeError, ifelse, cond, x, y)
        self.assertRaises(TypeError, ifelse, cond, y, x)

    def test_sparse_tensor_error(self):
        # Sparse branches are never convertible to dense or GPU types.
        data = self.rng.rand(2,3).astype('float32')
        x = f32sc(data)
        y = sparse.matrix('csc', dtype='float32', name='y')
        z = sparse.matrix('csr', dtype='float32', name='z')
        cond = theano.tensor.iscalar('cond')

        # Right now (2012-01-19), a ValueError gets raised, but I think
        # a TypeError (like in the other cases) would be fine.
        self.assertRaises((TypeError, ValueError), ifelse, cond, x, y)
        self.assertRaises((TypeError, ValueError), ifelse, cond, y, x)
        self.assertRaises((TypeError, ValueError), ifelse, cond, x, z)
        self.assertRaises((TypeError, ValueError), ifelse, cond, z, x)
        self.assertRaises((TypeError, ValueError), ifelse, cond, y, z)
        self.assertRaises((TypeError, ValueError), ifelse, cond, z, y)
......@@ -97,6 +97,32 @@ class CudaNdarrayType(Type):
% (self, self.dtype, data, converted_data, self.dtype),
data)
def filter_variable(self, other):
    """Convert a Variable into a CudaNdarrayType, if compatible.

    This Variable should either already be a CudaNdarrayType (or
    provide a `_as_CudaNdarrayVariable` hook), or be a TensorType.
    It has to have the right number of dimensions, broadcastable
    pattern, and dtype.
    """
    if hasattr(other, '_as_CudaNdarrayVariable'):
        other = other._as_CudaNdarrayVariable()

    if not isinstance(other, Variable):
        # A raw value rather than a Variable: wrap it in a Constant of
        # this Type.
        other = self.Constant(type=self, data=other)

    if other.type == self:
        # Already the right Type: nothing to convert.
        return other

    # Only host TensorType variables can be transferred to the GPU, and
    # only when dtype and broadcastable pattern agree exactly.
    if not isinstance(other.type, tensor.TensorType):
        raise TypeError('Incompatible type', (self, other.type))
    if other.type.dtype != self.dtype:
        raise TypeError('Incompatible dtype',
                        (self.dtype, other.type.dtype))
    if other.type.broadcastable != self.broadcastable:
        raise TypeError('Incompatible broadcastable',
                        (self.broadcastable, other.type.broadcastable))
    return theano.sandbox.cuda.basic_ops.GpuFromHost()(other)
@staticmethod
def bound(a):
......
......@@ -127,19 +127,6 @@ class CudaNdarraySharedVariable(SharedVariable, _operators):
value = copy.deepcopy(value)
self.container.value = value # this will copy a numpy ndarray
def filter_update(self, other):
    """Adapt an update expression `other` for this GPU shared variable.

    A variable providing `_as_CudaNdarrayVariable` is converted through
    that hook; otherwise `other` must be a TensorType variable with
    matching dtype and broadcastable pattern, and is transferred to the
    GPU with GpuFromHost.

    :raises TypeError: if `other` is not a TensorType variable, or its
        dtype or broadcastable pattern differs from this variable's.
    """
    if hasattr(other, '_as_CudaNdarrayVariable'):
        return other._as_CudaNdarrayVariable()
    # NOTE(review): assumes `other` has a `.type` attribute; a raw
    # (non-Variable) value would raise AttributeError here rather than
    # TypeError -- confirm callers always pass a Variable.
    if not isinstance(other.type, tensor.TensorType):
        raise TypeError('Incompatible type', (self, (self.type, other.type)))
    if (other.type.dtype != self.dtype):
        raise TypeError('Incompatible dtype', (self, (self.dtype, other.type.dtype)))
    if (other.type.broadcastable != self.broadcastable):
        raise TypeError('Incompatible broadcastable', (self, (self.broadcastable,
                                                              other.type.broadcastable)))
    # Matching host tensor: insert the host-to-GPU transfer op.
    return GpuFromHost()(other)
def __getitem__(self, *args):
# Defined to explicitly use the implementation from `_operators`, since
# the definition in `SharedVariable` is only meant to raise an error.
......
......@@ -633,6 +633,35 @@ class TensorType(Type):
raise ValueError("non-finite elements not allowed")
return data
def filter_variable(self, other):
    """Convert a symbolic Variable into a TensorType, if compatible.

    For the moment, only a TensorType or CudaNdarrayType will be
    converted, provided they have the same number of dimensions,
    broadcastable pattern, and dtype.
    """
    if hasattr(other, '_as_TensorVariable'):
        # e.g. a GPU variable exposing a host-tensor view of itself.
        other = other._as_TensorVariable()

    if not isinstance(other, Variable):
        # A raw value rather than a Variable: wrap it in a Constant of
        # this Type.
        other = self.Constant(type=self, data=other)

    if other.type != self:
        raise TypeError(
            'Cannot convert Type %(othertype)s '
            '(of Variable %(other)s) into Type %(self)s. '
            'You can try to manually convert %(other)s into a %(self)s.'
            % dict(othertype=other.type, other=other, self=self))
    return other
def value_validity_msg(self, a):
try:
self.filter(a, strict=True)
......
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论