Commit fa31bb68, authored by James Bergstra

merge

......@@ -751,13 +751,19 @@ class FunctionMaker(object):
if not isinstance(inputs, (list, tuple)):
inputs = [inputs]
# Wrap them in In or Out instances if needed.
#import pudb; pudb.set_trace()
inputs, outputs = map(self.wrap_in, inputs), map(self.wrap_out, outputs)
_inputs = gof.graph.inputs([o.variable for o in outputs] + [i.update for i in inputs if getattr(i, 'update', False)])
_inputs = gof.graph.inputs([o.variable for o in outputs] + [i.update
for i in inputs if getattr(i, 'update', False)])
#TODO: REMOVE THIS CRUFT - it's complicated for SymbolicInputKits
indices = [[input] + self.expand_in(input, _inputs) for input in inputs]
expanded_inputs = reduce(list.__add__, [list(z) for x, y, z in indices], [])
assert expanded_inputs == inputs #JB - I added this to make sure we could delete above
# make the env
# make the env (copies the graph, creates NEW INPUT AND OUTPUT VARIABLES)
env, additional_outputs = std_env(expanded_inputs, outputs, accept_inplace)
self.env = env
......@@ -774,12 +780,34 @@ class FunctionMaker(object):
# but some of the outputs can be shared variables, and is not good for shared
# variables to be aliased. It might be possible to optimize this by making sure
# there is no aliasing only between shared variables.
assert len(inputs) == len(env.inputs)
updated_env_inputs = [env_i for i, env_i in zip(inputs, env.inputs) if getattr(i, 'update', False)]
for i in xrange(len(env.outputs)):
views = set()
view_tree_set(alias_root(env.outputs[i]), views)
views_of_output_i = set()
view_tree_set(alias_root(env.outputs[i]), views_of_output_i)
copied = False
# do not allow outputs to be aliased
for j in xrange(i+1, len(env.outputs)):
if env.outputs[j] in views:
env.change_input('output', j, deep_copy_op(env.outputs[j]))
if env.outputs[j] in views_of_output_i:
env.change_input('output', i, deep_copy_op(env.outputs[i]))
copied = True
break
if not copied:
for input_j in env.inputs:
# do not allow outputs to be aliased to an inputs (j), unless
# a) that j'th input has been 'destroyed' by e.g. in-place computations
# b) that j'th input is a shared variable that is also being updated
if hasattr(env,'get_destroyers_of') and env.get_destroyers_of(input_j):
continue
if input_j in updated_env_inputs:
continue
if input_j in views_of_output_i:
env.change_input('output', i, deep_copy_op(env.outputs[i]))
break
......
......@@ -64,11 +64,37 @@ class SharedVariable(Variable):
readonly=False,
strict=strict)
def __set(self,new_value):
    # Property setter: delegate storage to the container so that every
    # function sharing this variable observes the assignment.
    # NOTE(review): no copy is made here — new_value is stored by reference,
    # so callers may alias the internal value.
    self.container.value = new_value
def get_value(self, borrow=False):
    """Return the non-symbolic value associated with this SharedVariable.

    :param borrow:
        When True, hand back the internal value directly, which can
        create problems related to aliased memory.

    If the returned object is mutable and you obtained it with
    borrow=True, be careful about mutating it: call
    set_value(rval, borrow=True) afterwards so Theano knows the value
    changed (Theano may have cached computations based on the old value).
    """
    stored = self.container.value
    return stored if borrow else copy.deepcopy(stored)
def __get(self):
    # Property getter: expose the container's value directly (no copy),
    # so the caller receives a reference to the internal storage.
    return self.container.value
def set_value(self, new_value, borrow=False):
    """Assign a new non-symbolic value to this SharedVariable.

    :param borrow:
        When True, store new_value directly (no copy), which can create
        problems related to aliased memory.

    The assignment is visible to all functions using this SharedVariable.
    """
    self.container.value = new_value if borrow else copy.deepcopy(new_value)
def clone(self):
cp = self.__class__(
......@@ -80,16 +106,9 @@ class SharedVariable(Variable):
cp.tag = copy.copy(self.tag)
return cp
value = property(__get, __set)
#value = self.container.value #GD- would've thought mapping one property to another would work
"""Read/write the non-symbolic value associated with this SharedVariable.
If the SharedVariable is shared, changes to this value will be visible to all functions using
this SharedVariable. If this SharedVariable is not shared, a change will not be visible to
functions that were created before the change.
value = property(get_value, set_value,
doc="shortcut for self.get_value() and self.set_value() which COPIES data")
"""
def filter_update(self, update):
"""When this shared variable is updated by a pfunc, the update value will be run through this function.
......
......@@ -285,7 +285,7 @@ class T_function(unittest.TestCase):
a = T.dmatrix()
f = function([a], Out(a, borrow=False))
o = N.ones((3,3))
assert o is f(o) #borrow does not imply copy.
assert o is not f(o) #function no longer permits aliasing outputs to inputs
f = function([a], Out(a*4, borrow=False))
o = N.ones((3,3))
......
......@@ -107,8 +107,8 @@ class Test_SharedVariable(unittest.TestCase):
# check that an assignment of a perfect value results in no copying
uval = theano._asarray([5,6,7,8], dtype='float64')
u.value = uval
assert u.value is uval
u.set_value(uval, borrow=True)
assert u.get_value(borrow=True) is uval
def test_scalar_strict(self):
def f(var, val): var.value = val
......
......@@ -32,6 +32,8 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
"""
gmap = {}
for (r, g_r) in sources:
if not hasattr(r, 'type'):
raise TypeError('sources must be Variables', r)
if g_r is not None:
if r in gmap:
gmap[r] = gmap[r] + g_r
......@@ -52,6 +54,10 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
output_arg = g_outputs
input_arg = node.inputs
# Each Op's grad function requires inputs and output_grads
# If the Op destroys any input, but the grad expression uses it, then chances are the
# resulting graph will have a dependency cycle. We avoid this cycle by passing
# (symbolic) copies of each destroyed input.
try:
dinputs = [node.inputs[x[0]] for x in node.op.destroy_map.values()]
except AttributeError:
......@@ -93,6 +99,7 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
if g_r and len(sources) == 1 and sources[0][0].name and r.name:
g_r.name = "(d%s/d%s)" % (sources[0][0].name, r.name)
if g_r is not None:
assert r is not None
if r in gmap:
gmap[r] = gmap[r] + g_r
else:
......
......@@ -27,7 +27,7 @@ def tensor_constructor(value, name=None, strict=False, broadcastable=None):
if broadcastable is None:
broadcastable = (False,)*len(value.shape)
type = TensorType(value.dtype, broadcastable=broadcastable)
return TensorSharedVariable(type=type, value=value, name=name, strict=strict)
return TensorSharedVariable(type=type, value=numpy.array(value,copy=True), name=name, strict=strict)
# TensorSharedVariable brings in the tensor operators, is not ideal, but works as long as we
# dont do purely scalar-scalar operations
......@@ -56,7 +56,7 @@ def scalar_constructor(value, name=None, strict=False):
# Do not pass the dtype to asarray because we want this to fail if
# strict is True and the types do not match.
rval = ScalarSharedVariable(type=tensor_type,
value=numpy.asarray(value),
value=numpy.array(value, copy=True),
name=name, strict=strict)
return rval
except:
......
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment