merge

030bb405 · James Bergstra · 8449e7e3 · 5cf3761c · 030bb405 · 030bb405
--- a/doc/sandbox/module.txt
+++ b/doc/sandbox/module.txt
@@ -221,81 +221,7 @@ Advanced example

 Complex models can be implemented by subclassing ``Module`` (though that is not mandatory). Here is a complete, extensible (and working) regression model implemented using this system:

-.. code-block:: python
-
-    class RegressionLayer(M.Module):
-
-        def __init__(self, input = None, target = None, regularize = True):
-            super(RegressionLayer, self).__init__() #boilerplate
-            # MODEL CONFIGURATION
-            self.regularize = regularize
-            # ACQUIRE/MAKE INPUT AND TARGET
-            if not input:
-                input = T.matrix('input')
-            if not target:
-                target = T.matrix('target')
-            # HYPER-PARAMETERS
-            self.stepsize = T.scalar()  # a stepsize for gradient descent
-            # PARAMETERS
-            self.w = T.matrix()  #the linear transform to apply to our input points
-            self.b = T.vector()  #a vector of biases, which make our transform affine instead of linear
-            # REGRESSION MODEL
-            self.activation = T.dot(input, self.w) + self.b
-            self.prediction = self.build_prediction()
-            # CLASSIFICATION COST
-            self.classification_cost = self.build_classification_cost(target)
-            # REGULARIZATION COST
-            self.regularization = self.build_regularization()
-            # TOTAL COST
-            self.cost = self.classification_cost
-            if self.regularize:
-                self.cost = self.cost + self.regularization
-            # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
-            self.grad_w, self.grad_b = T.grad(self.cost, [self.w, self.b])
-            # INTERFACE METHODS
-            self.update = M.Method([input, target],
-                                   self.cost,
-                                   updates={self.w: self.w - self.stepsize * self.grad_w,
-                                            self.b: self.b - self.stepsize * self.grad_b})
-            self.apply = M.Method(input, self.prediction)
-
-        def params(self):
-            return self.w, self.b
-
-        def _instance_initialize(self, obj, input_size = None, target_size = None,
-                                 seed = 1827, **init):
-            # obj is an "instance" of this module holding values for each member and
-            # functions for each method
-            if input_size and target_size:
-                # initialize w and b in a special way using input_size and target_size
-                sz = (input_size, target_size)
-                rng = N.random.RandomState(seed)
-                obj.w = rng.uniform(size = sz, low = -0.5, high = 0.5)
-                obj.b = N.zeros(target_size)
-                obj.stepsize = 0.01
-            # here we call the default_initialize method, which takes all the name: value
-            # pairs in init and sets the property with that name to the provided value
-            # this covers setting stepsize, l2_coef; w and b can be set that way too
-            # we call it after as we want the parameter to superseed the default value.
-            M.default_initialize(obj,**init)
-
-        def build_regularization(self):
-            return T.zero() # no regularization!
-
-    class SoftmaxXERegression(RegressionLayer):
-        """ XE means cross entropy"""
-        def build_prediction(self):
-            return NN.softmax(self.activation)
-
-        def build_classification_cost(self, target):
-            #self.classification_cost_matrix = target * T.log(self.prediction) + (1 - target) * T.log(1 - self.prediction)
-            self.classification_cost_matrix = (target - self.prediction)**2
-            self.classification_costs = -T.sum(self.classification_cost_matrix, axis=1)
-            return T.sum(self.classification_costs)
-
-        def build_regularization(self):
-            self.l2_coef = T.scalar() # we can add a hyper parameter if we need to
-            return self.l2_coef * T.sum(self.w * self.w)
+.. literalinclude:: ../code/regression.py

 Here is how we use the model:


--- a/theano/compile/debugmode.py
+++ b/theano/compile/debugmode.py
@@ -136,24 +136,27 @@ class BadDestroyMap(DebugModeError):

 class BadViewMap(DebugModeError):
    """Exception: Some perform() or c_code() created a memory alias that wasn't in the view_map"""
-    def __init__(self, node, idx, old_val, new_val):
+    def __init__(self, node, output_idx, out_storage, in_alias_idx=None, out_alias_idx=None):
        super(BadViewMap, self).__init__()
        self.node = node
-        self.idx = idx
-        self.old_val = old_val
-        self.new_val = new_val
+        self.output_idx = output_idx
+        self.out_storage = out_storage
+        self.in_alias_idx = in_alias_idx
+        self.out_alias_idx = out_alias_idx
    
    def __str__(self):
        sio = StringIO()
        print >> sio, "  node:", self.node
        print >> sio, "  node.inputs:", [(str(i), id(i)) for i in self.node.inputs]
+        print >> sio, "  node.outputs:", [(str(i), id(i)) for i in self.node.outputs]
        print >> sio, "  view_map:", getattr(self.node.op, 'view_map', {})
-        print >> sio, "  changed input idx:", self.idx
-        print >> sio, "  changed input type:", self.node.inputs[self.idx].type
-        print >> sio, "  repr (old val):", repr(self.old_val)
-        print >> sio, "  repr (new val):", repr(self.new_val)
-        print >> sio, ""
-        print >> sio, "  Hint: this can also be caused by a deficient values_eq_approx() or __eq__() implementation that compares node input values"
+        print >> sio, "  destroy_map:", getattr(self.node.op, 'destroy_map', {})
+        print >> sio, "  aliased output:", self.output_idx
+        print >> sio, "  aliased output storage:", self.out_storage
+        if self.in_alias_idx:
+            print >> sio, "  aliased to inputs:", self.in_alias_idx
+        if self.out_alias_idx:
+            print >> sio, "  aliased to outputs:", self.out_alias_idx
        return sio.getvalue()

 class StochasticOrder(DebugModeError):
@@ -273,7 +276,85 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes, clobber_dr_v
            else:
                raise BadDestroyMap(node, r_idx, r_vals[r], storage_map[r][0])

+
+def _check_viewmap(node, storage_map):
+    """
+    This function raises a BadViewMap exception when it detects the following:
+    - output node storages aliased to input storage, with no declaration in view_map
+    - if not aliased to an input, check if two outputs are aliased together
+      and used subsequently in the graph
+    """
+
+    for oi, onode in enumerate(node.outputs):
+        input_alias = None
+        outstorage = storage_map[onode][0]
+        instorage_id = [id(storage_map[i][0]) for i in node.inputs]
+        
+        # TODO: investigate ways in which other Types may be aliased
+        # TODO: consider adding a function to Type to detect aliasing
+        danger_flag = id(outstorage) in instorage_id or\
+                      (type(outstorage)==numpy.ndarray and 
+                       outstorage.flags['OWNDATA']==False)
+        if danger_flag:
+            # first find out which input it aliases
+
+            # In theory, theano's view_map only allows for 1 output to alias 1 input
+            # Checking for multiple aliases just in case...
+            alias = {}
+            for ii, inode in enumerate(node.inputs):
+                if _may_share_memory(outstorage, storage_map[inode][0]):
+                    alias[ii] = (ii,inode)
+
+            # if it's aliased but that alias is declared in the view/destroy map, it is OK
+            viewmapped = False
+            view_map = getattr(node.op, 'view_map', {})
+            destroy_map = getattr(node.op, 'destroy_map', {})
+            for key,val in view_map.items()+destroy_map.items():
+                val = val[0] # view_map stores a list with single-entries
+                if key==oi and val in alias.keys():
+                    # phew, it's view-mapped; we're good
+                    input_alias = alias.pop(val)
+
+            # if there's anything left in alias, there's a problem
+            if len(alias):
+                raise BadViewMap(node, oi, outstorage, alias.keys())
+            
+        #need to check output->output aliasing as well
+        if not input_alias and _is_used_in_graph(onode):
+            for other_oi, other_onode in enumerate(node.outputs):
+                if other_oi==oi: continue
+
+                other_storage = storage_map[other_onode][0]
+                # check to see if we share memory with this other output
+                # this is not a problem if the node is not actually used
+                if _is_used_in_graph(other_onode) and \
+                        _may_share_memory(outstorage, other_storage):
+                    raise BadViewMap(node, oi, outstorage, out_alias_idx=other_oi)
+
+def _may_share_memory(a, b):
+    return (hasattr(a,'__array_interface__') and
+            hasattr(b,'__array_interface__') and
+            numpy.may_share_memory(a,b))
+            
+def _is_function_output(node):
+    """
+    Returns True if the node in question is a final output of the graph
+    """
+    return node.clients==[('output', 1)]
+
+def _is_used_in_graph(node):
+    return not(_is_function_output(node) or node.clients==[])
+
+
 def _lessbroken_deepcopy(a):
+    """
+    :param a: any object
+
+    Returns a copy of `a` that shares no internal storage with the original.  A deep copy.
+    This function handles numpy arrays specially to avoid some bug I had one time... (possibly
+    about copying 1-d arrays?)
+    """
+    # this exists because numpy copies are broken
    if type(a) is numpy.ndarray:
        rval = numpy.array(a, copy=True, dtype=a.dtype)
    else:
@@ -718,7 +799,7 @@ class _Linker(gof.link.LocalLinker):
            for r, s in storage_map.iteritems():
                assert s[0] is None

-            try:
+            #try:
            # compute the value of all variables
            for i, (thunk_py, thunk_c, node) in enumerate(zip(thunks_py, thunks_c, order)):
                this_node_destroyed_variables = set()
@@ -738,6 +819,8 @@ class _Linker(gof.link.LocalLinker):
                    _check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
                            clobber_dr_vals=True)

+                    _check_viewmap(node, storage_map)
+
                    # check output values for type-correctness
                    #retrieve each output from the storage_map
                    for r in node.outputs:
@@ -760,6 +843,8 @@ class _Linker(gof.link.LocalLinker):
                    _check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
                            clobber_dr_vals=False)

+                    _check_viewmap(node, storage_map)
+
                    for r in node.outputs:
                        # check output values for type-correctness
                        if not r.type.is_valid_value(storage_map[r][0]):
@@ -780,8 +865,8 @@ class _Linker(gof.link.LocalLinker):
                for r in node.inputs:
                    storage_map[r][0] = None

-            except:
-                raise_with_op(node)
+            #except:
+            #    raise_with_op(node)

            _find_bad_optimizations(order, env.equivalence_tracker.reasons, r_vals)

@@ -898,7 +983,7 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions
                            pass

                    print >> sys.stderr, "EXITING"
-                    sys.exit(1)
+                    sys.exit(1) #there is a ticket related to not calling sys.exit here.
                    break
                else:
                    if self.verbose:

--- a/theano/compile/tests/test_debugmode.py
+++ b/theano/compile/tests/test_debugmode.py
@@ -7,6 +7,7 @@ import theano
 import theano.tensor
 from theano.compile import debugmode
 import theano.compile
+import unittest

 def test0():
    x = theano.tensor.dvector()
@@ -342,34 +343,65 @@ def test_baddestroymap_c():
        pass


-def test_badviewmap():
-    class BadAdd(gof.Op):
+class Test_ViewMap(unittest.TestCase):
+
+    class BadAddRef(gof.Op):
        def make_node(self, a, b):
            c = b.type()
            return gof.Apply(self, [a,b], [c])
        def perform(self, node, (a,b), (c,)):
            c[0] = b

+    class BadAddSlice(gof.Op):
+        def make_node(self, a, b):
+            c = b.type()
+            return gof.Apply(self, [a,b], [c])
+        def perform(self, node, (a,b), (c,)):
+            c[0] = b[1:3]
+
+    def test_badviewmap_ref(self):
        x = theano.tensor.dvector()
        y = theano.tensor.dvector()
-    f = theano.function([x, y], BadAdd()(x,y), mode='DEBUG_MODE')
+        f = theano.function([x, y], self.BadAddRef()(x,y), mode='DEBUG_MODE')
+        try:
+            f([1,2], [3,4])
+            assert False #failed to raise error
+        except debugmode.BadViewMap:
+            return

+    def test_badviewmap_slice(self):
+        x = theano.tensor.dvector()
+        y = theano.tensor.dvector()
+        f = theano.function([x, y], self.BadAddSlice()(x,y), mode='DEBUG_MODE')
        try:
            f([1,2], [3,4])
            assert False #failed to raise error
        except debugmode.BadViewMap:
            return

-def test_badviewmap_c():
+    def test_goodviewmap(self):
+        goodop = self.BadAddRef()
+        goodop.view_map = {0: [1]}
+        x = theano.tensor.dvector()
+        y = theano.tensor.dvector()
+        f = theano.function([x, y], goodop(x,y), mode='DEBUG_MODE')
+        try:
+            f([1,5,1], [3,4,2,1,4])
+            return
+        except debugmode.BadViewMap:
+            assert False #BadViewMap raised although the alias is declared in view_map
+
+
+    def test_badviewmap_c(self):
        x = theano.tensor.dvector()
        f = theano.function([x], wb1i(x), mode=debugmode.DebugMode(check_py_code=False))
        try:
            f([1,2])
            assert False #failed to raise error
-    except debugmode.BadDestroyMap:
+        except debugmode.BadViewMap:
            pass

-def test_aliased_outputs_ok():
+    def test_aliased_outputs_ok(self):
        #here aliased outputs is ok because they are both aliased to an input as well
        class CustomOp(gof.Op):
            view_map = {0:[0], 1:[0]}
@@ -377,20 +409,20 @@ def test_aliased_outputs_ok():
                c = a.type()
                d = a.type()
                return gof.Apply(self, [a,b], [c,d])
-        def perform(self, node, (a,b), (c,)):
+            def perform(self, node, (a,b), (c,d)):
                c[0] = a
                d[0] = a[1:]

-    x = theano.tensor.dvector()
-    y = theano.tensor.dvector()
+        x = theano.tensor.dvector('x')
+        y = theano.tensor.dvector('y')
        f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')

-    r0, r1 = f([1,2,3,4])
+        r0, r1 = f([1,2,3,4],[5,6,7,8])

        assert numpy.all(r0 == [1,2,3,4])
        assert numpy.all(r1 == [2,3,4])

-def test_aliased_outputs_ok_output():
+    def test_aliased_outputs_ok_output(self):
        # here aliased outputs is ok because they are both outputs of the function as a whole and
        # thus not destroy-able
        class CustomOp(gof.Op):
@@ -398,7 +430,7 @@ def test_aliased_outputs_ok_output():
                c = a.type()
                d = a.type()
                return gof.Apply(self, [a,b], [c,d])
-        def perform(self, node, (a,b), (c,)):
+            def perform(self, node, (a,b), (c,d)):
                r = a * 2
                c[0] = r
                d[0] = r[1:]
@@ -407,12 +439,12 @@ def test_aliased_outputs_ok_output():
        y = theano.tensor.dvector()
        f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')

-    r0, r1 = f([1,2,3,4])
+        r0, r1 = f([1,2,3,4],[5,6,7,8])

        assert numpy.all(r0 == [2,4,6,8])
        assert numpy.all(r1 == [4,6,8])

-def test_aliased_outputs_ok_shadow():
+    def test_aliased_outputs_ok_shadow(self):
        # here the alias between outputs is ok because one of them is not used for subsequent
        # computation.  This is like the case where we use one output as a memory buffer to serve
        # another output.
@@ -421,21 +453,21 @@ def test_aliased_outputs_ok_shadow():
                c = a.type()
                d = a.type()
                return gof.Apply(self, [a,b], [c,d])
-        def perform(self, node, (a,b), (c,)):
+            def perform(self, node, (a,b), (c,d)):
                r = a * 1
                c[0] = r
                d[0] = r[1:]

-    x = theano.tensor.dvector()
-    y = theano.tensor.dvector()
+        x = theano.tensor.dvector('x')
+        y = theano.tensor.dvector('y')
        f = theano.function([x, y], CustomOp()(x,y)[0] * 2, mode='DEBUG_MODE')

-    r0 = f([1,2,3,4])
+        r0 = f([1,2,3,4],[5,6,7,8])

        assert numpy.all(r0 == [2,4,6,8])


-def test_aliased_outputs_bad():
+    def test_aliased_outputs_bad(self):
        # here the alias between outputs is not ok because destroying one destroys the other, but
        # there's no way to warn theano about it through the view_map mechanism.
        class CustomOp(gof.Op):
@@ -443,7 +475,7 @@ def test_aliased_outputs_bad():
                c = a.type()
                d = a.type()
                return gof.Apply(self, [a,b], [c,d])
-        def perform(self, node, (a,b), (c,)):
+            def perform(self, node, (a,b), (c,d)):
                r = a * 1
                c[0] = r[:-1]
                d[0] = r[1:]
@@ -456,14 +488,14 @@ def test_aliased_outputs_bad():
        f = theano.function([x, y], out, mode='DEBUG_MODE')

        try:
-        r0 = f([1,2,3,4])
+            r0 = f([1,2,3,4],[5,6,7,8])
            assert False # DebugMode should have caught the error
        except debugmode.BadViewMap, e:
+            print e
            pass

        # the situation can be rescued by picking one of the inputs and pretending that it is
        # aliased to both the outputs.  This unfairly disables any destructive operations on the
        # input, but guarantees correctness.
-    custom_op.view_map = {0:[0], 1:[1]}
-    f([1,2,3,4])
-
+        #custom_op.view_map = {0:[0], 1:[1]}
+        #f([1,2,3,4],[5,6,7,8])
--- a/theano/compile/tests/test_inplace_opt_for_value.py
+++ b/theano/compile/tests/test_inplace_opt_for_value.py
@@ -114,7 +114,7 @@ tanh_rnn = TanhRnn()

 class TanhRnnGrad(Op):
    """Gradient calculation for TanhRnn"""
-
+    view_map = {0: [2]}
    def __init__(self):
        pass