提交 030bb405 authored 作者: James Bergstra's avatar James Bergstra

merge

...@@ -221,81 +221,7 @@ Advanced example ...@@ -221,81 +221,7 @@ Advanced example
Complex models can be implemented by subclassing ``Module`` (though that is not mandatory). Here is a complete, extensible (and working) regression model implemented using this system:
.. code-block:: python .. literalinclude:: ../code/regression.py
class RegressionLayer(M.Module):
    """Affine regression model expressed as a theano ``Module``.

    Builds the symbolic graph for ``prediction = f(dot(input, w) + b)``,
    the training cost (classification cost plus optional regularization),
    and two compiled interface methods:

    * ``update(input, target)`` -- one gradient-descent step on ``w``/``b``,
      returning the cost.
    * ``apply(input)`` -- the model's prediction.

    Subclasses must implement ``build_prediction()`` and
    ``build_classification_cost(target)``; ``build_regularization()`` may be
    overridden to add a penalty term (the default is a constant zero).
    """

    def __init__(self, input=None, target=None, regularize=True):
        super(RegressionLayer, self).__init__()  # boilerplate
        # MODEL CONFIGURATION
        self.regularize = regularize
        # ACQUIRE/MAKE INPUT AND TARGET
        # Compare against None explicitly: a symbolic theano variable must
        # not be probed for truth value, and a caller-supplied variable must
        # never be silently replaced.
        if input is None:
            input = T.matrix('input')
        if target is None:
            target = T.matrix('target')
        # HYPER-PARAMETERS
        self.stepsize = T.scalar()  # a stepsize for gradient descent
        # PARAMETERS
        self.w = T.matrix()  # the linear transform to apply to our input points
        self.b = T.vector()  # biases, making the transform affine instead of linear
        # REGRESSION MODEL
        self.activation = T.dot(input, self.w) + self.b
        self.prediction = self.build_prediction()
        # CLASSIFICATION COST
        self.classification_cost = self.build_classification_cost(target)
        # REGULARIZATION COST
        self.regularization = self.build_regularization()
        # TOTAL COST
        self.cost = self.classification_cost
        if self.regularize:
            self.cost = self.cost + self.regularization
        # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
        self.grad_w, self.grad_b = T.grad(self.cost, [self.w, self.b])
        # INTERFACE METHODS
        self.update = M.Method([input, target],
                               self.cost,
                               updates={self.w: self.w - self.stepsize * self.grad_w,
                                        self.b: self.b - self.stepsize * self.grad_b})
        self.apply = M.Method(input, self.prediction)

    def params(self):
        """Return the fittable parameters ``(w, b)``."""
        return self.w, self.b

    def _instance_initialize(self, obj, input_size=None, target_size=None,
                             seed=1827, **init):
        """Initialize a module instance's parameter values.

        ``obj`` is an "instance" of this module, holding values for each
        member and compiled functions for each method.  When both
        ``input_size`` and ``target_size`` are given, ``w`` is drawn
        uniformly from [-0.5, 0.5) with a seeded RNG (reproducible) and
        ``b`` is zeroed; any extra keyword arguments override these
        defaults via ``M.default_initialize``.
        """
        if input_size and target_size:
            # initialize w and b in a special way using input_size and target_size
            sz = (input_size, target_size)
            rng = N.random.RandomState(seed)
            obj.w = rng.uniform(size=sz, low=-0.5, high=0.5)
            obj.b = N.zeros(target_size)
            obj.stepsize = 0.01
        # default_initialize takes all the name: value pairs in init and sets
        # the property with that name to the provided value; this covers
        # setting stepsize, l2_coef (w and b can be set that way too).  We
        # call it last so explicit keyword arguments supersede the defaults
        # assigned above.
        M.default_initialize(obj, **init)

    def build_regularization(self):
        """Default regularization penalty: none (constant zero)."""
        return T.zero()  # no regularization!
class SoftmaxXERegression(RegressionLayer):
    """Softmax regression trained with cross-entropy (XE) loss."""

    def build_prediction(self):
        # Softmax turns the affine activation into a row-wise probability
        # distribution over the target classes.
        return NN.softmax(self.activation)

    def build_classification_cost(self, target):
        """Return the total cross-entropy between target and prediction.

        BUG FIX: the previous code summed ``-(target - prediction)**2``,
        i.e. the *negative* of a squared-error measure -- gradient descent
        on that "cost" would increase the error.  The leading minus sign is
        only correct together with the log-likelihood matrix below, which
        is what the class name and docstring promise.
        """
        self.classification_cost_matrix = target * T.log(self.prediction) \
                + (1 - target) * T.log(1 - self.prediction)
        # Per-example negative log-likelihood (rows sum over classes).
        self.classification_costs = -T.sum(self.classification_cost_matrix, axis=1)
        return T.sum(self.classification_costs)

    def build_regularization(self):
        # L2 weight decay, with a tunable coefficient exposed as a
        # hyper-parameter of the compiled module.
        self.l2_coef = T.scalar()
        return self.l2_coef * T.sum(self.w * self.w)
Here is how we use the model:
......
...@@ -7,6 +7,7 @@ import theano ...@@ -7,6 +7,7 @@ import theano
import theano.tensor import theano.tensor
from theano.compile import debugmode from theano.compile import debugmode
import theano.compile import theano.compile
import unittest
def test0(): def test0():
x = theano.tensor.dvector() x = theano.tensor.dvector()
...@@ -342,128 +343,159 @@ def test_baddestroymap_c(): ...@@ -342,128 +343,159 @@ def test_baddestroymap_c():
pass pass
def test_badviewmap(): class Test_ViewMap(unittest.TestCase):
class BadAdd(gof.Op):
class BadAddRef(gof.Op):
def make_node(self, a, b): def make_node(self, a, b):
c = b.type() c = b.type()
return gof.Apply(self, [a,b], [c]) return gof.Apply(self, [a,b], [c])
def perform(self, node, (a,b), (c,)): def perform(self, node, (a,b), (c,)):
c[0] = b c[0] = b
x = theano.tensor.dvector() class BadAddSlice(gof.Op):
y = theano.tensor.dvector()
f = theano.function([x, y], BadAdd()(x,y), mode='DEBUG_MODE')
try:
f([1,2], [3,4])
assert False #failed to raise error
except debugmode.BadViewMap:
return
def test_badviewmap_c():
x = theano.tensor.dvector()
f = theano.function([x], wb1i(x), mode=debugmode.DebugMode(check_py_code=False))
try:
f([1,2])
assert False #failed to raise error
except debugmode.BadDestroyMap:
pass
def test_aliased_outputs_ok():
#here aliased outputs is ok because they are both aliased to an input as well
class CustomOp(gof.Op):
view_map = {0:[0], 1:[0]}
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,)):
c[0] = a
d[0] = a[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4])
assert numpy.all(r0 == [1,2,3,4])
assert numpy.all(r1 == [2,3,4])
def test_aliased_outputs_ok_output():
# here aliased outputs is ok because they are both outputs of the function as a whole and
# thus not destroy-able
class CustomOp(gof.Op):
def make_node(self, a, b): def make_node(self, a, b):
c = a.type() c = b.type()
d = a.type() return gof.Apply(self, [a,b], [c])
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,)):
r = a * 2
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4])
assert numpy.all(r0 == [2,4,6,8])
assert numpy.all(r1 == [4,6,8])
def test_aliased_outputs_ok_shadow():
# here the alias between outputs is ok because one of them is not used for subsequent
# computation. This is like the case where we use one output as a memory buffer to serve
# another output.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,)):
r = a * 1
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y)[0] * 2, mode='DEBUG_MODE')
r0 = f([1,2,3,4])
assert numpy.all(r0 == [2,4,6,8])
def test_aliased_outputs_bad():
# here the alias between outputs is not ok because destroying one destroys the other, but
# there's no way to warn theano about it through the view_map mechanism.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,)): def perform(self, node, (a,b), (c,)):
r = a * 1 c[0] = b[1:3]
c[0] = r[:-1]
d[0] = r[1:] def test_badviewmap_ref(self):
custom_op = CustomOp() x = theano.tensor.dvector()
y = theano.tensor.dvector()
x = theano.tensor.dvector() f = theano.function([x, y], self.BadAddRef()(x,y), mode='DEBUG_MODE')
y = theano.tensor.dvector() try:
bad_xy0, bad_xy1 = custom_op(x, y) f([1,2], [3,4])
out = bad_xy0 * 2 + bad_xy1 * 2 assert False #failed to raise error
f = theano.function([x, y], out, mode='DEBUG_MODE') except debugmode.BadViewMap:
return
try:
r0 = f([1,2,3,4]) def test_badviewmap_slice(self):
assert False # DebugMode should have caught the error x = theano.tensor.dvector()
except debugmode.BadViewMap, e: y = theano.tensor.dvector()
pass f = theano.function([x, y], self.BadAddSlice()(x,y), mode='DEBUG_MODE')
try:
# the situation can be rescued by picking one of the inputs and pretending that it is f([1,2], [3,4])
# aliased to both the outputs. This unfairly disables any destructive operations on the assert False #failed to raise error
# input, but guarantees correctness. except debugmode.BadViewMap:
custom_op.view_map = {0:[0], 1:[1]} return
f([1,2,3,4])
def test_goodviewmap(self):
goodop = self.BadAddRef()
goodop.view_map = {0: [1]}
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], goodop(x,y), mode='DEBUG_MODE')
try:
f([1,5,1], [3,4,2,1,4])
return
except debugmode.BadViewMap:
assert False #failed to raise error
def test_badviewmap_c(self):
x = theano.tensor.dvector()
f = theano.function([x], wb1i(x), mode=debugmode.DebugMode(check_py_code=False))
try:
f([1,2])
assert False #failed to raise error
except debugmode.BadViewMap:
pass
def test_aliased_outputs_ok(self):
#here aliased outputs is ok because they are both aliased to an input as well
class CustomOp(gof.Op):
view_map = {0:[0], 1:[0]}
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
c[0] = a
d[0] = a[1:]
x = theano.tensor.dvector('x')
y = theano.tensor.dvector('y')
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4],[5,6,7,8])
assert numpy.all(r0 == [1,2,3,4])
assert numpy.all(r1 == [2,3,4])
def test_aliased_outputs_ok_output(self):
# here aliased outputs is ok because they are both outputs of the function as a whole and
# thus not destroy-able
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
r = a * 2
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4],[5,6,7,8])
assert numpy.all(r0 == [2,4,6,8])
assert numpy.all(r1 == [4,6,8])
def test_aliased_outputs_ok_shadow(self):
# here the alias between outputs is ok because one of them is not used for subsequent
# computation. This is like the case where we use one output as a memory buffer to serve
# another output.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
r = a * 1
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector('x')
y = theano.tensor.dvector('y')
f = theano.function([x, y], CustomOp()(x,y)[0] * 2, mode='DEBUG_MODE')
r0 = f([1,2,3,4],[5,6,7,8])
assert numpy.all(r0 == [2,4,6,8])
def test_aliased_outputs_bad(self):
# here the alias between outputs is not ok because destroying one destroys the other, but
# there's no way to warn theano about it through the view_map mechanism.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
r = a * 1
c[0] = r[:-1]
d[0] = r[1:]
custom_op = CustomOp()
x = theano.tensor.dvector()
y = theano.tensor.dvector()
bad_xy0, bad_xy1 = custom_op(x, y)
out = bad_xy0 * 2 + bad_xy1 * 2
f = theano.function([x, y], out, mode='DEBUG_MODE')
try:
r0 = f([1,2,3,4],[5,6,7,8])
assert False # DebugMode should have caught the error
except debugmode.BadViewMap, e:
print e
pass
# the situation can be rescued by picking one of the inputs and pretending that it is
# aliased to both the outputs. This unfairly disables any destructive operations on the
# input, but guarantees correctness.
#custom_op.view_map = {0:[0], 1:[1]}
#f([1,2,3,4],[5,6,7,8])
...@@ -114,7 +114,7 @@ tanh_rnn = TanhRnn() ...@@ -114,7 +114,7 @@ tanh_rnn = TanhRnn()
class TanhRnnGrad(Op): class TanhRnnGrad(Op):
"""Gradient calculation for TanhRnn""" """Gradient calculation for TanhRnn"""
view_map = {0: [2]}
def __init__(self): def __init__(self):
pass pass
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论