Merge pull request #1699 from nouiz/test_example

Fix stabilization opt not being applied (in dev version only)

Merge pull request #1699 from nouiz/test_example
e42d3af2 · Frédéric Bastien · e8f6cb73 · 165a7ae5 · e42d3af2 · e42d3af2
--- a/doc/tutorial/examples.txt
+++ b/doc/tutorial/examples.txt
@@ -468,7 +468,7 @@ The preceding elements are featured in this more realistic example.  It will be
  
  N = 400
  feats = 784
-  D = (rng.randn(N, feats), rng.randint(size=N,low=0, high=2))
+  D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
  training_steps = 10000
  
  # Declare Theano symbolic variables
@@ -484,7 +484,7 @@ The preceding elements are featured in this more realistic example.  It will be
  prediction = p_1 > 0.5                    # The prediction thresholded
  xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function
  cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize
-  gw,gb = T.grad(cost, [w, b])              # Compute the gradient of the cost
+  gw, gb = T.grad(cost, [w, b])             # Compute the gradient of the cost
                                            # (we shall return to this in a
                                            # following section of this tutorial)


--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -3936,7 +3936,7 @@ def attempt_distribution(factor, num, denum):
                                   neg_pairs))), num, denum


-@gof.local_optimizer([T.mul])
+@gof.local_optimizer([T.mul, T.true_div])
 def local_greedy_distributor(node):
    """
    This optimization tries to apply distributivity of multiplication
@@ -3948,9 +3948,10 @@ def local_greedy_distributor(node):
    The following expressions are simplified:
    1. ((a/x + b/y) * x * y) --> a*y + b*x
    2. ((a/x + b) * x) --> a + b*x
+    3. There are other forms too, where the node is a true_div.

    The following expressions are not simplified:
-    3. ((a + b) * x) -/-> a*x + b*x
+    4. ((a + b) * x) -/-> a*x + b*x

    This optimization aims to reduce computational cost. It may also
    increase numerical stability, e.g. when x and/or y tend to 0 in

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
@@ -162,13 +162,26 @@ def test_add_canonizer_problem0():
 class test_greedy_distribute(unittest.TestCase):
    def test_main(self):
        a, b, c, d, x, y, z = matrices('abcdxyz')
+
+        #1. ((a/x + b/y) * x * y) --> a*y + b*x
        e = (a / z + b / x) * x * z
        g = FunctionGraph([a, b, c, d, x, y, z], [e])
-        ##print pprint(g.outputs[0])
+        #print pprint(g.outputs[0])
+        mul_canonizer.optimize(g)
+        gof.TopoOptimizer(gof.LocalOptGroup(local_greedy_distributor),
+                          order='out_to_in').optimize(g)
+        #print pprint(g.outputs[0])
+        assert str(pprint(g.outputs[0])) == "((a * x) + (b * z))"
+
+        #2. ((a/x + b) * x) --> a + b*x
+        e = (a / x + b) * x
+        g = FunctionGraph([a, b, x], [e])
+        #print pprint(g.outputs[0])
        mul_canonizer.optimize(g)
        gof.TopoOptimizer(gof.LocalOptGroup(local_greedy_distributor),
                          order='out_to_in').optimize(g)
-        ##print pprint(g.outputs[0])
+        #print pprint(g.outputs[0])
+        assert str(pprint(g.outputs[0])) == "(a + (b * x))"

    def test_kording_bug(self):
        x, y = vectors('xy')

--- a/theano/tests/test_tutorial.py
+++ b/theano/tests/test_tutorial.py
@@ -9,7 +9,9 @@ import numpy
 from numpy import array

 from theano import config
-from theano.tests  import unittest_tools as utt
+from theano.tests import unittest_tools as utt
+from theano.sandbox.rng_mrg import MRG_RandomStreams
+from theano.tensor.shared_randomstreams import RandomStreams


 class T_extending(unittest.TestCase):
@@ -650,7 +652,86 @@ class T_examples(unittest.TestCase):
        rng.set_state(state_after_v0)
        rv_u.rng.set_value(rng, borrow=True)
        v2 = f()             # v2 != v1
+        v3 = f()             # v3 == v1
        assert numpy.all(v1 != v2)
+        assert numpy.all(v1 == v3)
+
+    def test_copy_random_state(self):
+
+        class Graph():
+            def __init__(self, seed=123):
+                self.rng = RandomStreams(seed)
+                self.y = self.rng.uniform(size=(1,))
+
+        g1 = Graph(seed=123)
+        f1 = theano.function([], g1.y)
+
+        g2 = Graph(seed=987)
+        f2 = theano.function([], g2.y)
+
+        #print 'By default, the two functions are out of sync.'
+        v1 =  f1()
+        v2 =  f2()
+
+        def copy_random_state(g1, g2):
+            if isinstance(g1.rng, MRG_RandomStreams):
+                g2.rng.rstate = g1.rng.rstate
+            for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates):
+                su2[0].set_value(su1[0].get_value())
+
+        #print 'We now copy the state of the theano random number generators.'
+        copy_random_state(g1, g2)
+        v3 = f1()
+        v4 = f2()
+        assert numpy.allclose(v1, 0.72803009)
+        assert numpy.allclose(v2, 0.55056769)
+        assert numpy.allclose(v3, 0.59044123)
+        assert numpy.allclose(v4, 0.59044123)
+
+    def test_examples_real_example(self):
+        rng = numpy.random
+
+        N = 400
+        feats = 784
+        D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
+        training_steps = 10000
+
+        # Declare Theano symbolic variables
+        x = T.matrix("x")
+        y = T.vector("y")
+        w = theano.shared(rng.randn(feats), name="w")
+        b = theano.shared(0., name="b")
+        print "Initial model:"
+        print w.get_value(), b.get_value()
+
+        # Construct Theano expression graph
+        p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))   # Probability that target = 1
+        prediction = p_1 > 0.5                    # The prediction thresholded
+        xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function
+        cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize
+        gw, gb = T.grad(cost, [w, b])             # Compute the gradient of the cost
+                                                  # (we shall return to this in a
+                                                  # following section of this tutorial)
+
+        # Compile
+        train = theano.function(
+            inputs=[x,y],
+            outputs=[prediction, xent],
+            updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
+        predict = theano.function(inputs=[x], outputs=prediction)
+
+        # Train
+        for i in range(training_steps):
+            pred, err = train(D[0], D[1])
+
+        print "Final model:"
+        print w.get_value(), b.get_value()
+        print "target values for D:", D[1]
+        print "prediction on D:", predict(D[0])
+
+        # A user reported on the mailing list that NaN values appeared here.
+        assert not numpy.isnan(b.get_value()).any()
+        assert not numpy.isnan(w.get_value()).any()


 class T_aliasing(unittest.TestCase):