Commit 81e6bba0 authored by Christof Angermueller

Use doctest to update pydotprint plots and debugprint output

Parent 93cc458d
@@ -86,7 +86,7 @@ Loops
- ``sum()`` could be computed by scanning the ``z + x(i)`` function over a list, given an initial state of ``z=0``.
- Often a for-loop can be expressed as a ``scan()`` operation, and ``scan`` is the closest that Theano comes to looping.
- The advantages of using ``scan`` over for loops (see the sketch below):

  - The number of iterations can be part of the symbolic graph
  - It minimizes GPU transfers, if a GPU is involved
  - It can compute gradients through the sequential steps
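As a small illustration of the first point, here is a minimal sketch (not from the original slides) that computes ``sum()`` with ``scan``, carrying the running total ``z`` as the recurrent state and starting from ``z=0``:

.. code-block:: python

    import numpy
    import theano
    import theano.tensor as T

    X = T.dvector("X")
    # One step of the recurrence: z_{i+1} = z_i + x_i
    results, updates = theano.scan(fn=lambda x_i, z: z + x_i,
                                   sequences=X,
                                   outputs_info=T.as_tensor_variable(numpy.asarray(0., 'float64')))
    # results holds all partial sums; results[-1] is the total
    scan_sum = theano.function(inputs=[X], outputs=results[-1],
                               updates=updates)
    print scan_sum(numpy.arange(5, dtype='float64'))  # 10.0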
@@ -111,10 +111,10 @@ Loops
# Scan has provided us with A**1 through A**k. Keep only the last
# value. Scan notices this and does not waste memory saving them.
final_result = result[-1]
power = theano.function(inputs=[A,k], outputs=final_result,
                        updates=updates)
print power(range(10), 2)
#[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.]
@@ -152,7 +152,7 @@ Loops
Exercise 4
-----------
- Run both examples
- Modify and execute the polynomial example to have the reduction done by scan (one possible approach is sketched below)
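One possible solution sketch for the second exercise (assuming the polynomial example from the tutorial, which computes the ``coefficient * x ** power`` terms with ``scan`` and then reduces them with ``.sum()``): carry the running sum as the scan state, so the reduction is done by ``scan`` itself.

.. code-block:: python

    import numpy
    import theano
    import theano.tensor as T

    coefficients = T.dvector("coefficients")
    x = T.dscalar("x")
    max_coefficients_supported = 10000

    # The running sum is the recurrent state; scan stops at the end of the
    # shortest sequence, i.e. after len(coefficients) steps.
    outputs_info = T.as_tensor_variable(numpy.asarray(0., 'float64'))
    components, updates = theano.scan(
        fn=lambda coeff, power, prior_sum, free_var:
            prior_sum + coeff * (free_var ** power),
        sequences=[coefficients, T.arange(max_coefficients_supported)],
        outputs_info=outputs_info,
        non_sequences=x)
    polynomial = components[-1]  # the reduction happened inside scan
    calculate = theano.function([coefficients, x], polynomial)
    print calculate(numpy.asarray([1., 0., 2.]), 3.)  # 1 + 0*3 + 2*3**2 = 19.0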
@@ -275,62 +275,99 @@ Exercise 5
Printing/Drawing Theano graphs
------------------------------
Consider the following logistic regression model:
>>> import numpy
>>> import theano
>>> import theano.tensor as T
>>> rng = numpy.random
>>> # Training data
>>> N = 400
>>> feats = 784
>>> D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
>>> training_steps = 10000
>>> # Declare Theano symbolic variables
>>> x = T.matrix("x")
>>> y = T.vector("y")
>>> w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
>>> b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
>>> x.tag.test_value = D[0]
>>> y.tag.test_value = D[1]
>>> # Construct Theano expression graph
>>> p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one
>>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
>>> # Compute gradients
>>> xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) # Cross-entropy
>>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
>>> gw,gb = T.grad(cost, [w,b])
>>> # Training and prediction function
>>> train = theano.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train")
>>> predict = theano.function(inputs=[x], outputs=prediction, name = "predict")
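(Not part of the original doctest setup: to actually fit the model one would iterate ``train`` and then call ``predict``; the loop is marked ``+SKIP`` here because it takes a while to run.)

>>> for i in range(training_steps): # doctest: +SKIP
...     pred, err = train(D[0], D[1])
>>> print predict(D[0]) # doctest: +SKIP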
- Pretty Printing

``theano.printing.pprint(variable)``
>>> theano.printing.pprint(prediction) # doctest: +NORMALIZE_WHITESPACE
'gt((TensorConstant{1} / (TensorConstant{1} + exp(((-(x \\dot w)) - b)))),
TensorConstant{0.5})'
- Debug Print
``theano.printing.debugprint({fct, variable, list of variables})``
>>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE
Elemwise{gt,no_inplace} [@A] ''
 |Elemwise{true_div,no_inplace} [@B] ''
 | |DimShuffle{x} [@C] ''
 | | |TensorConstant{1} [@D]
 | |Elemwise{add,no_inplace} [@E] ''
 | | |DimShuffle{x} [@F] ''
 | | | |TensorConstant{1} [@D]
 | | |Elemwise{exp,no_inplace} [@G] ''
 | | | |Elemwise{sub,no_inplace} [@H] ''
 | | | | |Elemwise{neg,no_inplace} [@I] ''
 | | | | | |dot [@J] ''
 | | | | | | |x [@K]
 | | | | | | |w [@L]
 | | | | |DimShuffle{x} [@M] ''
 | | | | | |b [@N]
 |DimShuffle{x} [@O] ''
 | |TensorConstant{0.5} [@P]
>>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE
Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4
 |CGemv{inplace} [@B] '' 3
 | |Alloc [@C] '' 2
 | | |TensorConstant{0.0} [@D]
 | | |Shape_i{0} [@E] '' 1
 | | | |x [@F]
 | |TensorConstant{1.0} [@G]
 | |x [@F]
 | |w [@H]
 | |TensorConstant{0.0} [@D]
 |InplaceDimShuffle{x} [@I] '' 0
 | |b [@J]
 |TensorConstant{(1,) of 0.5} [@K]
- Picture Printing of Graphs
``pydotprint`` requires graphviz and pydot.
>>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_prediction.png
.. image:: ./pics/logreg_pydotprint_prediction.png
:width: 800 px
>>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_predict.png
.. image:: ./pics/logreg_pydotprint_predict.png
:width: 800 px
>>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_train.png
.. image:: ./pics/logreg_pydotprint_train.png
:width: 1500 px
@@ -353,7 +390,7 @@ Debugging
- For pure symbolic variables, use ``x.tag.test_value = numpy.random.rand(5,10)`` (see the sketch below)
- Run with the flag ``mode=FAST_COMPILE``

  - Applies only a few graph optimizations
  - Runs the ops' Python implementations (better error messages; can be debugged interactively in the Python debugger)
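A minimal sketch of the test value mechanism (not from the original slides; it assumes ``theano.config.compute_test_value`` is switched on, and uses ``dmatrix`` so the float64 test value matches the variable's dtype):

.. code-block:: python

    import numpy
    import theano
    import theano.tensor as T

    theano.config.compute_test_value = 'raise'   # fail at graph-build time
    x = T.dmatrix('x')
    x.tag.test_value = numpy.random.rand(5, 10)  # concrete value for a symbolic input
    w = theano.shared(numpy.random.rand(10, 3), name='w')
    y = T.dot(x, w)                   # a shape mismatch would be raised here
    print y.tag.test_value.shape      # (5, 3)

    # FAST_COMPILE: few optimizations, ops run as Python code
    f = theano.function([x], y, mode='FAST_COMPILE')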
@@ -60,9 +60,9 @@ print predict(D[0])
theano.printing.pydotprint(predict,
                           outfile="pics/logreg_pydotprint_predic.png",
                           var_with_name_simple=True)
theano.printing.pydotprint(prediction,
                           outfile="pics/logreg_pydotprint_prediction.png",
                           var_with_name_simple=True)
theano.printing.pydotprint(train,
                           outfile="pics/logreg_pydotprint_train.png",
                           var_with_name_simple=True)
@@ -64,10 +64,9 @@ The graph can be traversed starting from outputs (the result of some
computation) down to its inputs using the owner field.
Take for example the following code:
>>> import theano
>>> x = theano.tensor.dmatrix('x')
>>> y = x * 2.
If you enter ``type(y.owner)`` you get ``<class 'theano.gof.graph.Apply'>``,
which is the apply node that connects the op and the inputs to get this
@@ -85,20 +84,20 @@ multiplication is done between the inputs:
>>> y.owner.inputs[0]
x
>>> y.owner.inputs[1]
DimShuffle{x,x}.0
Note that the second input is not 2 as we would have expected. This is
because 2 was first :term:`broadcasted <broadcasting>` to a matrix of the
same shape as *x*. This is done by using the op ``DimShuffle``:
>>> type(y.owner.inputs[1])
<class 'theano.tensor.var.TensorVariable'>
>>> type(y.owner.inputs[1].owner)
<class 'theano.gof.graph.Apply'>
>>> y.owner.inputs[1].owner.op # doctest: +SKIP
<theano.tensor.elemwise.DimShuffle object at 0x106fcaf10>
>>> y.owner.inputs[1].owner.inputs
[TensorConstant{2.0}]
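A small sketch (not in the original text) that automates this kind of traversal, walking from an output variable down to the graph's inputs via the ``owner`` and ``inputs`` fields:

.. code-block:: python

    import theano.tensor as T

    x = T.dmatrix('x')
    y = x * 2.

    def walk(var, depth=0):
        print '  ' * depth + str(var)
        if var.owner is not None:            # apply node that produced var
            for inp in var.owner.inputs:     # recurse into that node's inputs
                walk(inp, depth + 1)

    walk(y)  # prints y's graph: the mul Elemwise, x, and the broadcasted 2.0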
Starting from this graph structure it is easier to understand how
@@ -159,10 +158,23 @@ as we apply it. Consider the following example of optimization:
>>> b = a + a ** 10 # build symbolic expression
>>> f = theano.function([a], b) # compile function
>>> print f([0, 1, 2]) # compile function
[    0.     2.  1026.]
>>> theano.printing.pydotprint(f, outfile="./pics/symbolic_graph_opt.png", var_with_name_simple=True)
The output file is available at ./pics/symbolic_graph_opt.png
.. |g1| image:: ../hpcs2011_tutorial/pics/f_unoptimized.png
   :width: 300 px

.. |g2| image:: ./pics/symbolic_graph_opt.png
   :width: 500 px

We used :func:`theano.printing.pydotprint` to visualize the optimized graph
(right), which is much more compact than the unoptimized graph (left).

====================================================== =====================================================
Unoptimized graph                                      Optimized graph
====================================================== =====================================================
|g1|                                                   |g2|
====================================================== =====================================================