testgroup / pytensor · Commit 95200d31
authored Mar 26, 2009 by James Bergstra

revisions to ProfileMode docs
Parent: f0f426ce

Showing 2 changed files with 148 additions and 41 deletions
benchmark/regression/regression.py  +104 -0
doc/advanced/profilemode.txt  +44 -41

benchmark/regression/regression.py  0 → 100644

import theano
import numpy as N
from theano import tensor as T
from theano.tensor import nnet as NN
from theano.compile import module as M

class RegressionLayer(M.Module):
    def __init__(self, input=None, target=None, regularize=True):
        super(RegressionLayer, self).__init__() #boilerplate
        # MODEL CONFIGURATION
        self.regularize = regularize
        # ACQUIRE/MAKE INPUT AND TARGET
        if not input:
            input = T.matrix('input')
        if not target:
            target = T.matrix('target')
        # HYPER-PARAMETERS
        self.stepsize = T.scalar()  # a stepsize for gradient descent
        # PARAMETERS
        self.w = T.matrix()  #the linear transform to apply to our input points
        self.b = T.vector()  #a vector of biases, which make our transform affine instead of linear
        # REGRESSION MODEL
        self.activation = T.dot(input, self.w) + self.b
        self.prediction = self.build_prediction()
        # CLASSIFICATION COST
        self.classification_cost = self.build_classification_cost(target)
        # REGULARIZATION COST
        self.regularization = self.build_regularization()
        # TOTAL COST
        self.cost = self.classification_cost
        if self.regularize:
            self.cost = self.cost + self.regularization
        # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
        self.grad_w, self.grad_b, grad_act = T.grad(self.cost,
                                                    [self.w, self.b, self.prediction])
        print 'grads', self.grad_w, self.grad_b
        # INTERFACE METHODS
        self.update = M.Method([input, target],
                               [self.cost, self.grad_w, self.grad_b, grad_act],
                               updates={self.w: self.w - self.stepsize * self.grad_w,
                                        self.b: self.b - self.stepsize * self.grad_b})
        self.apply = M.Method(input, self.prediction)

    def params(self):
        return self.w, self.b

    def _instance_initialize(self, obj, input_size=None, target_size=None,
                             seed=1827, **init):
        # obj is an "instance" of this module holding values for each member and
        # functions for each method
        if input_size and target_size:
            # initialize w and b in a special way using input_size and target_size
            sz = (input_size, target_size)
            rng = N.random.RandomState(seed)
            obj.w = rng.uniform(size=sz, low=-0.5, high=0.5)
            obj.b = N.zeros(target_size)
            obj.stepsize = 0.01
        # here we call the default_initialize method, which takes all the name: value
        # pairs in init and sets the property with that name to the provided value.
        # this covers setting stepsize, l2_coef; w and b can be set that way too.
        # we call it after as we want the parameter to supersede the default value.
        M.default_initialize(obj, **init)

    def build_regularization(self):
        return T.zero()  # no regularization!


class SpecifiedRegressionLayer(RegressionLayer):
    """ XE mean cross entropy"""
    def build_prediction(self):
        # return NN.softmax(self.activation) #use this line to expose a slow subtensor
        # implementation
        return NN.sigmoid(self.activation)

    def build_classification_cost(self, target):
        self.classification_cost_matrix = (target - self.prediction)**2
        #print self.classification_cost_matrix.type
        self.classification_costs = T.sum(self.classification_cost_matrix, axis=1)
        return T.sum(self.classification_costs)

    def build_regularization(self):
        self.l2_coef = T.scalar()  # we can add a hyper parameter if we need to
        return self.l2_coef * T.sum(self.w * self.w)


def test_module_advanced_example():
    profmode = theano.ProfileMode(optimizer='fast_run',
                                  linker=theano.gof.OpWiseCLinker())

    data_x = N.random.randn(4, 10)
    data_y = [[int(x)] for x in (N.random.randn(4) > 0)]

    model = SpecifiedRegressionLayer(regularize=False).make(input_size=10,
                                                            target_size=1,
                                                            stepsize=0.1,
                                                            mode=profmode)

    for i in xrange(1000):
        xe, gw, gb, ga = model.update(data_x, data_y)
        if i % 100 == 0:
            print i, xe
            pass
    #for inputs, targets in my_training_set():
    #print "cost:", model.update(inputs, targets)

    print "final weights:", model.w
    print "final biases:", model.b

    profmode.print_summary()
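
For reference, here is a minimal sketch of the same ProfileMode workflow applied to a plain theano.function rather than a Module. The toy graph is hypothetical, but the ProfileMode construction and the final print_summary() call mirror the ones in the file above:

import theano
import theano.tensor as T
import numpy

profmode = theano.ProfileMode(optimizer='fast_run',
                              linker=theano.gof.OpWiseCLinker())

x = T.matrix('x')                        # symbolic input (hypothetical toy graph)
f = theano.function([x], T.sum(x ** 2),  # compile with the profiling mode
                    mode=profmode)

f(numpy.random.randn(3, 4))              # call it so there is something to time
profmode.print_summary()                 # per-Apply and per-Op timing breakdown
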
doc/advanced/profilemode.txt

@@ -52,58 +52,61 @@ profile, then call ``profmode.print_summary()``. This will provide you with
 the desired timing information, indicating where your graph is spending most
 of its time.
 
-This is best shown through an example. Let's use the example of logistic
-regression, covered previously in the `Module`_ section.
-
-.. _Module : module.html?highlight=nnet#advanced-example
+This is best shown through an example. Let's use the example of logistic
+regression. (Code for this example is in the file
+``benchmark/regression/regression.py``.)
+
+Compiling the module with ProfileMode and calling ``profmode.print_summary()``
+generates the following output:
 
 .. code-block:: python
 
     """
     ProfileMode.print_summary()
     ---------------------------
-    local_time 0.0508708953857 (Time spent running thunks)
+    local_time 0.0749197006226 (Time spent running thunks)
     Apply-wise summary: <fraction of local_time spent at this position> (<Apply position>, <Apply Op name>)
-        0.397 6 Subtensor{0, ::}
-        0.110 18 <theano.tensor.blas.Gemm object at 0x15eb3d0>
-        0.047 1 _dot22
-        0.033 0 InplaceDimShuffle{x,0}
-        0.032 2 InplaceDimShuffle{1,0}
-        0.030 7 second
-        0.029 8 <theano.tensor.nnet.SoftmaxWithBias object at 0x1619150>
-        0.028 16 Sum
-        0.027 3 InplaceDimShuffle{x}
-        0.024 9 sub
-        0.024 17 Sum{0}
-        0.024 15 <theano.tensor.nnet.SoftmaxWithBiasDx object at 0x177fcd0>
-        0.023 10 sqr
-        0.023 12 Sum{1}
-        0.023 4 neg
-        ... (remaining 6 Apply instances account for 0.13 of the runtime)
+        0.069 15 _dot22
+        0.064 1 _dot22
+        0.053 0 InplaceDimShuffle{x,0}
+        0.049 2 InplaceDimShuffle{1,0}
+        0.049 10 mul
+        0.049 6 Elemwise{ScalarSigmoid{output_types_preference=<theano.scalar.basic.transfer_type object at 0x171e650>}}[(0, 0)]
+        0.049 3 InplaceDimShuffle{x}
+        0.049 4 InplaceDimShuffle{x,x}
+        0.048 14 Sum{0}
+        0.047 7 sub
+        0.046 17 mul
+        0.045 9 sqr
+        0.045 8 Elemwise{sub}
+        0.045 16 Sum
+        0.044 18 mul
+        ... (remaining 6 Apply instances account for 0.25 of the runtime)
     Op-wise summary: <fraction of local_time spent on this kind of Op> <Op name>
-        0.397 Subtensor{0, ::}
-        0.110 * <theano.tensor.blas.Gemm object at 0x15eb3d0>
-        0.047 * _dot22
-        0.043 * Elemwise{Mul{output_types_preference=<theano.scalar.basic.transfer_type object at 0x176dbd0>}}[(0, 1)]
-        0.033 * InplaceDimShuffle{x,0}
-        0.032 * InplaceDimShuffle{1,0}
-        0.030 * second
-        0.029 * <theano.tensor.nnet.SoftmaxWithBias object at 0x1619150>
-        0.028 * Sum
-        0.027 * InplaceDimShuffle{x}
-        0.024 * sub
-        0.024 * Sum{0}
-        0.024 * <theano.tensor.nnet.SoftmaxWithBiasDx object at 0x177fcd0>
-        0.023 * sqr
-        0.023 * Sum{1}
-        0.023 * neg
-        0.022 * Elemwise{Sub{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1900850>}}[(0, 0)]
-        0.021 * Elemwise{Add{output_types_preference=<theano.scalar.basic.transfer_type object at 0x18ab350>}}[(0, 0)]
-        0.021 * Elemwise{Second{output_types_preference=<theano.scalar.basic.transfer_type object at 0x177f090>}}[(0, 1)]
-        0.020 * Elemwise{Neg{output_types_preference=<theano.scalar.basic.transfer_type object at 0x17b4690>}}[(0, 0)]
+        0.139 * mul
+        0.134 * _dot22
+        0.092 * sub
+        0.085 * Elemwise{Sub{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1779f10>}}[(0, 0)]
+        0.053 * InplaceDimShuffle{x,0}
+        0.049 * InplaceDimShuffle{1,0}
+        0.049 * Elemwise{ScalarSigmoid{output_types_preference=<theano.scalar.basic.transfer_type object at 0x171e650>}}[(0, 0)]
+        0.049 * InplaceDimShuffle{x}
+        0.049 * InplaceDimShuffle{x,x}
+        0.048 * Sum{0}
+        0.045 * sqr
+        0.045 * Sum
+        0.043 * Sum{1}
+        0.042 * Elemwise{Mul{output_types_preference=<theano.scalar.basic.transfer_type object at 0x17a0f50>}}[(0, 1)]
+        0.041 * Elemwise{Add{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1736a50>}}[(0, 0)]
+        0.039 * Elemwise{Second{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1736d90>}}[(0, 1)]
     ... (remaining 0 Ops account for 0.00 of the runtime)
     (*) Op is running a c implementation
     """
 
 The summary has two components to it. In the first section called the Apply-wise
 summary, timing information is provided for the worst offending Apply nodes. This
 corresponds to individual nodes within your graph which take the longest to
 ...
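
Since both summaries report fractions of local_time, absolute per-Op times can be recovered by multiplication. A small illustrative sketch, using values copied from the new output above:

# Turn the per-Op fractions printed by print_summary() into absolute seconds
# by multiplying with local_time (values copied from the output above).
local_time = 0.0749197006226                  # seconds spent running thunks
op_fractions = [('mul', 0.139), ('_dot22', 0.134), ('sub', 0.092)]
for name, frac in op_fractions:
    print '%-8s %.4f sec' % (name, frac * local_time)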