Updated the profiler output.

50740a0c · Frederic Bastien · e347153f · 50740a0c
--- a/doc/hpcs2011_tutorial/presentation.tex
+++ b/doc/hpcs2011_tutorial/presentation.tex
@@ -687,17 +687,15 @@ To replace the default mode with this mode, use the Theano flags \texttt{mode=Pr
 To enable memory profiling, use the flag \texttt{ProfileMode.profile\_memory=True}
 \begin{Verbatim}
-Time since import 1.486s
+Time since import 2.697s
-Theano compile time: 1.017s (67.9% since import)
+Theano compile time: 1.046s (38.8% since import)
-    Optimization time: 0.805s
+    Optimization time: 0.804s
-    Linker time: 0.199s
+    Linker time: 0.230s
-Theano fct call 0.002s (0.1% since import)
+Theano fct call 0.028s (1.0% since import)
-   Theano Op time 0.001s 0.0%(since import) 36.8%(of fct call)
+   Theano Op time 0.026s 1.0%(since import) 93.7%(of fct call)
-   Theano function overhead in ProfileMode 0.001s 0.1%(since import) 
+   Theano function overhead in ProfileMode 0.002s 0.1%(since import) 6.3%(of fct call)
-                                           63.2%(of fct call)
+11 Theano fct call, 0.003s per call
-11 Theano fct call, 0.000s per call
+Rest of the time since import 1.623s 60.2%
-Rest of the time since import 0.479s 32.0%
 \end{Verbatim}
 \end{frame}
@@ -708,8 +706,8 @@ Theano outputs:
 \begin{Verbatim}
 Theano fct summary:
 <% total fct time> <total time> <time per call> <nb call> <fct name>
-   97.1% 0.002s 1.64e-04s 10 train
+   97.2% 0.027s 2.70e-03s 10 train
-    2.9% 0.000s 4.91e-05s 1 predict
+    2.8% 0.001s 7.84e-04s 1 predict
 \end{Verbatim}
 \end{frame}
@@ -722,15 +720,14 @@ Single Op-wise summary:
 <% of local_time spent on this kind of Op> <cumulative %> 
    <self seconds> <cumulative seconds> <time per call> <nb_call>
    <nb_op> <nb_apply> <Op name>
-   30.8%   30.8%  0.000s  0.000s  1.86e-05s      10  1  1 <'Gemv'>
+   82.0%   82.0%  0.021s  0.021s  2.13e-03s      10  1  1 <Gemv>
-   23.8%   54.6%  0.000s  0.000s  1.58e-06s *    91 10 10 <'Elemwise'>
+   14.1%   96.1%  0.004s  0.025s  3.33e-04s      11  1  2 <Dot>
-   18.3%   72.9%  0.000s  0.000s  1.10e-05s      10  1  1 <'Alloc'>
+    2.9%   98.9%  0.001s  0.026s  8.24e-06s *    91 10 10 <Elemwise>
-   15.9%   88.7%  0.000s  0.001s  8.71e-06s      11  1  2 <'Dot'>
+    0.6%   99.6%  0.000s  0.026s  1.69e-05s      10  1  1 <Alloc>
-    7.7%   96.4%  0.000s  0.001s  1.49e-06s *    31  2  4 <'DimShuffle'>
+    0.3%   99.9%  0.000s  0.026s  2.43e-06s *    31  2  4 <DimShuffle>
-    2.0%   98.4%  0.000s  0.001s  1.22e-06s *    10  1  1 <'Sum'>
+    0.1%  100.0%  0.000s  0.026s  1.91e-06s *    10  1  1 <Sum>
-    1.6%  100.0%  0.000s  0.001s  9.78e-07s *    10  1  1 <'Shape_i'>
+    0.0%  100.0%  0.000s  0.026s  1.19e-06s *    10  1  1 <Shape_i>
 (*) Op is running a c implementation
 \end{Verbatim}
 \end{frame}
@@ -743,16 +740,17 @@ Op-wise summary:
 <% of local_time spent on this kind of Op> <cumulative %>
    <self seconds> <cumulative seconds> <time per call>
    <nb_call> <nb apply> <Op name>
-   31.4%   31.4%  0.000s  0.000s  1.93e-05s      10  1 Gemv{inplace}
+   82.0%   82.0%  0.021s  0.021s  2.13e-03s      10  1 Gemv{inplace}
-   16.9%   48.3%  0.000s  0.000s  1.04e-05s      10  1 Alloc
+   14.1%   96.1%  0.004s  0.025s  3.33e-04s      11  2 dot
-   15.5%   63.8%  0.000s  0.000s  8.65e-06s      11  2 dot
+    1.4%   97.5%  0.000s  0.025s  3.63e-05s *    10  1 Elemwise{Composite{
-    5.0%   68.8%  0.000s  0.000s  3.05e-06s *    10  1 Elemwise{
+        scalar_softplus,{mul,scalar_softplus,{neg,mul,sub}}}}
-        Composite{scalar_softplus,{mul,scalar_softplus,{neg,mul,sub}}}}
+    0.6%   98.1%  0.000s  0.026s  1.69e-05s      10  1 Alloc
-    4.3%   73.1%  0.000s  0.000s  1.27e-06s *    21  3 InplaceDimShuffle{x}
+    0.4%   98.5%  0.000s  0.026s  1.02e-05s *    10  1 Elemwise{Composite{
-    3.3%   76.4%  0.000s  0.000s  2.00e-06s *    10  1 Elemwise{sub,no_inplace}
+        exp,{mul,{true_div,neg,{add,mul}}}}}[(0, 0)]
-    2.9%   79.3%  0.000s  0.000s  1.79e-06s *    10  1 Elemwise{gt,no_inplace}
+    0.3%   98.8%  0.000s  0.026s  8.80e-06s *    10  1 Elemwise{
-    2.5%   84.5%  0.000s  0.001s  1.53e-06s *    10  1 InplaceDimShuffle{1,0}
+        ScalarSigmoid}[(0, 0)]
-   ... (remaining 9 Apply account for 18.3%(0.00s) of the runtime)
+    0.2%   99.0%  0.000s  0.026s  2.40e-06s *    21  3 InplaceDimShuffle{x}
+   ... (remaining 10 Apply account for 1.0%(0.00s) of the runtime)
 (*) Op is running a c implementation
 \end{Verbatim}
 \end{frame}
@@ -766,14 +764,14 @@ Apply-wise summary:
 <% of local_time spent at this position> <cumulative %%>
    <apply time> <cumulative seconds> <time per call>
    <nb_call> <Apply position> <Apply Op name>
-   29.8%   29.8%  0.000s  0.000s 1.96e-05s  10  15 Gemv{inplace}
+   82.0%   82.0%  0.021s  0.021s 2.13e-03s  10  15 Gemv{inplace}(
-        (<TensorType(float64, vector)>, {-0.1}, InplaceDimShuffle{1,0}.0, Elemwise{Composite{exp,{mul,{true_div,neg,{add,mul}}}}}[(0, 0)].0, {0.998})
+        w, TensorConstant{-0.1}, InplaceDimShuffle{1,0}.0, Elemwise{Composite{exp,{mul,{true_div,neg,{add,mul}}}}}[(0, 0)].0, TensorConstant{0.998})
-   15.8%   45.6%  0.000s  0.000s 1.04e-05s  10  10 Alloc(Elemwise{inv,no_inplace}.0, Shape_i{0}.0)
+   11.5%   93.4%  0.003s  0.024s 2.99e-04s  10   1 dot(x, w)
-   14.0%   59.6%  0.000s  0.000s 9.20e-06s  10   1 dot(<TensorType(float64, matrix)>, <TensorType(float64, vector)>)
+    2.6%   96.1%  0.001s  0.025s 6.81e-04s  1   1 dot(x, w)
-    5.6%   65.2%  0.000s  0.000s 3.67e-06s  10   9 Elemwise{Composite{scalar_softplus,{mul,scalar_softplus,{neg,mul,sub}}}}(<TensorType(float64, vector)>, Elemwise{Composite{neg,sub}}[(0, 0)].0, Elemwise{sub,no_inplace}.0, Elemwise{neg,no_inplace}.0)
+    1.4%   97.5%  0.000s  0.025s 3.63e-05s  10   9 Elemwise{Composite{scalar_softplus,{mul,scalar_softplus,{neg,mul,sub}}}}(y, Elemwise{Composite{neg,sub}}[(0, 0)].0, Elemwise{sub,no_inplace}.0, Elemwise{neg,no_inplace}.0)
-    3.2%   68.4%  0.000s  0.000s 2.12e-06s  10   4 Elemwise{sub,no_inplace}(TensorConstant{[ 1.]}, <TensorType(float64, vector)>)
+    0.6%   98.1%  0.000s  0.026s 1.69e-05s  10  10 Alloc(Elemwise{inv,no_inplace}.0, Shape_i{0}.0)
-    2.9%   71.3%  0.000s  0.000s 1.93e-06s  10  12 Elemwise{gt,no_inplace}(Elemwise{ScalarSigmoid{output_types_preference=transfer_type{0}}}[(0, 0)].0, TensorConstant{[ 0.5]})
+    0.4%   98.5%  0.000s  0.026s 1.02e-05s  10  13 Elemwise{Composite{exp,{mul,{true_div,neg,{add,mul}}}}}[(0, 0)](Elemwise{ScalarSigmoid{output_types_preference=transfer_type{0}, _op_use_c_code=True}}[(0, 0)].0, Alloc.0, y, Elemwise{Composite{neg,sub}}[(0, 0)].0, Elemwise{sub,no_inplace}.0, InplaceDimShuffle{x}.0)
-   ... (remaining 14 Apply instances account for 28.6%(0.00s) of the runtime)
+   ... (remaining 14 Apply instances account for 1.5%(0.00s) of the runtime)
 \end{Verbatim}
 \end{frame}
@@ -786,17 +784,18 @@ Profile of Theano functions memory:
 (This check only the output of each apply node. It don't check the 
    temporary memory used by the op in the apply node.)
 Theano fct: train
-    Max without gc, inplace and view (KB) 4
+    Max without gc, inplace and view (KB) 2481
-    Max FAST_RUN_NO_GC (KB) 0
+    Max FAST_RUN_NO_GC (KB) 16
-    Max FAST_RUN (KB) 0
+    Max FAST_RUN (KB) 16
-    Memory saved by view (KB) 3
+    Memory saved by view (KB) 2450
-    Memory saved by inplace (KB) 0
+    Memory saved by inplace (KB) 15
    Memory saved by GC (KB) 0
    <Sum apply outputs (bytes)> <Apply outputs memory size(bytes)> 
        <created/inplace/view> <Apply node>
-            3200B  [3200] v InplaceDimShuffle{1,0}(<TensorType(float64, matrix)>)
+    <created/inplace/view> is taked from the op declaration, not ...
-             800B  [800] i Gemv{inplace}(<TensorType(float64, vector)>, TensorConstant{-0.1}, InplaceDimShuffle{1,0}.0, Elemwise{Composite{exp,{mul,{true_div,neg,{add,mul}}}}}[(0, 0)].0, TensorConstant{0.998})
+         2508800B  [2508800] v InplaceDimShuffle{1,0}(x)
-              32B  [32] c Alloc(Elemwise{inv,no_inplace}.0, Shape_i{0}.0)
+            6272B  [6272] i Gemv{inplace}(w, ...)
+            3200B  [3200] c Elemwise{Composite{...}}(y, ...)
 \end{Verbatim}
 \end{frame}