Merge pull request #3436 from nouiz/small

Doc, more profiling information

Merge pull request #3436 from nouiz/small
fa044b5d · abergeron · 865ea186 · 37bf28f6 · fa044b5d · fa044b5d
--- a/doc/library/config.txt
+++ b/doc/library/config.txt
@@ -360,6 +360,14 @@ import theano and print the config variable, as in:
    The profiling output can be either directed to stderr
    (default), or stdout or an arbitrary file.

+.. attribute:: config.profiling.debugprint
+
+    Bool value: either True or False
+
+    Default False
+
+    If True, also print a debugprint of the graph of each profiled function when the profile summary is printed.
+
 .. attribute:: config.lib.amdlibm

    Bool value: either True or False

--- a/doc/library/tensor/nnet/bn.txt
+++ b/doc/library/tensor/nnet/bn.txt
+.. _libdoc_tensor_nnet_bn:
+
+================================
+:mod:`bn` -- Batch Normalization
+================================
+
+.. module:: tensor.nnet.bn
+   :platform: Unix, Windows
+   :synopsis: Batch Normalization
+.. moduleauthor:: LISA
+
+
+.. autofunction:: theano.tensor.nnet.bn.batch_normalization
--- a/theano/compile/profiling.py
+++ b/theano/compile/profiling.py
@@ -76,6 +76,13 @@ AddConfigVar('profiling.destination',
             StrParam('stderr'),
             in_c_key=False)

+AddConfigVar('profiling.debugprint',
+             """
+             Do a debugprint of the profiled functions
+             """,
+             BoolParam(False),
+             in_c_key=False)
+

 def _atexit_print_fn():
    """
@@ -1285,6 +1292,9 @@ class ProfileStats(object):
        elif self.fct_callcount > 0:
            print("  No execution time accumulated "
                  "(hint: try config profiling.time_thunks=1)", file=file)
+        if config.profiling.debugprint:
+            fcts = set([n.fgraph for n in self.apply_time.keys()])
+            theano.printing.debugprint(fcts, print_type=True)
        if self.variable_shape or self.variable_strides:
            self.summary_memory(file, n_apply_to_print)
        if self.optimizer_profile:

--- a/theano/printing.py
+++ b/theano/printing.py
@@ -102,7 +102,7 @@ def debugprint(obj, depth=-1, print_type=False,
    results_to_print = []
    profile_list = []
    order = []
-    if isinstance(obj, (list, tuple)):
+    if isinstance(obj, (list, tuple, set)):
        lobj = obj
    else:
        lobj = [obj]
@@ -120,7 +120,8 @@ def debugprint(obj, depth=-1, print_type=False,
            order = obj.maker.fgraph.toposort()
        elif isinstance(obj, gof.FunctionGraph):
            results_to_print.extend(obj.outputs)
-            profile_list.extend([None for item in obj.outputs])
+            profile_list.extend([getattr(obj, 'profile', None)
+                                 for item in obj.outputs])
            order = obj.toposort()
        elif isinstance(obj, (integer_types, float, np.ndarray)):
            print(obj)

--- a/theano/tensor/blas.py
+++ b/theano/tensor/blas.py
@@ -1100,7 +1100,7 @@ class Gemm(GemmRelated):
            dims[0] = PyArray_DIMS(%(_z)s)[0];
            dims[1] = PyArray_DIMS(%(_z)s)[1];
            %(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims,
-                                                          PyArray_TYPE((PyArrayObject*) py_%(_z)s));
+                                                          PyArray_TYPE(%(_z)s));
            //fprintf(stderr, "Gemm Allocating %%i %%i\\n", dims[0], dims[1]);
            if(!%(_zout)s) {
                PyErr_SetString(PyExc_MemoryError,
@@ -1188,7 +1188,7 @@ class Gemm(GemmRelated):
    def c_code_cache_version(self):
        gv = self.build_gemm_version()
        if gv:
-            return (4,) + gv
+            return (5,) + gv
        else:
            return gv


--- a/theano/tensor/blas_c.py
+++ b/theano/tensor/blas_c.py
@@ -91,7 +91,7 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
            || ((PyArray_STRIDES(%(Z)s)[0] != elemsize)
                && (PyArray_STRIDES(%(Z)s)[1] != elemsize)))
        {
-            if (%(Z)s) Py_XDECREF(%(Z)s);
+            Py_XDECREF(%(Z)s);
            %(Z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
                                                       PyArray_TYPE(%(A)s));
            if(!%(Z)s) {
@@ -324,7 +324,7 @@ class CGer(BaseBLAS, Ger):
        return code

    def c_code_cache_version(self):
-        return (9, blas_header_version())
+        return (10, blas_header_version())
 cger_inplace = CGer(True)
 cger_no_inplace = CGer(False)

@@ -425,9 +425,9 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail,
        if ((NULL == %(zz)s)
            || (PyArray_DIMS(%(zz)s)[0] != PyArray_DIMS(%(aa)s)[0]))
        {
-            if (%(zz)s) Py_XDECREF(%(zz)s);
+            Py_XDECREF(%(zz)s);
            %(zz)s = (PyArrayObject*)PyArray_SimpleNew(1,
-                PyArray_DIMS(%(aa)s), PyArray_TYPE((PyArrayObject*) py_%(aa)s));
+                PyArray_DIMS(%(aa)s), PyArray_TYPE(%(aa)s));
            if(!%(zz)s) {
                PyErr_SetString(PyExc_MemoryError,
                                "failed to alloc gemv output");
@@ -695,7 +695,7 @@ class CGemv(BaseBLAS, Gemv):
        return code

    def c_code_cache_version(self):
-        return (11, blas_header_version())
+        return (12, blas_header_version())
 cgemv_inplace = CGemv(inplace=True)
 cgemv_no_inplace = CGemv(inplace=False)