Merge pull request #4370 from nouiz/small

[MAIN] Doc, error message, remove optimization warning

Merge pull request #4370 from nouiz/small
457de47a · abergeron · 47367022 · 4a1a868b · 457de47a · 457de47a
--- a/Theano.pyproj
+++ b/Theano.pyproj
 <?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <PropertyGroup>
    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
@@ -134,14 +134,12 @@
    <Compile Include="theano\sandbox\linalg\__init__.py" />
    <Compile Include="theano\sandbox\minimal.py" />
    <Compile Include="theano\sandbox\multinomial.py" />
-    <Compile Include="theano\sandbox\neighbourhoods.py" />
    <Compile Include="theano\sandbox\neighbours.py" />
    <Compile Include="theano\sandbox\rng_mrg.py" />
    <Compile Include="theano\sandbox\softsign.py" />
    <Compile Include="theano\sandbox\solve.py" />
    <Compile Include="theano\sandbox\symbolic_module.py" />
    <Compile Include="theano\sandbox\test_multinomial.py" />
-    <Compile Include="theano\sandbox\test_neighbourhoods.py" />
    <Compile Include="theano\sandbox\test_neighbours.py" />
    <Compile Include="theano\sandbox\test_rng_mrg.py" />
    <Compile Include="theano\sandbox\test_theano_object.py" />

--- a/doc/library/compile/mode.txt
+++ b/doc/library/compile/mode.txt
@@ -22,6 +22,7 @@ Theano defines the following modes by name:
 - ``'FAST_RUN'``: Apply all optimizations, and use C implementations where possible.
 - ``'DebugMode'``: A mode for debugging. See :ref:`DebugMode <debugmode>` for details.
 - ``'ProfileMode'``: Deprecated, use the Theano flag :attr:`config.profile`.
+- ``'NanGuardMode'``: :ref:`NaN detector <nanguardmode>`
 - ``'DEBUG_MODE'``: Deprecated. Use the string DebugMode.
 - ``'PROFILE_MODE'``: Deprecated, use the Theano flag :attr:`config.profile`.

--- a/doc/library/sandbox/cuda/dnn.txt
+++ b/doc/library/sandbox/cuda/dnn.txt
@@ -46,9 +46,9 @@ get an error when cuDNN can not be used with them, use this flag:
 .. note::
-   cuDNN v3 has now been released. cuDNN v2 remains supported but cuDNN v3 is
+   cuDNN v5rc is supported in the Theano master version, which has dropped cuDNN v3 support.
-   faster and offers many more options. We recommend that everybody update to
+   Theano 0.8.0 and 0.8.1 support only cuDNN v3 and v4.
-   v3.
+   Theano 0.8.2 will support only v4 and v5.
 .. note::

--- a/doc/library/sandbox/gpuarray/dnn.txt
+++ b/doc/library/sandbox/gpuarray/dnn.txt
@@ -43,9 +43,9 @@ To get an error if Theano can not use cuDNN, use this Theano flag:
 .. note::
-   cuDNN v3 has now been released. cuDNN v2 remains supported but cuDNN v3 is
+   cuDNN v5rc is supported in the Theano master version, which has dropped cuDNN v3 support.
-   faster and offers many more options. We recommend that everybody update to
+   Theano 0.8.0 and 0.8.1 support only cuDNN v3 and v4.
-   v3.
+   Theano 0.8.2 will support only v4 and v5.
 .. note::

--- a/doc/tutorial/modes.txt
+++ b/doc/tutorial/modes.txt
@@ -142,7 +142,7 @@ Theano defines the following modes by name:
 - ``'DebugMode'``: Verify the correctness of all optimizations, and compare C and Python
    implementations. This mode can take much longer than the other modes, but can identify
    several kinds of problems.
- ``'ProfileMode'`` (deprecated): Same optimization as FAST_RUN, but print some profiling information.
+- ``'NanGuardMode'``: Same optimization as FAST_RUN, but :ref:`checks whether a node generates NaNs <nanguardmode>`.
 The default mode is typically ``FAST_RUN``, but it can be controlled via
 the configuration variable :attr:`config.mode`,
@@ -155,7 +155,6 @@ short name        Full constructor
 ``FAST_COMPILE``  ``compile.mode.Mode(linker='py', optimizer='fast_compile')``    Python implementations only, quick and cheap graph transformations
 ``FAST_RUN``      ``compile.mode.Mode(linker='cvm', optimizer='fast_run')``       C implementations where available, all available graph transformations.
 ``DebugMode``     ``compile.debugmode.DebugMode()``                               Both implementations where available, all available graph transformations.
-``ProfileMode``   ``compile.profilemode.ProfileMode()``                           Deprecated. C implementations where available, all available graph transformations, print profile information.
 ================= =============================================================== ===============================================================================
 .. Note::
@@ -169,8 +168,8 @@ Linkers
 =======
 A mode is composed of 2 things: an optimizer and a linker. Some modes,
-like ``ProfileMode`` and ``DebugMode``, add logic around the optimizer and
+like ``NanGuardMode`` and ``DebugMode``, add logic around the optimizer and
-linker. ``ProfileMode`` and ``DebugMode`` use their own linker.
+linker. ``NanGuardMode`` and ``DebugMode`` use their own linker.
 You can select which linker to use with the Theano flag :attr:`config.linker`.
 Here is a table to compare the different linkers.
@@ -184,7 +183,7 @@ c|py [#cpy1]_  yes        yes                "+++"      Try C code. If none exis
 c|py_nogc      no         yes                "++"       As c|py, but without gc
 c              no         yes                "+"        Use only C code (if none available for an op, raise an error)
 py             yes        yes                "+++"      Use only Python code
-ProfileMode    no         no                 "++++"     (Deprecated) Compute some extra profiling info
+NanGuardMode   no         no                 "++++"     Check if nodes generate NaN
 DebugMode      no         yes                VERY HIGH  Make many checks on what Theano computes
 =============  =========  =================  =========  ===
@@ -259,123 +258,3 @@ ProfileMode
 .. note::
    ProfileMode is deprecated. Use :attr:`config.profile` instead.
-Besides checking for errors, another important task is to profile your
-code. For this Theano uses a special mode called ProfileMode which has
-to be passed as an argument to :func:`theano.function <function.function>`.
-Using the ProfileMode is a three-step process.
-.. note::
-    To switch the default accordingly, set the Theano flag
-    :attr:`config.mode` to ProfileMode.  In that case, when the Python
-    process exits, it will automatically print the profiling
-    information on the standard output.
-    The memory profile of the output of each ``apply`` node can be enabled with the
-    Theano flag :attr:`config.ProfileMode.profile_memory`.
-For more detail, see :ref:`ProfileMode <profilemode>` in the library.
-Creating a ProfileMode Instance
-------------------------------
-First create a ProfileMode instance:
->>> from theano import ProfileMode
->>> profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
-The ProfileMode constructor takes as input an optimizer and a
-linker. Which optimizer and linker to use will depend on the
-application. For example, a user wanting to profile the Python
-implementation only, should use the gof.PerformLinker (or "py" for
-short). On the other hand, a user wanting to profile his graph using C
-implementations wherever possible should use the ``gof.OpWiseCLinker``
-(or "c|py"). For testing the speed of your code we would recommend
-using the ``fast_run`` optimizer and the ``gof.OpWiseCLinker`` linker.
-Compiling your Graph with ProfileMode
-------------------------------------
-Once the ProfileMode instance is created, simply compile your graph as you
-would normally, by specifying the mode parameter.
->>> v1, v2 = T.vectors(2)
->>> o = v1 + v2
->>> f = theano.function([v1,v2],[o], mode=profmode)
-Retrieving Timing Information
-----------------------------
-Once your graph is compiled, simply run the program or operation you wish to
-profile, then call ``profmode.print_summary()``. This will provide you with
-the desired timing information, indicating where your graph is spending most
-of its time. This is best shown through an example. Let's use our logistic
-regression example.
-Compiling the module with ``ProfileMode`` and calling ``profmode.print_summary()``
-generates the following output:
-.. code-block:: python
-    """
-    ProfileMode.print_summary()
-    ---------------------------
-    local_time 0.0749197006226 (Time spent running thunks)
-    Apply-wise summary: <fraction of local_time spent at this position> (<Apply position>, <Apply Op name>)
-            0.069   15      _dot22
-            0.064   1       _dot22
-            0.053   0       InplaceDimShuffle{x,0}
-            0.049   2       InplaceDimShuffle{1,0}
-            0.049   10      mul
-            0.049   6       Elemwise{ScalarSigmoid{output_types_preference=<theano.scalar.basic.transfer_type object at 0x171e650>}}[(0, 0)]
-            0.049   3       InplaceDimShuffle{x}
-            0.049   4       InplaceDimShuffle{x,x}
-            0.048   14      Sum{0}
-            0.047   7       sub
-            0.046   17      mul
-            0.045   9       sqr
-            0.045   8       Elemwise{sub}
-            0.045   16      Sum
-            0.044   18      mul
-       ... (remaining 6 Apply instances account for 0.25 of the runtime)
-    Op-wise summary: <fraction of local_time spent on this kind of Op> <Op name>
-            0.139   * mul
-            0.134   * _dot22
-            0.092   * sub
-            0.085   * Elemwise{Sub{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1779f10>}}[(0, 0)]
-            0.053   * InplaceDimShuffle{x,0}
-            0.049   * InplaceDimShuffle{1,0}
-            0.049   * Elemwise{ScalarSigmoid{output_types_preference=<theano.scalar.basic.transfer_type object at 0x171e650>}}[(0, 0)]
-            0.049   * InplaceDimShuffle{x}
-            0.049   * InplaceDimShuffle{x,x}
-            0.048   * Sum{0}
-            0.045   * sqr
-            0.045   * Sum
-            0.043   * Sum{1}
-            0.042   * Elemwise{Mul{output_types_preference=<theano.scalar.basic.transfer_type object at 0x17a0f50>}}[(0, 1)]
-            0.041   * Elemwise{Add{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1736a50>}}[(0, 0)]
-            0.039   * Elemwise{Second{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1736d90>}}[(0, 1)]
-       ... (remaining 0 Ops account for 0.00 of the runtime)
-    (*) Op is running a c implementation
-    """
-This output has two components. In the first section called
-*Apply-wise summary*, timing information is provided for the worst
-offending ``Apply`` nodes. This corresponds to individual op applications
-within your graph which took longest to execute (so if you use
-``dot`` twice, you will see two entries there). In the second portion,
-the *Op-wise summary*, the execution time of all ``Apply`` nodes executing
-the same op are grouped together and the total execution time per op
-is shown (so if you use ``dot`` twice, you will see only one entry
-there corresponding to the sum of the time spent in each of them).
-Finally, notice that the ``ProfileMode`` also shows which ops were running a C
-implementation.
-For more detail, see :ref:`ProfileMode<profilemode>` in the library.
--- a/setup.py
+++ b/setup.py
@@ -52,9 +52,9 @@ AUTHOR              = "LISA laboratory, University of Montreal"
 AUTHOR_EMAIL        = "theano-dev@googlegroups.com"
 PLATFORMS           = ["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"]
 MAJOR               = 0
-MINOR               = 8
+MINOR               = 9
 MICRO               = 0
-SUFFIX              = ""  # Should be blank except for rc's, betas, etc.
+SUFFIX              = "dev0"  # Should be blank except for rc's, betas, etc.
 ISRELEASED          = False
 VERSION             = '%d.%d.%d%s' % (MAJOR, MINOR, MICRO, SUFFIX)

--- a/theano/gof/fg.py
+++ b/theano/gof/fg.py
@@ -49,8 +49,19 @@ class MissingInputError(Exception):
    A symbolic input needed to compute the outputs is missing.
    """
+    def __init__(self, *args, **kwargs):
-    pass
+        if kwargs:
+            # The call to list is needed for Python 3
+            assert list(kwargs.keys()) == ["variable"]
+            tr = getattr(list(kwargs.values())[0].tag, 'trace', [])
+            if type(tr) is list and len(tr) > 0:
+                sio = StringIO()
+                print("\nBacktrace when the variable is created:", file=sio)
+                for subtr in list(kwargs.values())[0].tag.trace:
+                    traceback.print_list(subtr, sio)
+                args = args + (str(sio.getvalue()),)
+        s = '\n'.join(args)  # Needed to have the new line print correctly
+        Exception.__init__(self, s)
 class FunctionGraph(utils.object2):
@@ -364,7 +375,7 @@ class FunctionGraph(utils.object2):
            if isinstance(variable.type, NullType):
                raise TypeError("Computation graph contains a NaN. " +
                                variable.type.why_null)
-            raise MissingInputError("Undeclared input", variable)
+            raise MissingInputError("Undeclared input", variable=variable)
        if not getattr(variable, 'fgraph', None) is self:
            self.__setup_r__(variable)
        self.variables.add(variable)
@@ -392,78 +403,6 @@ class FunctionGraph(utils.object2):
                    if (r.owner is None and
                            not isinstance(r, graph.Constant) and
                            r not in self.inputs):
-                        # Verbose error message
-                        # Show a complete chain of variables from the missing input to an output
-                        if config.exception_verbosity == 'high':
-                            def find_path_to(output_var, input_var):
-                                """
-                                Returns a list of each variable on a (not
-                                necessarily unique) path from input_var to
-                                output_var, where each variable in the list has
-                                the preceding variable as one of its inputs.
-                                Returns None if no path exists.
-                                """
-                                # If output and input are the same we have a singleton path
-                                if output_var is input_var:
-                                    return [output_var]
-                                # If output has no inputs then there is no path
-                                owner = output_var.owner
-                                if owner is None:
-                                    return None
-                                # If input_var is an input to the output node, there is a
-                                # simple two element path
-                                inputs = owner.inputs
-                                if input_var in inputs:
-                                    return [input_var, output_var]
-                                # Otherwise we must recurse by searching for a path to one
-                                # of our inputs, then appending the output to that path
-                                for ipt in inputs:
-                                    path = find_path_to(ipt, input_var)
-                                    if path is not None:
-                                        path.append(output_var)
-                                        return path
-                                # Since none of the above methods returned a path, there is none
-                                return None
-                            # Try different outputs until we find one that has a path to the missing input
-                            for output in self.outputs:
-                                path = find_path_to(output, r)
-                                if path is not None:
-                                    break
-                            # if there is no path then r isn't really a graph input so we shouldn't be running error
-                            # handler code in the first place
-                            assert path is not None
-                            tr = getattr(r.tag, 'trace', [])
-                            detailed_err_msg = ""
-                            if type(tr) is list and len(tr) > 0:
-                                detailed_err_msg += "\nBacktrace when the variable is created:\n"
-                                # Print separate message for each element in
-                                # the list of batcktraces
-                                sio = StringIO()
-                                for subtr in tr:
-                                    traceback.print_list(subtr, sio)
-                                detailed_err_msg += str(sio.getvalue())
-                            raise MissingInputError(
-                                'A variable that is an input to the graph was '
-                                'neither provided as an input to the function '
-                                'nor given a value. A chain of variables '
-                                'leading from this input to an output is %s. '
-                                'This chain may not be unique' % str(path) +
-                                detailed_err_msg)
                        # Standard error message
                        raise MissingInputError((
                            "An input of the graph, used to compute %s, "
@@ -471,7 +410,7 @@ class FunctionGraph(utils.object2):
                            "Use the Theano flag exception_verbosity='high',"
                            "for more information on this error."
                            % str(node)),
-                            r)
+                            variable=r)
        for node in new_nodes:
            assert node not in self.apply_nodes

--- a/theano/sandbox/neighbourhoods.py
+++ b/theano/sandbox/neighbourhoods.py
-"""
-.. warning:: This code is not recommanded. It is not finished, it is
-slower than the version in sandbox/neighbours.py, and it does not work
-on the GPU.
-We only keep this version here as it is a little bit more generic, so
-it cover more cases. But thoses cases aren't needed frequently, so you
-probably don't want to use this version, go see neighbours.py!!!!!!!
-"""
-from __future__ import absolute_import, print_function, division
-import numpy
-from six.moves import xrange
-import six.moves.builtins as builtins
-import theano
-from theano import gof, Op
-class NeighbourhoodsFromImages(Op):
-    """
-    This extracts neighbourhoods from "images", but in a dimension-generic
-    manner.
-    In the 2D case, this is similar to downsampling, but instead of reducing
-    a group of 2x2 pixels (for example) to a single new pixel in the output,
-    you place those 4 pixels in a row.
-    For example, say you have this 2x4 image::
-            [ [ 0.5, 0.6, 0.7, 0.8 ],
-              [ 0.1, 0.2, 0.3, 0.4 ] ]
-    and you want to extract 2x2 neighbourhoods. This op would then produce::
-            [ [ [ 0.5, 0.6, 0.1, 0.2 ] ], # the first 2x2 group of pixels
-              [ [ 0.7, 0.8, 0.3, 0.4 ] ] ] # the second one
-    So think of a 2D downsampling where each pixel of the resulting array
-    is replaced by an array containing the (flattened) pixels of the
-    corresponding neighbourhood.
-    If you provide a stack of 2D images, or multiple stacks, each image
-    will be treated independently, and the first dimensions of the array
-    will be preserved as such.
-    This also makes sense in the 1D or 3D case. Below I'll still be calling
-    those "images", by analogy.
-    In the 1D case, you're extracting subsequences from the original sequence.
-    In the 3D case, you're extracting cuboids.
-    If you ever find a 4D use, tell me! It should be possible, anyhow.
-    Parameters
-    ----------
-    n_dims_before : int
-        Number of dimensions preceding the "images".
-    dims_neighbourhoods : tuple of ints
-        Exact shape of windows to be extracted (e.g. (2,2) in the case above).
-        n_dims_before + len(dims_neighbourhoods) should be equal to the
-        number of dimensions in the input given to the op.
-    strides : tuple of int
-        Number of elements to skip when moving to the next neighbourhood,
-        for each dimension of dims_neighbourhoods. There can be overlap
-        between neighbourhoods, or gaps.
-    ignore_border : bool
-        If the dimensions of the neighbourhoods don't exactly divide the
-        dimensions of the "images", you can either fill the last
-        neighbourhood with zeros (False) or drop it entirely (True).
-    inverse : bool
-        You shouldn't have to use this. Only used by child class
-        ImagesFromNeighbourhoods which simply reverses the assignment.
-    """
-    __props__ = ("n_dims_before", "dims_neighbourhoods", "strides",
-                 "ignore_border", "inverse")
-    def __init__(self, n_dims_before, dims_neighbourhoods,
-                 strides=None, ignore_border=False, inverse=False):
-        self.n_dims_before = n_dims_before
-        self.dims_neighbourhoods = dims_neighbourhoods
-        if strides is not None:
-            self.strides = strides
-        else:
-            self.strides = dims_neighbourhoods
-        self.ignore_border = ignore_border
-        self.inverse = inverse
-        self.code_string, self.code = self.make_py_code()
-    def __str__(self):
-        return '%s{%s,%s,%s,%s}' % (self.__class__.__name__,
-                                    self.n_dims_before,
-                                    self.dims_neighbourhoods,
-                                    self.strides,
-                                    self.ignore_border)
-    def out_shape(self, input_shape):
-        dims = list(input_shape[:self.n_dims_before])
-        num_strides = [0 for i in xrange(len(self.strides))]
-        neigh_flattened_dim = 1
-        for i, ds in enumerate(self.dims_neighbourhoods):
-            cur_stride = self.strides[i]
-            input_dim = input_shape[i + self.n_dims_before]
-            target_dim = input_dim // cur_stride
-            if not self.ignore_border and (input_dim % cur_stride) != 0:
-                target_dim += 1
-            num_strides[i] = target_dim
-            dims.append(target_dim)
-            neigh_flattened_dim *= ds
-        dims.append(neigh_flattened_dim)
-        return dims, num_strides
-    # for inverse mode
-    # "output" here actually referes to the Op's input shape (but it's inverse
-    # mode)
-    def in_shape(self, output_shape):
-        out_dims = list(output_shape[:self.n_dims_before])
-        num_strides = []
-        # in the inverse case we don't worry about borders:
-        # they either have been filled with zeros, or have been cropped
-        for i, ds in enumerate(self.dims_neighbourhoods):
-            # the number of strides performed by NeighFromImg is
-            # directly given by this shape
-            num_strides.append(output_shape[self.n_dims_before + i])
-            # our Op's output image must be at least this wide
-            at_least_width = num_strides[i] * self.strides[i]
-            # ... which gives us this number of neighbourhoods
-            num_neigh = at_least_width // ds
-            if at_least_width % ds != 0:
-                num_neigh += 1
-            # making the final Op's output dimension this wide
-            out_dims.append(num_neigh * ds)
-        return out_dims, num_strides
-    def make_node(self, x):
-        x = theano.tensor.as_tensor_variable(x)
-        if self.inverse:
-            # +1 in the inverse case
-            if x.type.ndim != (self.n_dims_before +
-                               len(self.dims_neighbourhoods) + 1):
-                raise TypeError()
-        else:
-            if x.type.ndim != (self.n_dims_before +
-                               len(self.dims_neighbourhoods)):
-                raise TypeError()
-        return gof.Apply(self, [x], [x.type()])
-    def perform(self, node, inp, out):
-        x, = inp
-        z, = out
-        if self.inverse:
-            # +1 in the inverse case
-            if len(x.shape) != (self.n_dims_before +
-                                len(self.dims_neighbourhoods) + 1):
-                raise ValueError("Images passed as input don't match the "
-                                 "dimensions passed when this (inversed) "
-                                 "Apply node was created")
-            prod = 1
-            for dim in self.dims_neighbourhoods:
-                prod *= dim
-            if x.shape[-1] != prod:
-                raise ValueError(
-                    "Last dimension of neighbourhoods (%s) is not"
-                    " the product of the neighbourhoods dimensions"
-                    " (%s)" % (str(x.shape[-1]), str(prod)))
-        else:
-            if len(x.shape) != (self.n_dims_before +
-                                len(self.dims_neighbourhoods)):
-                raise ValueError("Images passed as input don't match the "
-                                 "dimensions passed when this Apply node "
-                                 "was created")
-        if self.inverse:
-            input_shape, num_strides = self.in_shape(x.shape)
-            out_shape, dummy = self.out_shape(input_shape)
-        else:
-            input_shape = x.shape
-            out_shape, num_strides = self.out_shape(input_shape)
-        if z[0] is None:
-            if self.inverse:
-                z[0] = numpy.zeros(input_shape)
-            else:
-                z[0] = numpy.zeros(out_shape)
-            z[0] = theano._asarray(z[0], dtype=x.dtype)
-        exec(self.code)
-    def make_py_code(self):
-        # TODO : need description for method and return
-        code = self._py_outerloops()
-        for i in xrange(len(self.strides)):
-            code += self._py_innerloop(i)
-        code += self._py_assignment()
-        return code, builtins.compile(code, '<string>', 'exec')
-    def _py_outerloops(self):
-        # TODO : need description for method, parameter and return
-        code_before = ""
-        for dim_idx in xrange(self.n_dims_before):
-            code_before += ('\t' * (dim_idx)) + \
-                "for outer_idx_%d in xrange(input_shape[%d]):\n" % \
-                (dim_idx, dim_idx)
-        return code_before
-    def _py_innerloop(self, inner_dim_no):
-        # TODO : need description for method, parameter and return
-        base_indent = ('\t' * (self.n_dims_before + inner_dim_no * 2))
-        code_before = base_indent + \
-            "for stride_idx_%d in xrange(num_strides[%d]):\n" % \
-            (inner_dim_no, inner_dim_no)
-        base_indent += '\t'
-        code_before += base_indent + \
-            "dim_%d_offset = stride_idx_%d * self.strides[%d]\n" %\
-            (inner_dim_no, inner_dim_no, inner_dim_no)
-        code_before += base_indent + \
-            "max_neigh_idx_%d = input_shape[%d] - dim_%d_offset\n" % \
-            (inner_dim_no, self.n_dims_before + inner_dim_no, inner_dim_no)
-        code_before += base_indent + \
-            ("for neigh_idx_%d in xrange(min(max_neigh_idx_%d,"
-             " self.dims_neighbourhoods[%d])):\n") %\
-            (inner_dim_no, inner_dim_no, inner_dim_no)
-        return code_before
-    def _py_flattened_idx(self):
-        # TODO : need description for method and return
-        return "+".join(["neigh_strides[%d]*neigh_idx_%d" % (i, i)
-                        for i in xrange(len(self.strides))])
-    def _py_assignment(self):
-        # TODO : need description for method and return
-        input_idx = "".join(["outer_idx_%d," % (i,)
-                            for i in xrange(self.n_dims_before)])
-        input_idx += "".join(["dim_%d_offset+neigh_idx_%d," %
-                             (i, i) for i in xrange(len(self.strides))])
-        out_idx = "".join(
-            ["outer_idx_%d," % (i,) for i in xrange(self.n_dims_before)] +
-            ["stride_idx_%d," % (i,) for i in xrange(len(self.strides))])
-        out_idx += self._py_flattened_idx()
-        # return_val = '\t' * (self.n_dims_before + len(self.strides)*2)
-        # return_val += "print "+input_idx+"'\\n',"+out_idx+"\n"
-        return_val = '\t' * (self.n_dims_before + len(self.strides) * 2)
-        if self.inverse:
-            # remember z and x are inversed:
-            # z is the Op's output, but has input_shape
-            # x is the Op's input, but has out_shape
-            return_val += "z[0][%s] = x[%s]\n" % (input_idx, out_idx)
-        else:
-            return_val += "z[0][%s] = x[%s]\n" % (out_idx, input_idx)
-        return return_val
-class ImagesFromNeighbourhoods(NeighbourhoodsFromImages):
-    # TODO : need description for class, parameters
-    def __init__(self, n_dims_before, dims_neighbourhoods,
-                 strides=None, ignore_border=False):
-        NeighbourhoodsFromImages.__init__(self, n_dims_before,
-                                          dims_neighbourhoods,
-                                          strides=strides,
-                                          ignore_border=ignore_border,
-                                          inverse=True)
-        # and that's all there is to it
--- a/theano/sandbox/tests/test_neighbourhoods.py
+++ b/theano/sandbox/tests/test_neighbourhoods.py
-from __future__ import absolute_import, print_function, division
-#!/usr/bin/python
-import theano
-import numpy
-import theano.tensor as T
-from theano.sandbox.neighbourhoods import *
-'''
-def test_imgFromNeigh_noborder_1d():
-    x = T.dtensor3()
-    a = numpy.arange(2*2*6).reshape((2,2,6))
-    neighs = NeighbourhoodsFromImages(2, (3,))(x)
-    f = theano.function([x], neighs)
-    z = f(a)
-    cmp = numpy.asarray([[[[  0.,   1.,   2.],
-        [  3.,   4.,   5.]],
-        [[  6.,   7.,   8.],
-        [  9.,  10.,  11.]]],
-        [[[ 12.,  13.,  14.],
-        [ 15.,  16.,  17.]],
-        [[ 18.,  19.,  20.],
-        [ 21.,  22.,  23.]]]])
-    assert numpy.allclose(z, cmp)
-    x2 = T.dtensor4()
-    imgs = ImagesFromNeighbourhoods(2, (3,))(x2)
-    f2 = theano.function([x2], imgs)
-    z2 = f2(cmp)
-    assert numpy.allclose(z2, a)
-def test_imgFromNeigh_1d_stridesmaller():
-    x = T.dtensor3()
-    a = numpy.arange(2*4).reshape((2,4))
-    #neighs = NeighbourhoodsFromImages(1, (3,), strides=(1,), ignore_border=False)(x)
-    cmp = numpy.asarray([[[0.,1.,2.],[1.,2.,3.],[2.,3.,0.],[3.,0.,0.]],\
-                [[4.,5.,6.],[5.,6.,7.],[6.,7.,0.],[7.,0.,0.]]])
-    images = ImagesFromNeighbourhoods(1, (3,), strides=(1,), ignore_border=False)(x)
-    f = theano.function([x], images)
-    aprime = f(cmp)
-    should_be = [[0.,  1.,  2.,  3.,  0.,  0.], [ 4.,  5.,  6.,  7.,  0.,  0.]]
-    assert numpy.allclose(aprime, should_be)
-def test_neighFromImg_1d():
-    x = T.dtensor3()
-    a = numpy.arange(2*2*6).reshape((2,2,6))
-    neighs = NeighbourhoodsFromImages(2, (3,))(x)
-    f = theano.function([x], neighs)
-    z = f(a)
-    cmp = numpy.asarray([[[[  0.,   1.,   2.],
-        [  3.,   4.,   5.]],
-        [[  6.,   7.,   8.],
-        [  9.,  10.,  11.]]],
-        [[[ 12.,  13.,  14.],
-        [ 15.,  16.,  17.]],
-        [[ 18.,  19.,  20.],
-        [ 21.,  22.,  23.]]]])
-    assert numpy.allclose(z, cmp)
-def test_neighFromImg_1d_ignoreborder():
-    x = T.dtensor3()
-    a = numpy.arange(1*2*7).reshape((1,2,7))
-    neighs = NeighbourhoodsFromImages(2, (3,), ignore_border=True)(x)
-    f = theano.function([x], neighs)
-    z = f(a)
-    cmp = numpy.asarray([[[[  0.,   1.,   2.],
-        [  3.,   4.,   5.]],
-        [[  7.,   8.,  9.],
-        [  10.,  11.,  12.]]]])
-    assert numpy.allclose(z, cmp)
-def test_neighFromImg_1d_stridesmaller():
-    x = T.dmatrix()
-    a = numpy.arange(2*4).reshape((2,4))
-    neighs = NeighbourhoodsFromImages(1, (3,), strides=(1,), ignore_border=False)(x)
-    f = theano.function([x], neighs)
-    z = f(a)
-    cmp = numpy.asarray([[[0.,1.,2.],[1.,2.,3.],[2.,3.,0.],[3.,0.,0.]],\
-                [[4.,5.,6.],[5.,6.,7.],[6.,7.,0.],[7.,0.,0.]]])
-    assert numpy.allclose(z, cmp)
-def test_neighFromImg_1d_stridesbigger():
-    x = T.dmatrix()
-    a = numpy.arange(2*4).reshape((2,4))
-    neighs = NeighbourhoodsFromImages(1, (2,), strides=(3,), ignore_border=False)(x)
-    f = theano.function([x], neighs)
-    z = f(a)
-    cmp = numpy.asarray([[[0.,1.],[3.,0.]],\
-                [[4.,5.],[7.,0.]]])
-    assert numpy.allclose(z, cmp)
-def test_neighFromImg_2d():
-    x = T.dtensor3()
-    a = numpy.arange(2*5*3).reshape((2,5,3))
-    neighs = NeighbourhoodsFromImages(1, (2,2), ignore_border=False)(x)
-    f = theano.function([x], neighs)
-    z = f(a)
-    cmp = numpy.asarray([[[[  0.,   1.,   3.,   4.,],
-           [  2.,   0.,   5.,   0.,]],
-          [[  6.,   7.,   9.,  10.,],
-           [  8.,   0.,  11.,   0.,]],
-          [[ 12.,  13.,   0.,   0.,],
-           [ 14.,   0.,   0.,   0.,]]],
-         [[[ 15.,  16.,  18.,  19.,],
-           [ 17.,   0.,  20.,   0.,]],
-          [[ 21.,  22.,  24.,  25.,],
-           [ 23.,   0.,  26.,   0.,]],
-          [[ 27.,  28.,   0.,   0.,],
-           [ 29.,   0.,   0.,   0.,]]]])
-    assert numpy.allclose(z, cmp)
-if __name__ == '__main__':
-    numpy.set_printoptions(threshold=numpy.nan)
-    test_neighFromImg_1d()
-    test_neighFromImg_1d_ignoreborder()
-    test_neighFromImg_1d_stridesmaller()
-    test_neighFromImg_1d_stridesbigger()
-    test_neighFromImg_2d()
-    test_imgFromNeigh_noborder_1d()
-    test_imgFromNeigh_1d_stridesmaller()
-'''
--- a/theano/tensor/nnet/conv.py
+++ b/theano/tensor/nnet/conv.py
@@ -129,7 +129,8 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
    if image_shape and filter_shape:
        try:
-            assert image_shape[1] == filter_shape[1]
+            if image_shape[1] is not None and filter_shape[1] is not None:
+                assert image_shape[1] == filter_shape[1]
        except Exception:
            print('image ', image_shape, ' filters ', filter_shape)
            raise

--- a/theano/tests/test_flake8.py
+++ b/theano/tests/test_flake8.py
@@ -89,7 +89,6 @@ whitelist_flake8 = [
    "sandbox/__init__.py",
    "sandbox/tests/test_theano_object.py",
    "sandbox/tests/test_scan.py",
-    "sandbox/tests/test_neighbourhoods.py",
    "sandbox/tests/__init__.py",
    "sandbox/cuda/var.py",
    "sandbox/cuda/GpuConvGrad3D.py",