Commit 75cd8839, authored by abergeron

Merge pull request #2483 from nouiz/mixed

Mixed
......@@ -4,6 +4,7 @@ and Ops building class (:class:`FromFunctionOp`) and decorator
"""
import copy
import cPickle
import warnings
import theano
......@@ -387,17 +388,34 @@ class Shape_i(gof.Op):
return [None]
def shape_i(var, i):
"""This is useful in optimization that need to get the shape. This
def shape_i(var, i, fgraph=None):
"""Equivalent of var.shape[i], but apply if possible the shape
feature optimization
This is useful in optimization that need to get the shape. This
remove the need of the following shape_feature optimization that
convert it. So this speed up optimization and remove Equilibrium
max iteration problems.
:param var: the variable we want to take the shape of
:param i: The shape dimensions we want
:param fgraph: optional. If var.fgraph do not exist, the fgraph that
have the shape_feature to introduce var in to get the optimized shape.
"""
if (hasattr(var, 'fgraph') and
hasattr(node.outputs[0].fgraph, 'shape_feature')):
return node.outputs[0].fgraph.shape_feature.shape_of[var][i]
return Shape_i(i)(var)
if fgraph is None and hasattr(var, 'fgraph'):
fgraph = var.fgraph
if fgraph and hasattr(fgraph, 'shape_feature'):
if var not in fgraph.shape_feature.shape_of:
# If var isn't in the ShapeFeature, add it.
fgraph.shape_feature.on_import(fgraph, var.owner,
'gof.ops.shape_i')
return fgraph.shape_feature.shape_of[var][i]
# If we are not able to use the shape feature, we should not put
# Shape_i in the graph. Otherwise, the shape feature optimization
# won't get applied.
return var.shape[i]
def register_shape_i_c_code(typ, code, check_input, version=()):
......@@ -474,17 +492,20 @@ class FromFunctionOp(gof.Op):
try:
obj = load_back(mod, name)
except (ImportError, KeyError, AttributeError):
raise PicklingError("Can't pickle as_op(), not found as %s.%s" %
(mod, name))
raise cPickle.PicklingError(
"Can't pickle as_op(), not found as %s.%s" %
(mod, name))
else:
if obj is not self:
raise PicklingError("Can't pickle as_op(), not the object "
"at %s.%s" % (mod, name))
raise cPickle.PicklingError(
"Can't pickle as_op(), not the object "
"at %s.%s" % (mod, name))
return load_back, (mod, name)
def _infer_shape(self, node, input_shapes):
    """Delegate shape inference to the callable captured at construction.

    Forwards ``node`` and ``input_shapes`` unchanged to the private
    (name-mangled) ``__infer_shape`` attribute.
    """
    infer = self.__infer_shape
    return infer(node, input_shapes)
def as_op(itypes, otypes, infer_shape=None):
"""
Decorator that converts a function into a basic Theano op that
......
......@@ -146,6 +146,14 @@ cudnnConvolutionBackwardData_v2(
#define cudnnConvolutionBackwardData cudnnConvolutionBackwardData_v2
//Needed for R2 rc2
# define CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING CUDNN_POOLING_AVERAGE
#else
// r2 rc1 and rc2 do not have the same macro defined
// I didn't checked if this the right combination, but as we do not wrap the padding interface, it is fine for now.
# define CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING ((cudnnPoolingMode_t)1)
#endif
#endif
......@@ -576,6 +576,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
capability of 3.0 or higer. This means that older GPU will not
work with this Op.
"""
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
if (border_mode == 'valid' and subsample == (1,1) and
direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
......@@ -602,13 +603,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape = theano.tensor.stack(shape_i(img, 0), shape_i(kerns, 1),
shape_i(img, 2) + shape_i(kerns, 2) - 1,
shape_i(img, 3) + shape_i(kerns, 3)- 1)
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
shape = theano.tensor.stack(shape_i(img, 0, fgraph),
shape_i(kerns, 1, fgraph),
shape2, shape3)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode=conv_mode)(shape, kerns.shape)
return GpuDnnConvGradI()(kerns, img, desc, shape[2], shape[3])
return GpuDnnConvGradI()(kerns, img, desc, shape2, shape3)
# Standard case: We use GpuDnnConv with suitable padding.
img = gpu_contiguous(img)
......@@ -662,7 +664,7 @@ class GpuDnnPoolDesc(GpuOp):
# Map the Python-level pooling mode name to the cuDNN enum symbol used
# in the generated C code.
if self.mode == 'max':
    mode_flag = 'CUDNN_POOLING_MAX'
elif self.mode == "average":
    # COUNT_INCLUDE_PADDING is #defined back to the plain average mode
    # on cuDNN releases that lack it (see the compatibility #defines
    # in the C support code).
    mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
else:
    raise NotImplementedError("Unsupported pooling model.")
......
......@@ -964,7 +964,7 @@ class ConvOp(OpenMPOp):
return ['<numpy/noprefix.h>', '<iostream>', '<sstream>']
def c_code_cache_version(self):
    """Version key for the C code cache.

    Bumped from 12 to 13 so previously compiled modules are
    regenerated; also keyed on the openmp flag and the BLAS header
    version, since both change the generated C code.
    """
    return (13, self.openmp, blas.blas_header_version())
def c_support_code(self):
return """
......@@ -1194,7 +1194,15 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
dim_zz[1] = (int)ceil((dim_im[1]-dim_ker1+1)/float(%(self_dy)s));
}
""" % d
d["assert_size"] = ""
d["assert_size"] = """
// Check the stack size of the filter and images are equals
if(kerns_dim[1] != img2d_dim[1]){
PyErr_Format(PyExc_ValueError,
"the filter stack size (%%ld) and image stack size (%%ld) differ",
(long)kerns_dim[1], (long)img2d_dim[1]);
%(fail)s;
}
""" % sub
if self.kshp_logical_top_aligned:
d["self_kshp_logical_offset_r"] = 0
......
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论