提交 3c1025e4 · 作者:Frederic

Reimplement shape_i to not disable shape optimization in some cases

上级 ae0797b3
...@@ -387,17 +387,34 @@ class Shape_i(gof.Op): ...@@ -387,17 +387,34 @@ class Shape_i(gof.Op):
return [None] return [None]
def shape_i(var, i, fgraph=None):
    """Equivalent of ``var.shape[i]``, but use the shape feature
    optimization when possible.

    This is useful in optimizations that need to get the shape of a
    variable: it removes the need for the subsequent shape_feature
    optimization that would otherwise convert it. This speeds up
    optimization and avoids Equilibrium max-iteration problems.

    :param var: the variable we want to take the shape of
    :param i: the shape dimension we want
    :param fgraph: optional. If ``var.fgraph`` does not exist, the
        fgraph whose shape_feature should be used (``var`` is
        introduced into it to get the optimized shape).
    """
    # Prefer the fgraph the variable already belongs to.
    if fgraph is None and hasattr(var, 'fgraph'):
        fgraph = var.fgraph
    if fgraph and hasattr(fgraph, 'shape_feature'):
        if var not in fgraph.shape_feature.shape_of:
            # If var isn't known to the ShapeFeature yet, add it.
            fgraph.shape_feature.on_import(fgraph, var.owner,
                                           'gof.ops.shape_i')
        return fgraph.shape_feature.shape_of[var][i]
    # If we are not able to use the shape feature, we should not put
    # Shape_i in the graph. Otherwise, the shape feature optimization
    # won't get applied.
    return var.shape[i]
def register_shape_i_c_code(typ, code, check_input, version=()): def register_shape_i_c_code(typ, code, check_input, version=()):
......
...@@ -576,6 +576,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -576,6 +576,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
capability of 3.0 or higer. This means that older GPU will not capability of 3.0 or higer. This means that older GPU will not
work with this Op. work with this Op.
""" """
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
if (border_mode == 'valid' and subsample == (1,1) and if (border_mode == 'valid' and subsample == (1,1) and
direction_hint == 'bprop weights'): direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set # Special case: We are asked to use GpuDnnConvGradW. We need to set
...@@ -602,13 +603,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -602,13 +603,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3)) kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv' conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
shape = theano.tensor.stack(shape_i(img, 0), shape_i(kerns, 1), shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
shape_i(img, 2) + shape_i(kerns, 2) - 1, shape = theano.tensor.stack(shape_i(img, 0, fgraph),
shape_i(img, 3) + shape_i(kerns, 3)- 1) shape_i(kerns, 1, fgraph),
shape2, shape3)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode=conv_mode)(shape, kerns.shape) conv_mode=conv_mode)(shape, kerns.shape)
return GpuDnnConvGradI()(kerns, img, desc, shape[2], shape[3]) return GpuDnnConvGradI()(kerns, img, desc, shape2, shape3)
# Standard case: We use GpuDnnConv with suitable padding. # Standard case: We use GpuDnnConv with suitable padding.
img = gpu_contiguous(img) img = gpu_contiguous(img)
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论