Merge branch 'shape_feature' of github.com:nouiz/Theano into opt

f292f7e2 · Frederic Bastien · f452c040 · f605f504 · f292f7e2 · f292f7e2
--- a/theano/gpuarray/opt.py
+++ b/theano/gpuarray/opt.py
@@ -710,18 +710,14 @@ def local_gpua_careduce(node, context_name):
                    assert reduce_mask[a] == 0
                    reduce_mask[a] = 1

-            shape_of = node.fgraph.shape_feature.shape_of
-
-            x_shape = shape_of[x]
-
-            new_in_shp = [x_shape[0]]
+            new_in_shp = [shape_i(x, 0)]
            new_mask = [reduce_mask[0]]
            for i in xrange(1, x.type.ndim):
                if reduce_mask[i] == reduce_mask[i - 1]:
-                    new_in_shp[-1] *= x_shape[i]
+                    new_in_shp[-1] *= shape_i(x, i)
                else:
                    new_mask.append(reduce_mask[i])
-                    new_in_shp.append(x_shape[i])
+                    new_in_shp.append(shape_i(x, i))
            new_axis = []
            for idx, m in enumerate(new_mask):
                if m == 1:
@@ -743,8 +739,12 @@ def local_gpua_careduce(node, context_name):
                    greduce(gpu_reshaped_x))

                if reduce_reshaped_x.ndim != node.outputs[0].ndim:
+                    out_shp = []
+                    for i in range(x.ndim):
+                        if i not in node.op.axis:
+                            out_shp.append(shape_i(x, i))
                    unreshaped_reduce = reduce_reshaped_x.reshape(
-                        tensor.stack(shape_of[node.outputs[0]]))
+                        tensor.stack(out_shp))
                else:
                    unreshaped_reduce = reduce_reshaped_x
                return [unreshaped_reduce]

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -14,6 +14,7 @@ from . import dnn
 import theano
 from theano import scalar as scal
 from theano import config, tensor, gof
+from theano.compile.ops import shape_i
 import theano.ifelse
 import theano.tensor.signal.pool
 import theano.tensor.nnet
@@ -899,18 +900,14 @@ def local_gpu_careduce(node):
                    # to make them a single dimension, do the reduction, and
                    # then reshape to get them back.

-                    shape_of = node.fgraph.shape_feature.shape_of
-
-                    x_shape = shape_of[x]
-
-                    new_in_shp = [x_shape[0]]
+                    new_in_shp = [shape_i(x, 0)]
                    new_mask = [reduce_mask[0]]
                    for i in xrange(1, x.type.ndim):
                        if reduce_mask[i] == reduce_mask[i - 1]:
-                            new_in_shp[-1] *= x_shape[i]
+                            new_in_shp[-1] *= shape_i(x, i)
                        else:
                            new_mask.append(reduce_mask[i])
-                            new_in_shp.append(x_shape[i])
+                            new_in_shp.append(shape_i(x, i))

                    new_greduce = GpuCAReduce(new_mask, scalar_op)
                    new_x = x.reshape(tensor.stack(new_in_shp))
@@ -935,8 +932,11 @@ def local_gpu_careduce(node):

                    # Restore the expected shape of the output
                    if rval.ndim != out.ndim:
-                        rval = rval.reshape(
-                            tensor.stack(shape_of[out]))
+                        out_shp = []
+                        for i in range(x.ndim):
+                            if i not in node.op.axis:
+                                out_shp.append(shape_i(x, i))
+                        rval = rval.reshape(tensor.stack(out_shp))

                if rval.type == out.type:
                    return [rval]

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -1260,6 +1260,12 @@ class ShapeFeature(object):
        for node in fgraph.toposort():
            self.on_import(fgraph, node, reason='on_attach')

+    def on_detach(self, fgraph):  # reset this feature's state on removal
+        self.shape_of = {}  # forget every recorded shape
+        self.scheduled = {}
+        self.shape_of_reverse_index = {}
+        del fgraph.shape_feature  # fgraph no longer exposes this feature
+
    def on_import(self, fgraph, node, reason):
        if node.outputs[0] in self.shape_of:
            # this is a revert, not really an import
@@ -1430,19 +1436,28 @@ class ShapeFeature(object):

 class ShapeOptimizer(Optimizer):
    """Optimizer that serves to add ShapeFeature as an fgraph feature."""
-    def __init__(self):
-        Optimizer.__init__(self)
-
    def add_requirements(self, fgraph):
        fgraph.attach_feature(ShapeFeature())

    def apply(self, fgraph):
        pass

+class UnShapeOptimizer(Optimizer):
+    """Optimizer that removes ShapeFeature as an fgraph feature."""
+    def apply(self, fgraph):
+        for feature in list(fgraph._features):  # copy: removal may mutate it
+            if isinstance(feature, ShapeFeature):
+                fgraph.remove_feature(feature)
+
 # Register it after merge1 optimization at 0. We don't want to track
 # the shape of merged node.
 theano.compile.mode.optdb.register('ShapeOpt', ShapeOptimizer(),
                                   0.1, 'fast_run', 'fast_compile')
+# Not enabled by default for now. Some crossentropy optimizations use
+# the shape_feature; they run at step 2.01. uncanonicalize is at step 3,
+# and the next stage, at 48.5, moves to the GPU. So 10 seems reasonable.
+theano.compile.mode.optdb.register('UnShapeOpt', UnShapeOptimizer(),
+                                   10)


 def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):