Commit 28922228 authored by Frederic

In local_gpu_careduce, rebroadcast dimensions that are now detected as broadcastable, but weren't at graph build time.
Parent a4643310
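Both hunks below apply the same fix: when the rebuilt reduction's broadcastable pattern no longer matches the output type declared at graph build time, accept any dimension the optimizer newly proved broadcastable, refuse the rewrite if a declared broadcastable dimension was lost, and rebroadcast the result back to the declared pattern. A minimal standalone sketch of that logic, assuming Theano variables rval (the rebuilt reduction, moved back to the host) and out (the node's declared output); the helper name reconcile_broadcastable is illustrative, not part of the commit:

    import theano.tensor as tensor

    def reconcile_broadcastable(rval, out):
        # Compare the pattern the optimizer derived (rval) against the
        # pattern declared when the graph was built (out).
        for b1, b2 in zip(rval.broadcastable, out.broadcastable):
            if b1 and not b2:
                # Newly discovered to be broadcastable: allowed, the
                # patternbroadcast below restores the declared pattern.
                continue
            if not b1 and b2:
                # A declared broadcastable dimension was lost: give up.
                return None
        # Wrap rval in a Rebroadcast so its type matches the declared one.
        return tensor.patternbroadcast(rval, out.broadcastable)

Note that patternbroadcast only unbroadcasts here (True -> False), which is always safe at runtime; that is why the newly detected dimensions can simply be accepted.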
@@ -628,15 +628,31 @@ def local_gpu_careduce(node):
 
             greduce = GpuCAReduce(reduce_mask, scalar_op)
             if greduce.supports_c_code([gpu_from_host(x)]):
                 rval = host_from_gpu(greduce(gpu_from_host(x)))
-                if rval.type == node.outputs[0].type:
+                out = node.outputs[0]
+                if rval.type == out.type:
                     return [rval]
                 else:
-                    print >> sys.stderr, (
-                        "WARNING: local_gpu_careduce got type wrong",
-                        rval.type, node.outputs[0].type,
-                        node.inputs[0].type,
-                        node)
-                    return None
+                    for b1, b2 in zip(rval.broadcastable,
+                                      out.type.broadcastable):
+                        if b1 is True:
+                            # It can happen that during
+                            # optimization we discover that the
+                            # input can be broadcasted, but didn't
+                            # know that at graph build time.
+                            continue
+                        if b1 is False and b2 is True:
+                            # We should not lose the information
+                            # that one dimension was
+                            # broadcastable.
+                            print >> sys.stderr, (
+                                "WARNING: local_gpu_careduce got type"
+                                " wrong",
+                                rval.type, out.type,
+                                node.inputs[0].type, x.type,
+                                node)
+                            return None
+                    rval = patternbroadcast(rval,
+                                            out.type.broadcastable)
             else:
                 # Try to make a simpler pattern based on reshaping
@@ -665,21 +681,37 @@ def local_gpu_careduce(node):
                 if new_greduce.supports_c_code(reshaped_gpu_inputs):
                     reduce_reshaped_x = host_from_gpu(
                         new_greduce(gpu_reshaped_x))
 
-                    if reduce_reshaped_x.ndim != node.outputs[0].ndim:
-                        unreshaped_reduce = reduce_reshaped_x.reshape(
-                            tensor.stack(*shape_of[node.outputs[0]]))
+                    out = node.outputs[0]
+                    if reduce_reshaped_x.ndim != out.ndim:
+                        rval = reduce_reshaped_x.reshape(
+                            tensor.stack(*shape_of[out]))
                     else:
-                        unreshaped_reduce = reduce_reshaped_x
-                    if unreshaped_reduce.type == node.outputs[0].type:
-                        return [unreshaped_reduce]
+                        rval = reduce_reshaped_x
+                    if rval.type == out.type:
+                        return [rval]
                     else:
-                        print >> sys.stderr, (
-                            "WARNING: local_gpu_careduce got type wrong",
-                            unreshaped_reduce.type, node.outputs[0].type,
-                            node.inputs[0].type,
-                            node)
-                        return None
+                        for b1, b2 in zip(rval.broadcastable,
+                                          out.type.broadcastable):
+                            if b1 is True:
+                                # It can happen that during
+                                # optimization we discover that the
+                                # input can be broadcasted, but didn't
+                                # know that at graph build time.
+                                continue
+                            if b1 is False and b2 is True:
+                                # We should not lose the information
+                                # that one dimension was
+                                # broadcastable.
+                                print >> sys.stderr, (
+                                    "WARNING: local_gpu_careduce got type"
+                                    " wrong",
+                                    rval.type, out.type,
+                                    node.inputs[0].type, x.type,
+                                    node)
+                                return None
+                        rval = patternbroadcast(rval,
+                                                out.broadcastable)
 
     return False