Commit cd520fbf, authored by James Bergstra

added draft of flatten for pascal, hope it parses!

Parent commit: 9655b929
...@@ -1284,6 +1284,14 @@ class GpuIncSubtensor(tensor.IncSubtensor): ...@@ -1284,6 +1284,14 @@ class GpuIncSubtensor(tensor.IncSubtensor):
rval = tensor.IncSubtensor.make_node(self, x, y, *inputs) rval = tensor.IncSubtensor.make_node(self, x, y, *inputs)
return Apply(self, [x,y]+rval.inputs[2:], [x.type()]) return Apply(self, [x,y]+rval.inputs[2:], [x.type()])
class GpuFlatten(tensor.Flatten):
    """Flatten for GPU-resident inputs.

    Same semantics as ``tensor.Flatten`` (collapse trailing dimensions
    down to ``outdim``), but the input and output are CudaNdarrays.
    """

    def make_node(self, x):
        # Only GPU-resident variables are accepted here.
        assert isinstance(x.type, CudaNdarrayType)
        # Delegate to the host-side Flatten to compute the output's
        # broadcastable pattern, then mirror that pattern on a GPU type.
        host_node = tensor.Flatten.make_node(self, x)
        broadcastable = host_node.outputs[0].type.broadcastable
        gpu_out_type = CudaNdarrayType(broadcastable=broadcastable)
        return Apply(self, [x], [gpu_out_type()])
class GpuShape(tensor.Shape):
    """Shape op for GPU-resident inputs.

    Behaves exactly like ``tensor.Shape``: the result is a host-side
    int64 vector (``lvector``) holding the input's shape.
    """

    def make_node(self, x):
        # The shape always lives on the host, regardless of where x lives.
        return Apply(self, [x], [tensor.lvector()])
......
...@@ -168,6 +168,22 @@ def local_gpu_reshape(node): ...@@ -168,6 +168,22 @@ def local_gpu_reshape(node):
return [host_from_gpu(GpuReshape(node.op.ndim)(gpu_x, shp))] return [host_from_gpu(GpuReshape(node.op.ndim)(gpu_x, shp))]
return False return False
@register_opt()
@local_optimizer([])
def local_gpu_flatten(node):
    """Lift Flatten onto the GPU.

    Two rewrites, mirroring the other local_gpu_* optimizers:

      gpu_from_host(flatten(x))  -> GpuFlatten(outdim)(gpu_from_host(x))
      flatten(host_from_gpu(x))  -> host_from_gpu(GpuFlatten(outdim)(x))

    Returns a single-element replacement list on a match, False otherwise.
    """
    if node.op == gpu_from_host:
        host_input = node.inputs[0]
        if host_input.owner and isinstance(host_input.owner.op,
                                           tensor.Flatten):
            outdim = host_input.owner.op.outdim
            # Bug fix: a Variable has no `.inputs`; the Flatten op's
            # input is reached through its owning Apply node
            # (host_input.owner), as in local_gpu_reshape.
            return [GpuFlatten(outdim)(
                gpu_from_host(host_input.owner.inputs[0]))]
    if isinstance(node.op, tensor.Flatten):
        x, = node.inputs
        outdim = node.op.outdim
        if x.owner and x.owner.op == host_from_gpu:
            gpu_x, = x.owner.inputs
            return [host_from_gpu(GpuFlatten(outdim)(gpu_x))]
    return False
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([])
def local_gpu_subtensor(node): def local_gpu_subtensor(node):
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Please proceed with caution.
Finish editing this message first!
Register or sign in to post a comment