提交 fe7f1d09 authored 作者: James Bergstra's avatar James Bergstra

modified cuda opt to insert GpuOuter as special case of GpuDot22

上级 5d652817
...@@ -423,23 +423,19 @@ def local_gpu_gemm(node): ...@@ -423,23 +423,19 @@ def local_gpu_gemm(node):
@local_optimizer([])
def local_gpu_outer(node):
    """Rewrite a GpuDot22 of a column by a row as a GpuOuter.

    gpu_dot22(col, row) -> gpu_outer

    A dot product between an (N, 1) column and a (1, M) row is exactly an
    outer product of the two underlying vectors, so the specialized
    GpuOuter op can be used instead of the general GpuDot22.

    :param node: an Apply node from the graph being optimized.
    :returns: a one-element list with the replacement GpuOuter output when
        the pattern matches, otherwise False (no replacement).
    """
    if node.op == gpu_dot22:
        l, r = node.inputs
        # The pattern only matches when the left operand is a column
        # (broadcastable in dim 1) and the right operand is a row
        # (broadcastable in dim 0).
        if l.type.broadcastable[1] and r.type.broadcastable[0]:
            # TODO: we would like to remove the double-dimshuffle when l or r
            # is already the output of a GpuDimshuffle. To do this, refactor
            # the logic in tensor/opt.py that collapses dimshuffle chains so
            # that we can call it from here.
            # Drop the broadcastable dimension to get plain vectors.
            lvec = GpuDimShuffle(l.broadcastable, [0])(l)
            rvec = GpuDimShuffle(r.broadcastable, [1])(r)
            return [gpu_outer(lvec, rvec)]
    return False
@register_opt() @register_opt()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论