Fixed optimization in cuda/opt - tightened the check for replacing Elemwise with GpuElemwise

Fixed optimization in cuda/opt - tightened the check for replacing Elemwise with GpuElemwise
8c20dd95 · James Bergstra · febb463d · 8c20dd95
--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -73,13 +73,11 @@ gpu_cut_copies.register('cut_gpu_constant_transfers', tensor.opt.constant_foldin
 def local_gpu_elemwise_0(node):
    if isinstance(node.op, tensor.Elemwise):
        if numpy.any([hasattr(i.owner, 'op') and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
-            if numpy.any([o.type.dtype == 'float64' for o in node.outputs]):
+            if numpy.all([i.type.dtype == 'float32' for i in node.inputs]):
-                print 'WARNING: THERE ARE STILL float64s in your graph local_gpu_elemwise_0', node
+                if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
-            else:
+                    new_op = GpuElemwise(node.op.scalar_op, node.op.inplace_pattern)
-                # move the add to a GpuAdd
+                    #TODO: change this when fusion makes Elemwise with multiple outputs
-                new_op = GpuElemwise(node.op.scalar_op, node.op.inplace_pattern)
+                    return [host_from_gpu(new_op(*(gpu_from_host(i) for i in node.inputs)))]
-                return [host_from_gpu(new_op(*(gpu_from_host(i) for i in node.inputs)))]
-    return False
 @register_opt()
 @local_optimizer([])