merge + several things that can't be committed partially.

Mainly: improved GpuSum.__str__; fixed a problem in make_node of IncSubtensor and Subtensor; added PyErr_Format() calls to the failure-handling code of GpuSum.

merge + several things that can't be committed partially.
462fb359 · James Bergstra · 9e6c2c03 · 3e5f365f · 462fb359 · 462fb359
--- a/basic_ops.py
+++ b/basic_ops.py
--- a/elemwise.py
+++ b/elemwise.py
@@ -641,7 +641,7 @@ class NaiveAlgo(object):
        output_args = ", ".join("o%i_data, o%i_str"%(ipos, ipos) 
                for ipos in xrange(len(node.outputs)))

-        prod_dims = '*'.join("dims[%i]"%di for di in xrange(nd))
+        prod_dims = '*'.join(["dims[%i]"%di for di in xrange(nd)]+['1'])

        scalar_op=self.scalar_op.__class__.__name__


--- a/opt.py
+++ b/opt.py
@@ -183,6 +183,41 @@ def local_gpu_subtensor(node):
            return [host_from_gpu(GpuSubtensor(node.op.idx_list)(gpu_x, *coords))]
    return False

+@register_opt()
+@local_optimizer([])
+def local_gpu_incsubtensor(node):  # returns a one-element list, or False if nothing matched
+    if node.op == gpu_from_host:  # case 1: node applies gpu_from_host
+        host_output = node.inputs[0]
+        if host_output.owner and type(host_output.owner.op) == tensor.IncSubtensor:
+            incsubt = host_output.owner.op  # the IncSubtensor op that produced node's input
+            x, y = host_output.owner.inputs[0:2]  # first two inputs of that IncSubtensor
+            coords = host_output.owner.inputs[2:]  # remaining inputs, forwarded unchanged
+            return [GpuIncSubtensor(incsubt.idx_list, inplace=incsubt.inplace)(  # same idx_list/inplace
+                gpu_from_host(x),
+                gpu_from_host(y),
+                *coords)]
+    if type(node.op) == tensor.IncSubtensor:  # case 2: node itself is an IncSubtensor (exact type)
+        x, y = node.inputs[0:2]
+        assert isinstance(x.type, tensor.TensorType)
+        assert isinstance(y.type, tensor.TensorType)
+        coords = node.inputs[2:]  # remaining inputs, forwarded unchanged
+        go_gpu = False  # set only when x or y is the output of host_from_gpu
+        if x.owner and x.owner.op == host_from_gpu:
+            go_gpu = True
+            gpu_x, = x.owner.inputs  # reuse the variable feeding host_from_gpu
+        else:
+            gpu_x = gpu_from_host(x)  # otherwise wrap x in gpu_from_host
+        if y.owner and y.owner.op == host_from_gpu:
+            go_gpu = True
+            gpu_y, = y.owner.inputs  # reuse the variable feeding host_from_gpu
+        else:
+            gpu_y = gpu_from_host(y)  # otherwise wrap y in gpu_from_host
+        if go_gpu:
+            return [host_from_gpu(GpuIncSubtensor(  # GPU op output wrapped in host_from_gpu
+                node.op.idx_list, inplace=node.op.inplace)(
+                    gpu_x, gpu_y, *coords))]
+    return False  # neither case matched
+
 @register_opt()
 @local_optimizer([])
 def local_gpu_shape(node):