Fix GpuIncSubtensor since it relied on the internals of GpuElemwise which have changed.

3fbcbf65 · Arnaud Bergeron · 59466ca0 · 3fbcbf65
--- a/theano/sandbox/gpuarray/subtensor.py
+++ b/theano/sandbox/gpuarray/subtensor.py
@@ -18,10 +18,9 @@ try:
 except ImportError:
    pass
-from .type import GpuArrayType
+from .type import GpuArrayType, gpu_context_type
 from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel,
                        infer_context_name)
-from .elemwise import GpuElemwise
 class GpuSubtensor(HideC, Subtensor):
@@ -168,7 +167,7 @@ class GpuSubtensor(HideC, Subtensor):
        return (6,)
-class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
+class GpuIncSubtensor(IncSubtensor):
    """
    Implement IncSubtensor on the gpu.
@@ -181,45 +180,20 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
    :meth:`copy_of_x`, etc. specialize the c_code for this Op.
    """
+    _f16_ok = True
-    @property
+    params_type = gpu_context_type
-    def _f16_ok(self):
-        return self.iadd_node.op._f16_ok
-    def c_headers(self):
-        return self.iadd_node.op.c_headers()
-    def c_init_code(self):
-        return self.iadd_node.op.c_init_code()
-    def gpu_kernels(self, node, nodename):
-        subname = nodename + "_add_to_zview"
-        return self.iadd_node.op.gpu_kernels(self.iadd_node, subname)
    def make_node(self, x, y, *inputs):
        ctx_name = infer_context_name(x, y)
        x = as_gpuarray_variable(x, ctx_name)
        y = as_gpuarray_variable(y, ctx_name)
        rval = tensor.IncSubtensor.make_node(self, x, y, *inputs)
-        op = copy.copy(self)
+        ret = gof.Apply(self, [x, y] + rval.inputs[2:], [x.type()])
-        ret = gof.Apply(op, [x, y] + rval.inputs[2:], [x.type()])
-        op.create_iadd_node(ret)
        return ret
    def get_params(self, node):
        return node.outputs[0].type.context
-    def create_iadd_node(self, node):
-        # We store a iadd_node in the op that contain the info needed
-        # for the inplace add.
-        cop = theano.tensor.inplace.add_inplace
-        gop = GpuElemwise(cop.scalar_op, copy.copy(cop.inplace_pattern),
-                          "Gpu" + cop.name, cop.nfunc_spec)
-        y = node.inputs[1]
-        xview = y.type()
-        iadd_node = gop(xview, y).owner
-        self.iadd_node = iadd_node
    def perform(self, node, inputs, out_, ctx):
        out, = out_
        x, y = inputs[:2]
@@ -261,18 +235,6 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
                x.__setitem__(cdata, y)
        out[0] = x
-    def __setstate__(self, d):
-        self.__dict__.update(d)
-        owner = getattr(self, "owner", None)
-        if owner:
-            self.create_iadd_node(owner)
-    def __getstate__(self):
-        d = copy.copy(self.__dict__)
-        if "iadd_node" in d:
-            d.pop('iadd_node')
-        return d
    def do_type_checking(self, node):
        """
        Should raise NotImplementedError if c_code does not support
@@ -365,47 +327,54 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
        """
        return """GpuArray_setarray(&%(view)s->ga, &%(source)s->ga)""" % locals()
+    def c_headers(self):
+        return ['<numpy_compat.h>', '<gpuarray/error.h>', '<gpuarray/array.h>',
+                '<gpuarray/elemwise.h>']
    def c_support_code_struct(self, node, nodename):
-        gop = self.iadd_node.op
+        return "\nGpuElemwise *iadd;\n"
-        sub_name = nodename + "_add_to_zview"
-        ret = gop.c_support_code_struct(self.iadd_node, sub_name)
-        ret += """
-        PyGpuArrayObject* inc_sub_iadd_%(nodename)s(PyGpuArrayObject* dst,
-                                                    PyGpuArrayObject* src){
-           PyGpuArrayObject* ret = NULL;
-        """ % locals()
-        inputs = ["dst", "src"]
-        outputs = ["ret"]
-        sub = {"fail": "return NULL;", "params": "dst->context"}
-        ret += gop.c_code(self.iadd_node, sub_name, inputs, outputs, sub)
-        ret += """
-            return ret;
+    def c_init_code_struct(self, node, name, sub):
+        return """
+        gpuelemwise_arg args[2] = {{0}};
+        args[0].name = "a";
+        args[0].typecode = %(type1)s;
+        args[0].flags = GE_READ|GE_WRITE;
+        args[0].nd = %(nd)s;
+        args[1].name = "b";
+        args[1].typecode = %(type2)s;
+        args[1].flags = GE_READ;
+        args[1].nd = %(nd)s;
+        iadd = GpuElemwise_new(%(ctx)s->ops, %(ctx)s->ctx, "", "a += b",
+                               2, args, 0);
+        if (iadd == NULL) {
+          PyErr_SetString(PyExc_RuntimeError, "Could not intialize inplace add support");
+          %(fail)s
        }
-        """
+        """ % dict(ctx=sub['params'], fail=sub['fail'],
-        return ret
+                   type1=node.inputs[0].type.typecode,
+                   type2=node.inputs[1].type.typecode,
+                   nd=node.inputs[1].ndim)
    def add_to_zview(self, nodename, x, fail):
        return """
-        PyGpuArrayObject * add_result = inc_sub_iadd_%(nodename)s(zview, %(x)s);
-        if (! add_result )
        {
+          void *args[2];
+          args[0] = &zview->ga;
+          args[1] = &%(x)s->ga;
+          if (GpuElemwise_call(iadd, args, GE_BROADCAST) != GA_NO_ERROR) {
+            PyErr_SetString(PyExc_RuntimeError, "Error doing inplace add");
            Py_DECREF(zview);
-            %(fail)s;
+            %(fail)s
-        }
+          }
-        else
-        {
-            Py_DECREF(add_result);
        }
        """ % locals()
    def c_code_cache_version(self):
        parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
-        elemwise_version = self.iadd_node.c_code_cache_version()
+        if not parent_version:
-        if not parent_version or not elemwise_version:
            return
-        return parent_version + elemwise_version + (3,)
+        return parent_version + (4,)
 class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):