Merge pull request #1279 from jsalvatier/advinc_rebase3

Advinc rebase3

Merge pull request #1279 from jsalvatier/advinc_rebase3
348e14dc · lamblin · 517540f5 · aed7bd08 · 348e14dc · 348e14dc
--- a/theano/gof/cutils.py
+++ b/theano/gof/cutils.py
-import os, sys
+import os
+import sys

 from theano.compat import PY3
 from theano.gof.compilelock import get_lock, release_lock
 from theano import config
+import cmodule 

 # TODO These two lines may be removed in the future, when we are 100% sure
 # no one has an old cutils_ext.so lying around anymore.
@@ -12,8 +14,66 @@ if os.path.exists(os.path.join(config.compiledir, 'cutils_ext.so')):

 def compile_cutils():
    """Do just the compilation of cutils_ext"""
-    code = """
+
+    types = ['npy_' + t for t in ['int8', 'int16', 'int32', 'int64', 'int128',
+        'int256', 'uint8', 'uint16', 'uint32', 'uint64', 'uint128', 'uint256',
+        'float16', 'float32', 'float64', 'float80', 'float96', 'float128',
+        'float256']]
+
+    complex_types = ['npy_' + t for t in ['complex32', 'complex64',
+        'complex128', 'complex160', 'complex192', 'complex512']]
+
+    inplace_map_template = """
+    #if defined(%(typen)s)
+    static void %(type)s_inplace_add(PyArrayMapIterObject *mit, PyArrayIterObject *it)
+    {
+        int index = mit->size;
+        while (index--) {
+            %(op)s
+
+            PyArray_MapIterNext(mit);
+            PyArray_ITER_NEXT(it);
+        }
+    }
+    #endif
+    """
+
+    floatadd = "((%(type)s*)mit->dataptr)[0] = ((%(type)s*)mit->dataptr)[0] + ((%(type)s*)it->dataptr)[0];"
+    complexadd = """
+    ((%(type)s*)mit->dataptr)[0].real = ((%(type)s*)mit->dataptr)[0].real + ((%(type)s*)it->dataptr)[0].real;
+    ((%(type)s*)mit->dataptr)[0].imag = ((%(type)s*)mit->dataptr)[0].imag + ((%(type)s*)it->dataptr)[0].imag;
+    """
+
+    fns = ''.join([inplace_map_template % {'type': t, 'typen': t.upper(),
+                                           'op': floatadd % {'type': t}}
+                        for t in types] +
+                  [inplace_map_template % {'type': t, 'typen': t.upper(),
+                                           'op': complexadd % {'type': t}}
+                        for t in complex_types])
+
+    fn_array = ("inplace_map_binop addition_funcs[] = {" +
+            ''.join(["""
+            #if defined(%(typen)s)
+            %(type)s_inplace_add,
+            #endif
+            """ % {'type': t, 'typen': t.upper()}
+                for t in types + complex_types]) +
+            """NULL};
+            """)
+
+    type_number_array = ("int type_numbers[] = {" +
+            ''.join(["""
+            #if defined(%(typen)s)
+            %(typen)s,
+            #endif
+            """ % {'type': t, 'typen': t.upper()}
+                for t in types + complex_types]) +
+            "-1000};")
+
+    code = ("""
        #include <Python.h>
+        #include "numpy/arrayobject.h"
+
        extern "C"{
        static PyObject *
        run_cthunk(PyObject *self, PyObject *args)
@@ -35,14 +95,130 @@ def compile_cutils():
          return Py_BuildValue("i", failure);
        }

+        #if NPY_API_VERSION >= 0x00000008
+        typedef void (*inplace_map_binop)(PyArrayMapIterObject *, PyArrayIterObject *);
+        """ + fns + fn_array + type_number_array +
+
+"""
+static int
+map_increment(PyArrayMapIterObject *mit, PyObject *op, inplace_map_binop add_inplace)
+{
+    PyArrayObject *arr = NULL;
+    PyArrayIterObject *it;
+    PyArray_Descr *descr;
+    if (mit->ait == NULL) {
+        return -1;
+    }
+    descr = PyArray_DESCR(mit->ait->ao);
+    Py_INCREF(descr);
+    arr = (PyArrayObject *)PyArray_FromAny(op, descr,
+                                0, 0, NPY_ARRAY_FORCECAST, NULL);
+    if (arr == NULL) {
+        return -1;
+    }
+    if ((mit->subspace != NULL) && (mit->consec)) {
+        if (mit->iteraxes[0] > 0) {
+            PyArray_MapIterSwapAxes(mit, (PyArrayObject **)&arr, 0);
+            if (arr == NULL) {
+                return -1;
+            }
+        }
+    }
+    it = (PyArrayIterObject*)
+            PyArray_BroadcastToShape((PyObject*)arr, mit->dimensions, mit->nd);
+    if (it  == NULL) {
+        Py_DECREF(arr);
+        return -1;
+    }
+
+    (*add_inplace)(mit, it);
+
+    Py_DECREF(arr);
+    Py_DECREF(it);
+    return 0;
+}
+
+
+static PyObject *
+inplace_increment(PyObject *dummy, PyObject *args)
+{
+    PyObject *arg_a = NULL, *index=NULL, *inc=NULL;
+    PyArrayObject *a;
+    inplace_map_binop add_inplace = NULL;
+    int type_number = -1;
+    int i =0;
+    PyArrayMapIterObject * mit;
+
+    if (!PyArg_ParseTuple(args, "OOO", &arg_a, &index,
+            &inc)) {
+        return NULL;
+    }
+    if (!PyArray_Check(arg_a)) {
+         PyErr_SetString(PyExc_ValueError, "needs an ndarray as first argument");
+         return NULL;
+    }
+
+    a = (PyArrayObject *) arg_a;
+
+    if (PyArray_FailUnlessWriteable(a, "input/output array") < 0) {
+        return NULL;
+    }
+
+    if (PyArray_NDIM(a) == 0) {
+        PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed.");
+        return NULL;
+    }
+    type_number = PyArray_TYPE(a);
+
+
+
+    while (type_numbers[i] >= 0 && addition_funcs[i] != NULL){
+        if (type_number == type_numbers[i]) {
+            add_inplace = addition_funcs[i];
+            break;
+        }
+        i++ ;
+    }
+
+    if (add_inplace == NULL) {
+        PyErr_SetString(PyExc_TypeError, "unsupported type for a");
+        return NULL;
+    }
+    mit = (PyArrayMapIterObject *) PyArray_MapIterArray(a, index);
+    if (mit == NULL) {
+        goto fail;
+    }
+    if (map_increment(mit, inc, add_inplace) != 0) {
+        goto fail;
+    }
+
+    Py_DECREF(mit);
+
+    Py_INCREF(Py_None);
+    return Py_None;
+
+fail:
+    Py_XDECREF(mit);
+
+    return NULL;
+}
+        #endif
+
+
        static PyMethodDef CutilsExtMethods[] = {
            {"run_cthunk",  run_cthunk, METH_VARARGS|METH_KEYWORDS,
             "Run a theano cthunk."},
+            #if NPY_API_VERSION >= 0x00000008
+            {"inplace_increment",  inplace_increment,
+              METH_VARARGS,
+             "increments a numpy array inplace at the passed indexes."},
+            #endif
            {NULL, NULL, 0, NULL}        /* Sentinel */
-        };"""
+        };""")
+
    if PY3:
-        # This is not the most efficient code, but it is written this way to highlight
-        # the changes needed to make 2.x code compile under python 3.
+        # This is not the most efficient code, but it is written this way to
+        # highlight the changes needed to make 2.x code compile under python 3.
        code = code.replace("<Python.h>", '"numpy/npy_3kcompat.h"', 1)
        code = code.replace("PyCObject", "NpyCapsule")
        code += """
@@ -59,15 +235,16 @@ def compile_cutils():
            return PyModule_Create(&moduledef);
        }
        }
-    """
+        """
    else:
        code += """
        PyMODINIT_FUNC
        initcutils_ext(void)
        {
+          import_array();
          (void) Py_InitModule("cutils_ext", CutilsExtMethods);
        }
-        }
+    } //extern C
        """

    loc = os.path.join(config.compiledir, 'cutils_ext')
@@ -95,8 +272,6 @@ try:
    try:
        from cutils_ext.cutils_ext import *
    except ImportError:
-        import cmodule
-
        get_lock()
    # Ensure no-one else is currently modifying the content of the compilation
    # directory. This is important to prevent multiple processes from trying to
@@ -108,7 +283,6 @@ try:
                # and when we receive the lock
                from cutils_ext.cutils_ext import *
            except ImportError:
-                import cmodule

                compile_cutils()
                from cutils_ext.cutils_ext import *

--- a/theano/scalar/basic.py
+++ b/theano/scalar/basic.py
@@ -23,7 +23,8 @@ import numpy
 import theano
 from theano.compat import PY3
 from theano import gof
-from theano.gof import Op, utils, Variable, Constant, Type, Apply, FunctionGraph
+from theano.gof import (Op, utils, Variable, Constant, Type, Apply,
+        FunctionGraph)
 from theano.gof.python25 import partial, all, any
 from theano.configparser import config

@@ -1090,7 +1091,7 @@ class UnaryBitOp(UnaryScalarOp):
        return upcast_out(*input_types[0])

    def grad(self, inputs, output_gradients):
-        return [None]
+        return [inputs[0].zeros_like().astype(theano.config.floatX)]


 class BinaryBitOp(BinaryScalarOp):
@@ -1103,7 +1104,8 @@ class BinaryBitOp(BinaryScalarOp):
        return upcast_out(*input_types[0])

    def grad(self, inputs, output_gradients):
-        return [None, None]
+        a,b = inputs
+        return [a.zeros_like().astype(theano.config.floatX), b.zeros_like().astype(theano.config.floatX)]


 class OR(BinaryBitOp):
@@ -2679,7 +2681,7 @@ class Composite(ScalarOp):
        except AttributeError:
            if 0:
                l = []
-                for n in fgraph.toposort():
+                for n in self.fgraph.toposort():
                    if hasattr(n.op, "name") and n.op.name is not None:
                        v = n.op.name
                        if v.startswith("Composite"):

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -37,13 +37,13 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
        tensor_copy, tensordot, TensorType, Tri, tri, tril, triu, unbroadcast,
        var, Join, shape, MaxAndArgmax, lscalar, zvector, exp,
        get_scalar_constant_value, ivector, reshape, scalar_from_tensor, scal,
-        iscalars, arange,  dscalars, fvector, imatrix, numeric_grad,
+        iscalars, arange, dscalars, fvector, imatrix, numeric_grad,
        opt, ComplexError, lvector, lmatrix, true_div, max, min, Split, roll,
        tile, patternbroadcast, Eye, Shape, Dot, PermuteRowElements,
        ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc,
        dtensor3, SpecifyShape, Mean, IncSubtensor, AdvancedIncSubtensor1,
        itensor3, Tile, AdvancedIncSubtensor, switch, Diagonal, Diag,
-        nonzero, flatnonzero, nonzero_values)
+        nonzero, flatnonzero, nonzero_values, inplace_increment)
 from theano.tests import unittest_tools as utt


@@ -3131,10 +3131,6 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
        n = self.shared(numpy.asarray(5, dtype=self.dtype))
        self.assertRaises(TypeError, n.__getitem__, [0, 0])

-    def test_err_invalid_not_2d(self):
-        n = self.shared(numpy.ones((3, 3, 3), dtype=self.dtype) * 5)
-        self.assertRaises(NotImplementedError, n.__getitem__,
-                          ([0, 0, 0], [1, 1, 1], [2, 2, 2]))

    def test_err_invalid_2list_dtype(self):
        n = self.shared(numpy.ones((3, 3), dtype=self.dtype) * 5)
@@ -3725,6 +3721,109 @@ class TestIncSubtensor1(unittest.TestCase):
        self.assertRaises(TypeError,
                lambda: inc_subtensor(self.v[self.adv1q], fmatrix()))

+inplace_increment_missing = SkipTest("inc_subtensor with advanced indexing not enabled. "
+                       "Installing NumPy 1.8 or the latest development version "
+                       "should make that feature available.")
+
+class TestAdvancedSubtensor(unittest.TestCase):
+    # Tests advanced indexing and inc_subtensor with advanced indices.
+    # NOTE: no test in this class currently calls set_subtensor.
+
+    def setUp(self):
+        self.s = iscalar()
+        self.v = fvector()
+        self.m = dmatrix()
+        self.t = ctensor3()
+
+        self.ix1 = lvector()  # advanced 1d query
+        self.ix12 = lvector()
+        self.ix2 = lmatrix()
+
+    def test_cant_adv_idx_into_scalar(self):
+        self.assertRaises(TypeError, lambda: self.s[self.ix1])
+
+    def test_index_into_vec_w_vec(self):
+        a = self.v[self.ix1]
+        assert a.type == self.v.type, (a.type, self.v.type)
+
+    def test_index_into_vec_w_matrix(self):
+        a = self.v[self.ix2]
+        assert a.dtype == self.v.dtype, (a.dtype, self.v.dtype)
+        assert a.broadcastable == self.ix2.broadcastable, (
+                a.broadcastable, self.ix2.broadcastable)
+
+    def test_inc_adv_subtensor_w_matrix(self):
+        if inplace_increment is None: 
+            raise inplace_increment_missing
+        
+        subt = self.v[self.ix2]
+        a = inc_subtensor(subt,subt)
+
+        assert a.type == self.v.type, (a.type, self.v.type)
+        f = theano.function([self.v, self.ix2], a, allow_input_downcast=True)
+        aval = f([.4, .9, .1], [[1, 2],
+                                [1, 2]])
+        assert numpy.allclose(aval, [.4, .9 * 3, .1 * 3])
+
+    def test_inc_adv_subtensor_w_2vec(self):
+        if inplace_increment is None: 
+            raise inplace_increment_missing
+
+        subt = self.m[self.ix1, self.ix12]
+        a = inc_subtensor(subt, subt)
+
+        typ = TensorType(self.m.type.dtype, self.ix2.type.broadcastable)
+        assert a.type == typ, (a.type, typ)
+        f = theano.function([self.m, self.ix1, self.ix12], a,
+                            allow_input_downcast=True)
+        aval = f([[.4, .9, .1],
+                  [5,   6,  7],
+                  [.5, .3, .15]],
+                 [1, 2, 1],
+                 [0, 1, 0])
+        assert numpy.allclose(aval,
+                [[.4, .9, .1],
+                  [5 * 3,   6,  7],
+                  [.5, .3 * 2, .15]]), aval
+
+    def test_inc_adv_subtensor_with_broadcasting(self):
+        if inplace_increment is None: 
+            raise inplace_increment_missing
+
+        a = inc_subtensor(self.m[self.ix1, self.ix12], 2.1)
+
+        assert a.type == self.m.type, (a.type, self.m.type)
+        f = theano.function([self.m, self.ix1, self.ix12], a,
+                            allow_input_downcast=True)
+        aval = f([[.4, .9, .1],
+                  [5,   6,  7],
+                  [.5, .3, .15]],
+                 [1, 2, 1],
+                 [0, 1, 0])
+        assert numpy.allclose(aval,
+                [[.4, .9, .1],
+                  [5 + 2.1 * 2,   6,  7],
+                  [.5, .3 + 2.1, .15]]), aval
+
+    def test_inc_adv_subtensor_with_index_broadcasting(self):
+        if inplace_increment is None: 
+            raise inplace_increment_missing
+
+        a = inc_subtensor(self.m[self.ix1, self.ix2], 2.1)
+
+        assert a.type == self.m.type, (a.type, self.m.type)
+        f = theano.function([self.m, self.ix1, self.ix2], a,
+                            allow_input_downcast=True)
+        aval = f([[.4, .9, .1],
+                  [5,   6,  7],
+                  [.5, .3, .15]],
+                 [0, 2, 0],
+                 [[0, 1, 0],
+                  [2, 2, 2]])
+        assert numpy.allclose(aval,
+                [[.4 + 2*2.1, .9, .1 + 2*2.1],
+                  [5 ,   6,  7 ],
+                  [.5, .3 + 2.1, .15 + 2.1]]), aval

 class T_Join_and_Split(unittest.TestCase):
    """