提交 91935a04 authored 作者: James Bergstra's avatar James Bergstra

Merge pull request #45 from nouiz/mixed_commit

Mixed commit
...@@ -593,6 +593,12 @@ __global__ void k_copy_reshape_rowmajor(unsigned int numEls, ...@@ -593,6 +593,12 @@ __global__ void k_copy_reshape_rowmajor(unsigned int numEls,
z_i[0] = a_i[0]; //copy one lousy float! z_i[0] = a_i[0]; //copy one lousy float!
} }
} }
// Reshape self to the new shape given by the tuple shape.
//
// If self is c contiguous, it returns a view. Otherwise it always does a copy.
// TODO: make it return a view when the strides allow it even if it is not
// c contiguous
PyObject * CudaNdarray_Reshape(CudaNdarray * self, PyObject * shape) PyObject * CudaNdarray_Reshape(CudaNdarray * self, PyObject * shape)
{ {
// check shape tuple // check shape tuple
...@@ -717,6 +723,7 @@ PyObject * CudaNdarray_View(CudaNdarray * self) ...@@ -717,6 +723,7 @@ PyObject * CudaNdarray_View(CudaNdarray * self)
} }
return (PyObject*)rval; return (PyObject*)rval;
} }
PyObject * CudaNdarray_SetStride(CudaNdarray * self, PyObject *args) PyObject * CudaNdarray_SetStride(CudaNdarray * self, PyObject *args)
{ {
int pos, stride; int pos, stride;
...@@ -803,6 +810,9 @@ static PyMethodDef CudaNdarray_methods[] = ...@@ -803,6 +810,9 @@ static PyMethodDef CudaNdarray_methods[] =
{"copy", {"copy",
(PyCFunction)CudaNdarray_Copy, METH_NOARGS, (PyCFunction)CudaNdarray_Copy, METH_NOARGS,
"Create a copy of this object"}, "Create a copy of this object"},
{"is_c_contiguous",
(PyCFunction)CudaNdarray_IS_C_Contiguous, METH_NOARGS,
"Return True is the object is c contiguous. False otherwise."},
{"reduce_sum", {"reduce_sum",
(PyCFunction)CudaNdarray_ReduceSum, METH_O, (PyCFunction)CudaNdarray_ReduceSum, METH_O,
"Reduce over the given dimensions by summation"}, "Reduce over the given dimensions by summation"},
......
...@@ -504,6 +504,10 @@ CudaNdarray_ZEROS(int n, int * dims); ...@@ -504,6 +504,10 @@ CudaNdarray_ZEROS(int n, int * dims);
* True iff the strides look like [dim[nd-2], dim[nd-3], ... , dim[0], 1] * True iff the strides look like [dim[nd-2], dim[nd-3], ... , dim[0], 1]
*/ */
bool CudaNdarray_is_c_contiguous(const CudaNdarray * self); bool CudaNdarray_is_c_contiguous(const CudaNdarray * self);
// Python-callable wrapper around CudaNdarray_is_c_contiguous():
// converts the C-level contiguity check into a new Python bool object.
// Registered with METH_NOARGS, so it takes only the self argument.
PyObject * CudaNdarray_IS_C_Contiguous(CudaNdarray * self)
{
    const long contiguous = CudaNdarray_is_c_contiguous(self) ? 1L : 0L;
    return PyBool_FromLong(contiguous);
}
int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C); int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C);
int CudaNdarray_sger(float alpha, CudaNdarray * x, CudaNdarray * y, CudaNdarray* A); int CudaNdarray_sger(float alpha, CudaNdarray * x, CudaNdarray * y, CudaNdarray* A);
......
...@@ -909,6 +909,11 @@ def test_base(): ...@@ -909,6 +909,11 @@ def test_base():
e = b.reshape((5,2,2,3)) e = b.reshape((5,2,2,3))
assert e.base is a assert e.base is a
def test_is_c_contiguous():
    # A freshly allocated array is C contiguous, and so is a view taken
    # along the leading axis; a strided view (step > 1) is not.
    arr = cuda_ndarray.CudaNdarray.zeros((3, 4, 5))
    assert arr.is_c_contiguous()
    assert arr[1].is_c_contiguous()
    assert not arr[::2].is_c_contiguous()
if __name__ == '__main__': if __name__ == '__main__':
test_zeros_basic_3d_tensor() test_zeros_basic_3d_tensor()
......
...@@ -4880,7 +4880,11 @@ class AdvancedSubtensor1(Op): ...@@ -4880,7 +4880,11 @@ class AdvancedSubtensor1(Op):
x, i = inp x, i = inp
out, = out_ out, = out_
# Copy always implied by numpy advanced indexing semantic. # Copy always implied by numpy advanced indexing semantic.
out[0] = x[i] if out[0] is not None and out[0].shape==(len(i),)+x.shape[1:]:
o = out[0]
else:
o = None
out[0] = x.take(i, axis=0, out=o)
def grad(self, inputs, grads): def grad(self, inputs, grads):
gz, = grads gz, = grads
......
...@@ -1809,6 +1809,8 @@ class T_subtensor(unittest.TestCase): ...@@ -1809,6 +1809,8 @@ class T_subtensor(unittest.TestCase):
self.inc_sub = inc_sub self.inc_sub = inc_sub
self.adv_sub1 = adv_sub1 self.adv_sub1 = adv_sub1
self.adv_incsub1 = adv_incsub1 self.adv_incsub1 = adv_incsub1
if mode is None:
mode = theano.compile.mode.get_default_mode()
self.mode = mode self.mode = mode
self.dtype = dtype self.dtype = dtype
self.ignore_topo = ignore_topo self.ignore_topo = ignore_topo
...@@ -1885,7 +1887,9 @@ class T_subtensor(unittest.TestCase): ...@@ -1885,7 +1887,9 @@ class T_subtensor(unittest.TestCase):
def test2_ok_range_finite(self): def test2_ok_range_finite(self):
n = self.shared(numpy.ones((3,4), dtype=self.dtype)*5) n = self.shared(numpy.ones((3,4), dtype=self.dtype)*5)
t = n[0:2,3] # Also check negative index
for idx in [(slice(0,2),3),((slice(0,2),-1)),(slice(0,2),-4)]:
t = n[idx]#l]#0:2,3]
self.assertTrue(isinstance(t.owner.op, Subtensor)) self.assertTrue(isinstance(t.owner.op, Subtensor))
f = inplace_func([], t, mode=self.mode) f = inplace_func([], t, mode=self.mode)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
...@@ -1894,7 +1898,7 @@ class T_subtensor(unittest.TestCase): ...@@ -1894,7 +1898,7 @@ class T_subtensor(unittest.TestCase):
assert isinstance(topo_[0].op, self.sub) assert isinstance(topo_[0].op, self.sub)
tval = f() tval = f()
self.assertTrue(tval.shape == (2,)) self.assertTrue(tval.shape == (2,))
self.assertTrue(tval[1] == 5.0) self.assertTrue(numpy.allclose(tval, n.get_value()[idx]))
def test1_err_invalid(self): def test1_err_invalid(self):
n = self.shared(numpy.ones(1, dtype=self.dtype)) n = self.shared(numpy.ones(1, dtype=self.dtype))
...@@ -1946,7 +1950,8 @@ class T_subtensor(unittest.TestCase): ...@@ -1946,7 +1950,8 @@ class T_subtensor(unittest.TestCase):
def test2_err_bounds0(self): def test2_err_bounds0(self):
n = self.shared(numpy.ones((2,3), dtype=self.dtype)*5) n = self.shared(numpy.ones((2,3), dtype=self.dtype)*5)
t = n[0,4] for idx in [(0,4),(0,-4)]:
t = n[idx]
self.assertTrue(isinstance(t.owner.op, Subtensor)) self.assertTrue(isinstance(t.owner.op, Subtensor))
# Silence expected warnings # Silence expected warnings
_logger = logging.getLogger('theano.gof.opt') _logger = logging.getLogger('theano.gof.opt')
...@@ -1960,6 +1965,7 @@ class T_subtensor(unittest.TestCase): ...@@ -1960,6 +1965,7 @@ class T_subtensor(unittest.TestCase):
pass pass
finally: finally:
_logger.setLevel(oldlevel) _logger.setLevel(oldlevel)
def test2_err_bounds1(self): def test2_err_bounds1(self):
n = self.shared((numpy.ones((2,3), dtype=self.dtype)*5)) n = self.shared((numpy.ones((2,3), dtype=self.dtype)*5))
t = n[4:5,2] t = n[4:5,2]
...@@ -2075,6 +2081,10 @@ class T_subtensor(unittest.TestCase): ...@@ -2075,6 +2081,10 @@ class T_subtensor(unittest.TestCase):
(numpy.random.rand(4,5), [2,3]), (numpy.random.rand(4,5), [2,3]),
(numpy.random.rand(4,2,3), [0,3]), (numpy.random.rand(4,2,3), [0,3]),
(numpy.random.rand(4,2,3), [3,3,1,1,2,2,0,0]), (numpy.random.rand(4,2,3), [3,3,1,1,2,2,0,0]),
(numpy.random.rand(4,2,3), [3,3,1,1,2,2,0,0,-1,-2,-3,-4]),
# Test 4 dims as gpu code use another algo in that case
# This new algo is not as much optimized for that case.
(numpy.random.rand(4,4,2,3), [3,3,1,1,2,2,0,0,-1,-2,-3,-4]),
# Test with TensorConstant index. # Test with TensorConstant index.
(numpy.random.rand(4,2,3), constant([3,3,1,1,2,2,0,0])), (numpy.random.rand(4,2,3), constant([3,3,1,1,2,2,0,0])),
]: ]:
...@@ -2093,6 +2103,19 @@ class T_subtensor(unittest.TestCase): ...@@ -2093,6 +2103,19 @@ class T_subtensor(unittest.TestCase):
self.assertTrue(val.ndim == data.ndim) self.assertTrue(val.ndim == data.ndim)
self.assertTrue(numpy.allclose(val, good), (val, good)) self.assertTrue(numpy.allclose(val, good), (val, good))
# Test reuse of output memory
if isinstance(self.adv_sub1,tensor.AdvancedSubtensor1):
op = self.adv_sub1()
# When idx is a TensorConstant.
if hasattr(idx, "data"):
idx = idx.data
test_out = [[None]]
op.perform(None, [data, idx],test_out)
out1 = test_out[0][0]
op.perform(None, [data, idx],test_out)
out2 = test_out[0][0]
assert out1 is out2
def test_err_invalid_list(self): def test_err_invalid_list(self):
n = self.shared(numpy.asarray(5, dtype=self.dtype)) n = self.shared(numpy.asarray(5, dtype=self.dtype))
self.assertRaises(TypeError, n.__getitem__, [0,0]) self.assertRaises(TypeError, n.__getitem__, [0,0])
...@@ -2104,16 +2127,18 @@ class T_subtensor(unittest.TestCase): ...@@ -2104,16 +2127,18 @@ class T_subtensor(unittest.TestCase):
def test_err_bound_list(self): def test_err_bound_list(self):
n = self.shared(numpy.ones((2,3),dtype=self.dtype)*5) n = self.shared(numpy.ones((2,3),dtype=self.dtype)*5)
t = n[[0,4]] l = lvector()
t = n[l]
# We test again AdvancedSubtensor1 as we transfer data to the cpu. # We test again AdvancedSubtensor1 as we transfer data to the cpu.
self.assertTrue(isinstance(t.owner.op, theano.tensor.basic.AdvancedSubtensor1)) self.assertTrue(isinstance(t.owner.op, theano.tensor.basic.AdvancedSubtensor1))
f = function([], t, mode=self.mode) f = function([l], t, mode=self.mode)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
topo_ = [node for node in topo if not isinstance(node.op, self.ignore_topo)] topo_ = [node for node in topo if not isinstance(node.op, self.ignore_topo)]
assert len(topo_)==1 assert len(topo_)==1
self.assertTrue(isinstance(topo_[0].op, self.adv_sub1)) self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
self.assertRaises(IndexError, f) for shp in [[0,4],[0,-3], [-10]]:
self.assertRaises(IndexError, f, shp)
def test_adv_sub1_broadcast(self): def test_adv_sub1_broadcast(self):
ones = numpy.ones((1,3), dtype=self.dtype) ones = numpy.ones((1,3), dtype=self.dtype)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论