Merge pull request #364 from yaoli/GetItem2D_GetItemScalar_ReviewTest

Ops:GetItem2D&GetItemScalar to return a subtensor/scalar from a sparse m...

Merge pull request #364 from yaoli/GetItem2D_GetItemScalar_ReviewTest
26b9590f · nouiz · 1b773bb2 · 3d8f22b4 · 26b9590f · 26b9590f
--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
@@ -163,7 +163,7 @@ class _sparse_py_operators:
    #N.B. THIS IS COMMENTED OUT ON PURPOSE!!!
    #     Discussion with Fred & James (at least, and maybe others before)
    #     we decided that casting from a sparse to dense should be explicit
-    #     because it's usually something you want to be pretty careful about,
+    #     because it's usually something you just want to be pretty careful about,
    #     and not to do by accident.
    #def _as_TensorVariable(self):
    #    return dense_from_sparse(self)
@@ -184,7 +184,26 @@ class _sparse_py_operators:
    def zeros_like(model):
        return sp_zeros_like(model)
+    def __getitem__(self, args):
+        if not isinstance(args, tuple):
+            args = args,
+        scalar_var = tensor.iscalar()
+        if len(args) == 2:
+            scalar_arg_1 = (numpy.isscalar(args[0]) or
+                            getattr(args[0], 'type', None) == scalar_var.type)
+            scalar_arg_2 = (numpy.isscalar(args[1]) or
+                            getattr(args[1], 'type', None) == scalar_var.type)
+            if scalar_arg_1 and scalar_arg_2:
+                ret = get_item_scalar(self, args)
+            else:
+                ret = get_item_2d(self, args)
+        else:
+            ret = get_item_2d(self, args)
+        return ret
 class SparseVariable(gof.Variable, _sparse_py_operators):
    dtype = property(lambda self: self.type.dtype)
    format = property(lambda self: self.type.format)
@@ -625,7 +644,126 @@ class SparseFromDense(gof.op.Op):
 csr_from_dense = SparseFromDense('csr')
 csc_from_dense = SparseFromDense('csc')
+# Indexing
+class GetItem2d(gof.op.Op):
+    """
+    Implement a subtensor of sparse variable and that return a sparse matrix.
+    If you want to take only one element of a sparse matrix see the class GetItemScalar
+    that return a tensor scalar.
+    :note:
+    that subtensor selection always returns a matrix so indexing with [a:b, c:d] is forced.
+    If one index is a scalar, e.g. x[a:b, c] and x[a, b:c], generate an error. Use instead
+    x[a:b, c:c+1] and x[a:a+1, b:c].
+    The above indexing methods are not supported because the rval would be a sparse
+    matrix rather than a sparse vector, which is a deviation from numpy indexing rule.
+    This decision is made largely for keeping the consistency between numpy and theano. 
+    Subjected to modification when sparse vector is supported.
+    """
+    def __eq__(self, other):
+        return (type(self) == type(other))
+    def __hash__(self):
+        return hash(type(self))
+# Fred:Too complicated for now. If you need it, look at the Subtensor.infer_shape.
+#    def infer_shape(self, node, i0_shapes):
+#        return i0_shapes
+    def make_node(self, x, index):
+        x = as_sparse_variable(x)
+        assert len(index) in [1, 2]
+        input_op = [x]
+        for ind in index:
+            if isinstance(ind, slice):
+                # in case of slice is written in theano variable 
+                start = ind.start
+                stop = ind.stop
+                # in case of slice is written in python int
+                if isinstance(start,int):
+                    start = theano.tensor.constant(start)
+                if isinstance(stop,int):
+                    stop = theano.tensor.constant(stop)
+            #in case of indexing using python int
+            #elif isinstance(ind,int):
+            #    start = theano.tensor.constant(ind)
+            #    stop = start + 1
+            #elif ind.ndim == 0:
+            #    start = ind
+            #    stop = ind + 1
+            else:
+                raise NotImplemented('Theano has no sparse vector'+
+                                    'Use X[a:b,c:d], X[a:b,c:c+1] or X[a:b] instead.')
+            input_op += [start, stop]
+        if len(index)==1:
+            i = theano.gof.Constant(theano.gof.generic, None)
+            input_op += [i, i]
+        return gof.Apply(self, input_op, [x.type()])
+    def perform(self, node, (x, start1, stop1, start2, stop2), (out, )):
+        assert _is_sparse(x)
+        out[0] = x[start1:stop1, start2:stop2]
+    def __str__(self):
+        return self.__class__.__name__
+get_item_2d = GetItem2d()
+class GetItemScalar(gof.op.Op):
+    """
+    Implement a subtensor of a sparse variable that take two scalar as index and return a scalar
+    :see: GetItem2d to return more then one element.
+    """
+    def __eq__(self, other):
+        return (type(self) == type(other))
+    def __hash__(self):
+        return hash(type(self))
+    def infer_shape(self, node, i0_shapes):
+        return [()]
+    def make_node(self, x, index):
+        x = as_sparse_variable(x)
+        assert len(index)==2
+        input_op = [x]
+        for ind in index:
+            if isinstance(ind, slice):
+                raise Exception("GetItemScalar called with a slice as index!")
+            #in case of indexing using int instead of theano variable
+            elif isinstance(ind, int):
+                ind = theano.tensor.constant(ind)
+                input_op += [ind]
+            # in case of indexing using theano variable
+            elif ind.ndim == 0:
+                input_op += [ind]
+            else:
+                raise NotImplemented()
+        return gof.Apply(self, input_op, [tensor.scalar(dtype=x.dtype)])
+    def perform(self, node, (x, ind1, ind2), (out, )):
+        assert _is_sparse(x)
+        out[0] = x[ind1, ind2]
+    def __str__(self):
+        return self.__class__.__name__
+get_item_scalar = GetItemScalar()
 # Linear Algebra

--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -24,6 +24,7 @@ from theano.sparse import SparseType, StructuredDotCSC
 from theano.sparse import add, mul, structured_dot, transpose
 from theano.sparse import csc_from_dense, csr_from_dense, dense_from_sparse
 from theano.sparse import Dot, Usmm, UsmmCscDense
+from theano.sparse import get_item_2d, get_item_scalar
 from theano.tests import unittest_tools as utt
 from theano import tensor
@@ -555,14 +556,8 @@ class test_structureddot(unittest.TestCase):
 class DotTests(unittest.TestCase):
    def setUp(self):
-        # On 32-bit platforms we use smaller matrices to avoid running out of
+        x_size = (10, 1000)
-        # memory during tests.
+        y_size = (1000, 10000)
-        if theano.gof.cmodule.local_bitwidth() <= 32:
-            x_size = (10, 100)
-            y_size = (100, 1000)
-        else:
-            x_size = (10, 1000)
-            y_size = (1000, 10000)
        self.x_csr = scipy.sparse.csr_matrix(
            numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX)
@@ -935,6 +930,131 @@ def test_size():
        check()
+def test_GetItem2D():
+    sparse_formats = ('csc', 'csr')
+    for format in sparse_formats:
+        x = theano.sparse.matrix(format)
+        a = theano.tensor.iscalar()
+        b = theano.tensor.iscalar()
+        c = theano.tensor.iscalar()
+        d = theano.tensor.iscalar()
+        # index
+        m = 1
+        n = 5
+        p = 10
+        q = 15
+        vx = as_sparse_format(numpy.random.binomial(1, 0.5, (100, 100)),
+                              format).astype(theano.config.floatX)
+        #mode_no_debug = theano.compile.mode.get_default_mode()
+        #if isinstance(mode_no_debug, theano.compile.DebugMode):
+        #    mode_no_debug = 'FAST_RUN'
+        f1 = theano.function([x, a, b, c, d], x[a:b, c:d])
+        r1 = f1(vx, m, n, p, q)
+        t1 = vx[m:n, p:q]
+        assert r1.shape == t1.shape
+        assert numpy.all(t1.toarray() == r1.toarray())
+        """"
+        Important: based on a discussion with both Fred and James
+        The following indexing methods is not supported because the rval
+        would be a sparse matrix rather than a sparse vector, which is a
+        deviation from numpy indexing rule. This decision is made largely
+        for keeping the consistency between numpy and theano.
+        f2 = theano.function([x, a, b, c], x[a:b, c])
+        r2 = f2(vx, m, n, p)
+        t2 = vx[m:n, p]
+        assert r2.shape == t2.shape
+        assert numpy.all(t2.toarray() == r2.toarray())
+        f3 = theano.function([x, a, b, c], x[a, b:c])
+        r3 = f3(vx, m, n, p)
+        t3 = vx[m, n:p]
+        assert r3.shape == t3.shape
+        assert numpy.all(t3.toarray() == r3.toarray())
+        f5 = theano.function([x], x[1:2,3])
+        r5 = f5(vx)
+        t5 = vx[1:2, 3]
+        assert r5.shape == t5.shape
+        assert numpy.all(r5.toarray() == t5.toarray())
+        f7 = theano.function([x], x[50])
+        r7 = f7(vx)
+        t7 = vx[50]
+        assert r7.shape == t7.shape
+        assert numpy.all(r7.toarray() == t7.toarray())
+        """
+        f4 = theano.function([x, a, b], x[a:b])
+        r4 = f4(vx, m, n)
+        t4 = vx[m:n]
+        assert r4.shape == t4.shape
+        assert numpy.all(t4.toarray() == r4.toarray())
+        #-----------------------------------------------------------
+        # test cases using int indexing instead of theano variable
+        f6 = theano.function([x], x[1:10, 10:20])
+        r6 = f6(vx)
+        t6 = vx[1:10, 10:20]
+        assert r6.shape == t6.shape
+        assert numpy.all(r6.toarray() == t6.toarray())
+        #----------------------------------------------------------
+        # test cases with indexing both with theano variable and int
+        f8 = theano.function([x, a, b], x[a:b, 10:20])
+        r8 = f8(vx, m, n)
+        t8 = vx[m:n, 10:20]
+        assert r8.shape == t8.shape
+        assert numpy.all(r8.toarray() == t8.toarray())
+        f9 = theano.function([x, a, b], x[1:a, 1:b])
+        r9 = f9(vx, p, q)
+        t9 = vx[1:p, 1:q]
+        assert r9.shape == t9.shape
+        assert numpy.all(r9.toarray() == t9.toarray())
+def test_GetItemScalar():
+    sparse_formats = ('csc', 'csr')
+    for format in sparse_formats:
+        x = theano.sparse.csc_matrix('x')
+        a = theano.tensor.iscalar()
+        b = theano.tensor.iscalar()
+        m = 50
+        n = 50
+        vx = as_sparse_format(numpy.random.binomial(1, 0.5, (100, 100)),
+                             format).astype(theano.config.floatX)
+        f1 = theano.function([x, a, b], x[a, b])
+        r1 = f1(vx, 10, 10)
+        t1 = vx[10, 10]
+        assert r1.shape == t1.shape
+        assert numpy.all(t1 == r1)
+        f2 = theano.function([x, a], x[50, a])
+        r2 = f2(vx, m)
+        t2 = vx[50, m]
+        assert r2.shape == t2.shape
+        assert numpy.all(t2 == r2)
+        f3 = theano.function([x, a], x[a, 50])
+        r3 = f3(vx, m)
+        t3 = vx[m, 50]
+        assert r3.shape == t3.shape
+        assert numpy.all(t3 == r3)
+        f4 = theano.function([x], x[50, 50])
+        r4 = f4(vx)
+        t4 = vx[m, n]
+        assert r3.shape == t3.shape
+        assert numpy.all(t4 == r4)
 import theano.tensor.tests.test_sharedvar
 test_shared_options = theano.tensor.tests.test_sharedvar.makeSharedTester(
    shared_constructor_=theano.sparse.shared,