提交 07863099 authored 作者: kelvinxu's avatar kelvinxu

cpu_contiguous op + tests

上级 1003abb0
...@@ -11,6 +11,56 @@ from theano.gradient import DisconnectedType ...@@ -11,6 +11,56 @@ from theano.gradient import DisconnectedType
tensor = basic tensor = basic
class CpuContiguous(theano.Op):
    """
    Return a C-contiguous version of the input.

    If the input is already C-contiguous, it is passed through as a
    view (declared via ``view_map``); otherwise a C-contiguous copy
    is returned.
    """
    __props__ = ()
    # When the input is already contiguous, the output aliases input 0.
    view_map = {0: [0]}

    def make_node(self, x):
        x_ = theano.tensor.as_tensor_variable(x)
        return theano.Apply(self, [x_], [x_.type()])

    def perform(self, node, inputs, output_storage):
        x, = inputs
        y = output_storage[0]
        # If the *input* is already contiguous, pass it through
        # unchanged; otherwise copy it (numpy copies are C-contiguous
        # by default).
        if not x.flags['C_CONTIGUOUS']:
            x = x.copy()
        assert x.flags['C_CONTIGUOUS']
        y[0] = x

    def c_code(self, node, name, inames, onames, sub):
        x, = inames
        y, = onames
        fail = sub['fail']
        code = """
            if (!PyArray_CHKFLAGS(%(x)s, NPY_ARRAY_C_CONTIGUOUS)){
                // Reuse a pre-allocated output only when it is both
                // contiguous AND shape-compatible: PyArray_CopyInto
                // fails on mismatched shapes.
                if (%(y)s != NULL &&
                    PyArray_CHKFLAGS(%(y)s, NPY_ARRAY_C_CONTIGUOUS) &&
                    PyArray_NDIM(%(y)s) == PyArray_NDIM(%(x)s) &&
                    PyArray_CompareLists(PyArray_DIMS(%(y)s),
                                         PyArray_DIMS(%(x)s),
                                         PyArray_NDIM(%(x)s))){
                    if (PyArray_CopyInto(%(y)s, %(x)s) != 0){
                        %(fail)s;
                    }
                }
                else{
                    // PyArray_GETCONTIGUOUS returns a new reference;
                    // do NOT incref %(x)s here (doing so leaks a ref).
                    Py_XDECREF(%(y)s);
                    %(y)s = PyArray_GETCONTIGUOUS(%(x)s);
                    if (%(y)s == NULL){
                        %(fail)s;
                    }
                }
            }
            else{
                // Already contiguous: the output is a view of the input.
                Py_XINCREF(%(x)s);
                Py_XDECREF(%(y)s);
                %(y)s = %(x)s;
            }
            """ % locals()
        return code

    def c_code_cache_version(self):
        # Bumped after adding the shape check and error handling to c_code.
        return (1,)

cpu_contiguous = CpuContiguous()
class CumsumOp(theano.Op): class CumsumOp(theano.Op):
# See function cumsum for docstring # See function cumsum for docstring
def __init__(self, axis=None): def __init__(self, axis=None):
......
...@@ -7,9 +7,9 @@ import theano ...@@ -7,9 +7,9 @@ import theano
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.extra_ops import (CumsumOp, cumsum, CumprodOp, cumprod, from theano.tensor.extra_ops import (CumsumOp, cumsum, CumprodOp, cumprod,
BinCountOp, bincount, DiffOp, diff, CpuContiguous, cpu_contiguous, BinCountOp,
squeeze, compress, RepeatOp, repeat, bincount, DiffOp, diff, squeeze, compress,
Bartlett, bartlett, RepeatOp, repeat, Bartlett, bartlett,
FillDiagonal, fill_diagonal, FillDiagonal, fill_diagonal,
FillDiagonalOffset, fill_diagonal_offset, FillDiagonalOffset, fill_diagonal_offset,
to_one_hot) to_one_hot)
...@@ -21,6 +21,18 @@ numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]] ...@@ -21,6 +21,18 @@ numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
numpy_16 = bool(numpy_ver >= [1, 6]) numpy_16 = bool(numpy_ver >= [1, 6])
def test_cpu_contiguous():
    """CpuContiguous must appear in the compiled graph and must always
    return a C-contiguous array with the correct values, including for
    strided (non-contiguous) slices of the input."""
    a = T.fmatrix('a')
    i = T.iscalar('i')
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    f = theano.function([a, i], cpu_contiguous(a.reshape((5, 4))[::i]))
    topo = f.maker.fgraph.toposort()
    # Pass a generator to any(); no need to materialize a list.
    assert any(isinstance(node.op, CpuContiguous) for node in topo)
    # Stride 1 exercises the pass-through (already contiguous) path;
    # strides 2 and 3 exercise the copy path.
    for stride in (1, 2, 3):
        out = f(a_val, stride)
        assert out.flags['C_CONTIGUOUS']
        # Also check the values, not just the contiguity flag.
        assert numpy.array_equal(out, a_val.reshape(5, 4)[::stride])
class TestCumsumOp(utt.InferShapeTester): class TestCumsumOp(utt.InferShapeTester):
def setUp(self): def setUp(self):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论