提交 2284a814 authored 作者: Frederic's avatar Frederic 提交者: Arnaud Bergeron

Add GpuReshape in the new gpu back-end.

上级 3aedd9be
...@@ -518,3 +518,44 @@ class GpuAlloc(HideC, Alloc): ...@@ -518,3 +518,44 @@ class GpuAlloc(HideC, Alloc):
return (1,) return (1,)
gpu_alloc = GpuAlloc() gpu_alloc = GpuAlloc()
class GpuReshape(HideC, tensor.Reshape):
    """
    Implement Reshape on the gpu.
    """
    # __hash__, __eq__, __str__ come from tensor.Reshape
    def make_node(self, x, shp):
        """Build an Apply node reshaping GPU variable `x` to shape `shp`.

        The output type (dtype/broadcastable pattern) is inferred by
        reshaping on the host side, then mirrored into a GpuArrayType.
        """
        x = as_gpuarray_variable(x)
        res = host_from_gpu(x).reshape(shp, ndim=self.ndim)
        otype = GpuArrayType(dtype=res.dtype,
                             broadcastable=res.broadcastable)
        return Apply(self, [x, shp], [otype()])

    def perform(self, node, inp, out_):
        """Reshape the gpuarray `x` to `shp`, validating the shape first.

        Raises ValueError if `shp` has the wrong length, contains more
        than one -1, or is incompatible with the total size of `x`.
        """
        x, shp = inp
        out, = out_
        if len(shp) != self.ndim:
            raise ValueError('shape argument to GpuReshape.perform'
                             ' has incorrect length %i'
                             ', should be %i' % (len(shp), self.ndim), shp)
        if shp.prod() != x.size:
            # We need to do a check here to raise the same error as NumPy.
            # We should make pygpu do the same.
            ss = 1
            nb_m1 = 0
            for i in shp:
                if i == -1:
                    nb_m1 += 1
                else:
                    ss *= i
            if nb_m1 > 1:
                raise ValueError("Only one -1 is accepted in the new shape")
            elif nb_m1 == 1:
                if (x.size % ss) != 0:
                    raise ValueError("When using -1 in new shape, the computed new shape must be a multiple of the original shape.")
            else:
                raise ValueError("total size of new array must be unchanged")
        out[0] = x.reshape(tuple(shp))
...@@ -11,7 +11,7 @@ from theano.gof.python25 import all, any ...@@ -11,7 +11,7 @@ from theano.gof.python25 import all, any
from theano.sandbox.gpuarray.type import GpuArrayType from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host, from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host,
gpu_alloc) gpu_alloc, GpuReshape)
from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar, from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar,
GpuDimShuffle, GpuCAReduce) GpuDimShuffle, GpuCAReduce)
from theano.sandbox.gpuarray.subtensor import GpuSubtensor from theano.sandbox.gpuarray.subtensor import GpuSubtensor
...@@ -120,6 +120,20 @@ def local_gpualloc(node): ...@@ -120,6 +120,20 @@ def local_gpualloc(node):
return gpu_alloc return gpu_alloc
@register_opt()
@op_lifter(tensor.Reshape)
def local_gpureshape(node):
    """Optimization: lift a host Reshape apply to GpuReshape.

    Returns the replacement op (the op_lifter wrapper applies it to the
    lifted inputs), or None when the node is a Reshape subclass we must
    not touch.
    """
    op = node.op
    if type(op) is not tensor.Reshape:
        return None
    name = op.name
    if name:
        # Keep the user-provided name, tagged so the GPU version is
        # identifiable in debug/profile output.
        name = 'Gpu' + name
    return GpuReshape(op.ndim, name)
@register_opt() @register_opt()
@op_lifter(tensor.Elemwise) @op_lifter(tensor.Elemwise)
def local_gpu_elemwise(node): def local_gpu_elemwise(node):
......
...@@ -7,7 +7,7 @@ import theano ...@@ -7,7 +7,7 @@ import theano
import theano.tensor as T import theano.tensor as T
from theano.tensor import TensorType from theano.tensor import TensorType
from theano.tensor.basic import alloc from theano.tensor.basic import alloc
from theano.tensor.tests.test_basic import rand, safe_make_node from theano.tensor.tests.test_basic import rand, safe_make_node, T_reshape
from theano.tests.unittest_tools import SkipTest from theano.tests.unittest_tools import SkipTest
from numpy.testing.noseclasses import KnownFailureTest from numpy.testing.noseclasses import KnownFailureTest
...@@ -35,7 +35,7 @@ from theano.sandbox.gpuarray.type import (GpuArrayType, ...@@ -35,7 +35,7 @@ from theano.sandbox.gpuarray.type import (GpuArrayType,
from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host, from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host,
gpu_alloc, gpu_from_cuda, gpu_alloc, gpu_from_cuda,
cuda_from_gpu, HostFromGpu, cuda_from_gpu, HostFromGpu,
GpuFromHost) GpuFromHost, GpuReshape)
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
utt.seed_rng() utt.seed_rng()
...@@ -44,11 +44,10 @@ rng = numpy.random.RandomState(seed=utt.fetch_seed()) ...@@ -44,11 +44,10 @@ rng = numpy.random.RandomState(seed=utt.fetch_seed())
from pygpu import gpuarray from pygpu import gpuarray
if theano.config.mode == 'FAST_COMPILE': if theano.config.mode == 'FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpuarray') mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpuarray').excluding('gpu')
mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpuarray'\ mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpuarray')
)
else: else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpuarray') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpuarray').excluding('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpuarray') mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpuarray')
...@@ -288,3 +287,22 @@ GpuAllocTester = makeTester( ...@@ -288,3 +287,22 @@ GpuAllocTester = makeTester(
bad_shape12=(rand(7), numpy.int32(7), numpy.int32(5)), bad_shape12=(rand(7), numpy.int32(7), numpy.int32(5)),
) )
) )
class G_reshape(T_reshape):
    # Run the generic reshape test suite against the gpuarray back-end
    # by parameterizing T_reshape with GPU shared vars, GpuReshape and
    # the GPU compilation mode.
    def shortDescription(self):
        # Return None so nose prints the test name instead of a docstring.
        return None

    def __init__(self, name):
        T_reshape.__init__(self, name,
                           shared=gpuarray_shared_constructor,
                           op=GpuReshape,
                           mode=mode_with_gpu,
                           # avoid errors with limited devices
                           # dtype='float32',
                           # ops that may legitimately appear in the
                           # compiled graph besides the reshape itself
                           ignore_topo=(HostFromGpu, GpuFromHost,
                                        theano.compile.DeepCopyOp,
                                        theano.sandbox.gpuarray.elemwise.GpuElemwise,
                                        theano.tensor.opt.Shape_i,
                                        theano.tensor.opt.MakeVector))
        assert self.op == GpuReshape
...@@ -44,7 +44,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as, ...@@ -44,7 +44,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
dtensor3, SpecifyShape, Mean, dtensor3, SpecifyShape, Mean,
itensor3, Tile, switch, Diagonal, Diag, itensor3, Tile, switch, Diagonal, Diag,
nonzero, flatnonzero, nonzero_values, nonzero, flatnonzero, nonzero_values,
stacklists) stacklists, DimShuffle)
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
...@@ -4204,9 +4204,30 @@ class T_op_cache(unittest.TestCase): ...@@ -4204,9 +4204,30 @@ class T_op_cache(unittest.TestCase):
self.assertTrue(numpy.all(fn_py(a) == fn_c_or_py(a))) self.assertTrue(numpy.all(fn_py(a) == fn_c_or_py(a)))
class T_reshape(unittest.TestCase): class T_reshape(utt.InferShapeTester, utt.TestOptimizationMixin):
def setUp(self): def __init__(self, name, shared=tensor._shared, op=Reshape, mode=None,
utt.seed_rng() ignore_topo=(DeepCopyOp, opt.MakeVector,
opt.Shape_i, DimShuffle, theano.tensor.Elemwise)):
self.shared = shared
self.op = op
#The tag canonicalize is needed for the shape test in FAST_COMPILE
self.mode = mode
self.ignore_topo = ignore_topo
return super(T_reshape, self).__init__(name)
    def function(self, inputs, outputs):
        # Compile with the configured mode and, unless we are in the
        # un-optimized FAST_COMPILE default, check that exactly one
        # "interesting" node (the reshape under test) remains in the
        # graph after ignoring the ops listed in self.ignore_topo.
        f = function(inputs, outputs, mode=self.mode)
        if self.mode is not None or theano.config.mode != "FAST_COMPILE":
            topo = f.maker.fgraph.toposort()
            topo_ = [node for node in topo if not isinstance(node.op,
                                                             self.ignore_topo)]
            assert len(topo_) == 1, topo_
        return f
def eval_output_and_check(self, t):
f = self.function([], t)
tval = f()
return tval
def test_reshape(self): def test_reshape(self):
a = dvector() a = dvector()
...@@ -4215,7 +4236,7 @@ class T_reshape(unittest.TestCase): ...@@ -4215,7 +4236,7 @@ class T_reshape(unittest.TestCase):
#basic to 1 dim(without list) #basic to 1 dim(without list)
c = reshape(b, as_tensor_variable(6), ndim=1) c = reshape(b, as_tensor_variable(6), ndim=1)
f = inplace_func([b], c) f = self.function([b], c)
b_val1 = numpy.asarray([[0, 1, 2], [3, 4, 5]]) b_val1 = numpy.asarray([[0, 1, 2], [3, 4, 5]])
c_val1 = numpy.asarray([0, 1, 2, 3, 4, 5]) c_val1 = numpy.asarray([0, 1, 2, 3, 4, 5])
...@@ -4231,7 +4252,7 @@ class T_reshape(unittest.TestCase): ...@@ -4231,7 +4252,7 @@ class T_reshape(unittest.TestCase):
#basic to 1 dim(with list) #basic to 1 dim(with list)
c = reshape(b, (as_tensor_variable(6),), ndim=1) c = reshape(b, (as_tensor_variable(6),), ndim=1)
f = inplace_func([b], c) f = self.function([b], c)
assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]])) == assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]])) ==
numpy.asarray([0, 1, 2, 3, 4, 5])) numpy.asarray([0, 1, 2, 3, 4, 5]))
#print f.maker.fgraph.toposort() #print f.maker.fgraph.toposort()
...@@ -4239,14 +4260,14 @@ class T_reshape(unittest.TestCase): ...@@ -4239,14 +4260,14 @@ class T_reshape(unittest.TestCase):
#basic to shape object of same ndim #basic to shape object of same ndim
c = reshape(b, d.shape) c = reshape(b, d.shape)
f = inplace_func([b, d], c) f = self.function([b, d], c)
assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]]), assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]]),
[[0, 1], [2, 3], [4, 5]]) == [[0, 1], [2, 3], [4, 5]]) ==
numpy.asarray([[0, 1], [2, 3], [4, 5]])) numpy.asarray([[0, 1], [2, 3], [4, 5]]))
#basic to 2 dims #basic to 2 dims
c = reshape(a, [2, 3]) c = reshape(a, [2, 3])
f = inplace_func([a], c) f = self.function([a], c)
assert numpy.all(f(numpy.asarray([0, 1, 2, 3, 4, 5])) == assert numpy.all(f(numpy.asarray([0, 1, 2, 3, 4, 5])) ==
numpy.asarray([[0, 1, 2], [3, 4, 5]])) numpy.asarray([[0, 1, 2], [3, 4, 5]]))
...@@ -4255,7 +4276,7 @@ class T_reshape(unittest.TestCase): ...@@ -4255,7 +4276,7 @@ class T_reshape(unittest.TestCase):
a_val_copy = numpy.asarray([0, 1, 2, 3, 4, 5]) a_val_copy = numpy.asarray([0, 1, 2, 3, 4, 5])
b_val = numpy.asarray([[0, 1, 2], [3, 4, 5]]) b_val = numpy.asarray([[0, 1, 2], [3, 4, 5]])
f_sub = inplace_func([a, b], c - b) f_sub = self.function([a, b], c - b)
assert numpy.all(f_sub(a_val, b_val) == 0.0) assert numpy.all(f_sub(a_val, b_val) == 0.0)
assert numpy.all(a_val == a_val_copy) assert numpy.all(a_val == a_val_copy)
...@@ -4264,35 +4285,33 @@ class T_reshape(unittest.TestCase): ...@@ -4264,35 +4285,33 @@ class T_reshape(unittest.TestCase):
a_val_copy = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float64') a_val_copy = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float64')
b_val = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float64') b_val = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float64')
f_sub = inplace_func([a, b], c - b) f_sub = self.function([a, b], c - b)
assert numpy.all(f_sub(a_val, b_val) == 0.0) assert numpy.all(f_sub(a_val, b_val) == 0.0)
assert numpy.all(a_val == a_val_copy) assert numpy.all(a_val == a_val_copy)
# verify gradient # verify gradient
def just_vals(v): def just_vals(v):
return Reshape(2)(v, theano._asarray([2, 3], dtype='int32')) return Reshape(2)(v, theano._asarray([2, 3], dtype='int32'))
utt.verify_grad(just_vals, [a_val]) utt.verify_grad(just_vals, [a_val], mode=self.mode)
#test infer_shape #test infer_shape
f_sub = function([a, b], (c - b).shape) self._compile_and_check([a], [c], (a_val,), self.op)
if config.mode == "FAST_COMPILE":
assert len(f_sub.maker.fgraph.toposort()) == 3
else:
topo = f_sub.maker.fgraph.toposort()
assert len(topo) == 1
topo[0].op == theano.compile.function_module.deep_copy_op
#assert numpy.all(f_sub(a_val,numpy.asarray([[0,1],[2,3],[4,5]]))==[2,3])#work in FAST_RUN, but fail on other!
#assert numpy.all(f_sub(a_val,numpy.asarray([[0,1],[2,3],[4,5],[6,7]]))==[2,3])#work in FAST_RUN, but fail on other!
# test broadcast flag for constant value of 1 # test broadcast flag for constant value of 1
c = reshape(b, (b.shape[0], b.shape[1], 1)) c = reshape(b, (b.shape[0], b.shape[1], 1))
f = inplace_func([b], c) f = self.function([b], c)
assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]])) == assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]])) ==
numpy.asarray([[[0], [1], [2]], [[3], [4], [5]]])) numpy.asarray([[[0], [1], [2]], [[3], [4], [5]]]))
assert (f.maker.fgraph.toposort()[-2].outputs[0].type.broadcastable == assert (f.maker.fgraph.toposort()[-2].outputs[0].type.broadcastable ==
(False, False, True)) (False, False, True))
assert numpy.all(f_sub(a_val, b_val) == [2, 3]) def test_m1(self):
t = tensor3()
rng = numpy.random.RandomState(seed=utt.fetch_seed())
val = rng.uniform(size=(3, 4, 5)).astype(config.floatX)
for out in [t.reshape([-1]), t.reshape([-1, 5]),
t.reshape([5, -1]), t.reshape([5, -1, 3])]:
self._compile_and_check([t], [out], [val], self.op)
def test_reshape_long_in_shape(self): def test_reshape_long_in_shape(self):
v = dvector('v') v = dvector('v')
...@@ -4311,14 +4330,14 @@ class T_reshape(unittest.TestCase): ...@@ -4311,14 +4330,14 @@ class T_reshape(unittest.TestCase):
r = a.reshape(shapes, ndim=1) r = a.reshape(shapes, ndim=1)
z = zeros_like(r) z = zeros_like(r)
f = function([a, shapes], z.shape) f = self.function([a, shapes], z.shape)
self.assertRaises(ValueError, f, a_val, [13]) self.assertRaises(ValueError, f, a_val, [13])
#Test reshape to 2 dim #Test reshape to 2 dim
r = a.reshape(shapes, ndim=2) r = a.reshape(shapes, ndim=2)
z = zeros_like(r) z = zeros_like(r)
f = function([a, shapes], z.shape) f = self.function([a, shapes], z.shape)
self.assertRaises(ValueError, f, a_val, [-1, 5]) self.assertRaises(ValueError, f, a_val, [-1, 5])
self.assertRaises(ValueError, f, a_val, [7, -1]) self.assertRaises(ValueError, f, a_val, [7, -1])
......
...@@ -182,7 +182,10 @@ class InferShapeTester(unittest.TestCase): ...@@ -182,7 +182,10 @@ class InferShapeTester(unittest.TestCase):
def setUp(self): def setUp(self):
seed_rng() seed_rng()
# Take into account any mode that may be defined in a child class # Take into account any mode that may be defined in a child class
mode = getattr(self, 'mode', theano.compile.get_default_mode()) # and it can be None
mode = getattr(self, 'mode', None)
if mode is None:
mode = theano.compile.get_default_mode()
# This mode seems to be the minimal one including the shape_i # This mode seems to be the minimal one including the shape_i
# optimizations, if we don't want to enumerate them explicitly. # optimizations, if we don't want to enumerate them explicitly.
self.mode = mode.including("canonicalize") self.mode = mode.including("canonicalize")
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论