提交 c500bafa authored 作者: Frederic's avatar Frederic

finish gpu ger tests:

-renamed TestGer_local_gemm_to_ger to TestGer -replaced the Ger.make_thunk method with Ger.perform; this makes testing the CPU and GPU Ger ops easier and follows what is done most of the time -moved the tests from TestGer_make_thunk into TestGer and used theano.function; this was needed to make them work on both CPU and GPU -made TestGer always call the compiled function
上级 6174405c
...@@ -17,7 +17,7 @@ import theano.sandbox.cuda as tcn ...@@ -17,7 +17,7 @@ import theano.sandbox.cuda as tcn
from theano.tensor.signal.downsample import DownsampleFactorMax, DownsampleFactorMaxGrad from theano.tensor.signal.downsample import DownsampleFactorMax, DownsampleFactorMaxGrad
import theano.compile.mode import theano.compile.mode
from theano.tensor.tests.test_blas import BaseGemv, TestGer_local_gemm_to_ger from theano.tensor.tests.test_blas import BaseGemv, TestGer
from theano.sandbox.cuda.blas import gpu_gemv_no_inplace, gpu_gemv_inplace from theano.sandbox.cuda.blas import gpu_gemv_no_inplace, gpu_gemv_inplace
from theano.sandbox.cuda.blas import gpu_ger_inplace, gpu_ger_no_inplace from theano.sandbox.cuda.blas import gpu_ger_inplace, gpu_ger_no_inplace
...@@ -261,10 +261,9 @@ class TestGpuGemv(TestCase, BaseGemv, ...@@ -261,10 +261,9 @@ class TestGpuGemv(TestCase, BaseGemv,
gemv_inplace = gpu_gemv_inplace gemv_inplace = gpu_gemv_inplace
class TestGpuGer(TestGer_local_gemm_to_ger): class TestGpuGer(TestGer):
def setUp(self): def setUp(self):
self.mode = theano.compile.get_default_mode().including( self.mode = mode_with_gpu
'fast_run', 'gpu')
self.mode = self.mode.excluding('c_blas') self.mode = self.mode.excluding('c_blas')
dtype = self.dtype = 'float32' # optimization isn't dtype-dependent dtype = self.dtype = 'float32' # optimization isn't dtype-dependent
self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False)) self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False))
......
...@@ -282,31 +282,19 @@ class Ger(Op): ...@@ -282,31 +282,19 @@ class Ger(Op):
raise TypeError('only float and complex types supported', x.dtype) raise TypeError('only float and complex types supported', x.dtype)
return Apply(self, [A, alpha, x, y], [A.type()]) return Apply(self, [A, alpha, x, y], [A.type()])
def make_thunk(self, node, storage_map, compute_map, no_recycling): def perform(self, node, inp, out):
node_input_storage = [storage_map[r] for r in node.inputs] cA, calpha, cx, cy = inp
node_output_storage = [storage_map[r] for r in node.outputs] cZ, = out
if self.destructive:
# get vars for containers A = cA
cA, calpha, cx, cy = node_input_storage else:
cZ, = node_output_storage A = cA.copy()
if calpha != 1:
A += calpha * numpy.outer(cx, cy)
else:
A += numpy.outer(cx, cy)
cZ[0] = A
def rval():
if self.destructive:
A = cA[0]
else:
A = cA[0].copy()
if calpha[0] != 1:
A += calpha[0] * numpy.outer(cx[0], cy[0])
else:
A += numpy.outer(cx[0], cy[0])
cZ[0] = A
#TODO: If this is currently an unofficial part of the thunk API,
# then maybe it should be documented and made official?
rval.inputs = node_input_storage
rval.outputs = node_output_storage
rval.lazy = False
return rval
ger = Ger(destructive=False) ger = Ger(destructive=False)
ger_destructive = Ger(destructive=True) ger_destructive = Ger(destructive=True)
......
...@@ -200,9 +200,6 @@ class CGer(BaseBLAS, Ger): ...@@ -200,9 +200,6 @@ class CGer(BaseBLAS, Ger):
def c_code_cache_version(self): def c_code_cache_version(self):
return (2,) return (2,)
def make_thunk(*args, **kwargs):
# skip over Ger.make_thunk
return Op.make_thunk(*args, **kwargs)
@local_optimizer([ger, ger_destructive]) @local_optimizer([ger, ger_destructive])
def use_c_ger(node): def use_c_ger(node):
......
...@@ -1280,74 +1280,7 @@ class TestGer_OpContract(TestCase, unittest_tools.T_OpContractMixin): ...@@ -1280,74 +1280,7 @@ class TestGer_OpContract(TestCase, unittest_tools.T_OpContractMixin):
return Ger(op.destructive) return Ger(op.destructive)
class TestGer_make_thunk(TestCase): class TestGer(TestCase, unittest_tools.TestOptimizationMixin):
def setUp(self):
self.rng = numpy.random.RandomState(unittest_tools.fetch_seed())
def given_dtype(self, dtype, M, N):
sA = T.tensor(dtype=dtype, broadcastable=(False, False))
sa = T.tensor(dtype=dtype, broadcastable=())
sx = T.tensor(dtype=dtype, broadcastable=(False,))
sy = T.tensor(dtype=dtype, broadcastable=(False,))
sZ = ger(sA, sa, sx, sy)
node = sZ.owner
storage_map = {sA:[None], sa:[None], sx:[None], sy:[None], sZ:[None]}
thunk = ger.make_thunk(node, storage_map,
compute_map={}, no_recycling=[])
# non-standard for make_thunk to receive node.op != self,
# but works for now.
thunk_d = ger_destructive.make_thunk(node, storage_map,
compute_map={}, no_recycling=[])
def rand(*shape):
return numpy.asarray(1 + self.rng.rand(*shape), dtype=dtype)
storage_map[sA][0] = rand(M, N)
storage_map[sa][0] = rand()
storage_map[sx][0] = rand(M)
storage_map[sy][0] = rand(N)
storage_map_copy = dict([(k,[deepcopy(v[0])]) for k,v in storage_map.items()])
# TODO: do some DebugMode-type verifications here
# if this can be refactored into a Mixin that does the DebugMode
# stuff on just one thunk at a time. Do it in the style of
# TestOpContractMixin?
# - Compare with Elemwise testers
thunk()
assert numpy.all(storage_map[sZ][0] ==
storage_map[sA][0] + storage_map[sa][0] *
numpy.outer(storage_map[sx][0], storage_map[sy][0]))
assert storage_map[sZ][0].dtype == dtype
assert storage_map[sZ][0].shape == (M, N)
thunk_d()
assert numpy.all(storage_map[sZ][0] !=
storage_map[sA][0] + storage_map[sa][0] *
numpy.outer(storage_map[sx][0], storage_map[sy][0]))
assert numpy.all(storage_map[sZ][0] ==
storage_map_copy[sA][0] + storage_map[sa][0] *
numpy.outer(storage_map[sx][0], storage_map[sy][0]))
assert storage_map[sZ][0].dtype == dtype
assert storage_map[sZ][0].shape == (M, N)
def test_f32_0_0(self): return self.given_dtype('float32', 0, 0)
def test_f32_1_0(self): return self.given_dtype('float32', 1, 0)
def test_f32_0_1(self): return self.given_dtype('float32', 0, 1)
def test_f32_1_1(self): return self.given_dtype('float32', 1, 1)
def test_f32_4_4(self): return self.given_dtype('float32', 4, 4)
def test_f64_4_5(self): return self.given_dtype('float64', 4, 5)
def test_c64_7_1(self): return self.given_dtype('complex64', 7, 1)
def test_c128_1_9(self): return self.given_dtype('complex128', 1, 9)
class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin):
def setUp(self): def setUp(self):
self.mode = theano.compile.get_default_mode().including('fast_run') self.mode = theano.compile.get_default_mode().including('fast_run')
...@@ -1366,23 +1299,26 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1366,23 +1299,26 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin):
def tearDown(self): def tearDown(self):
theano.tensor.blas_scipy.optimizations_enabled = self.origval theano.tensor.blas_scipy.optimizations_enabled = self.origval
def function(self, inputs, outputs): def function(self, inputs, outputs, updates={}):
return theano.function(inputs, outputs, self.mode) return theano.function(inputs, outputs, self.mode, updates=updates)
def b(self, bval): def b(self, bval):
return T.as_tensor_variable(numpy.asarray(bval, dtype=self.dtype)) return T.as_tensor_variable(numpy.asarray(bval, dtype=self.dtype))
def test_b_0_triggers_ger(self): def test_b_0_triggers_ger(self):
""" test local_gemm_to_ger opt"""
assert T.blas.local_gemm_to_ger.transform( assert T.blas.local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.a, self.x.dimshuffle(0,'x'), self.A, self.a, self.x.dimshuffle(0,'x'),
self.y.dimshuffle('x', 0), self.b(0)).owner) self.y.dimshuffle('x', 0), self.b(0)).owner)
def test_b_1_triggers_ger(self): def test_b_1_triggers_ger(self):
""" test local_gemm_to_ger opt"""
assert T.blas.local_gemm_to_ger.transform( assert T.blas.local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.a, self.x.dimshuffle(0,'x'), self.A, self.a, self.x.dimshuffle(0,'x'),
self.y.dimshuffle('x', 0), self.b(1)).owner) self.y.dimshuffle('x', 0), self.b(1)).owner)
def test_b_other_does_not_triggers_ger(self): def test_b_other_does_not_triggers_ger(self):
""" test local_gemm_to_ger opt"""
assert not T.blas.local_gemm_to_ger.transform( assert not T.blas.local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.a, self.x.dimshuffle(0,'x'), self.A, self.a, self.x.dimshuffle(0,'x'),
...@@ -1391,16 +1327,24 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1391,16 +1327,24 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin):
def test_outer(self): def test_outer(self):
f = self.function([self.x, self.y], T.outer(self.x, self.y)) f = self.function([self.x, self.y], T.outer(self.x, self.y))
self.assertFunctionContains(f, self.ger_destructive) self.assertFunctionContains(f, self.ger_destructive)
f(numpy.random.rand(5).astype(self.dtype),
numpy.random.rand(4).astype(self.dtype))
def test_A_plus_outer(self): def test_A_plus_outer(self):
f = self.function([self.A, self.x, self.y], f = self.function([self.A, self.x, self.y],
self.A + T.outer(self.x, self.y)) self.A + T.outer(self.x, self.y))
self.assertFunctionContains(f, self.ger) self.assertFunctionContains(f, self.ger)
f(numpy.random.rand(5, 4).astype(self.dtype),
numpy.random.rand(5).astype(self.dtype),
numpy.random.rand(4).astype(self.dtype))
def test_A_plus_scaled_outer(self): def test_A_plus_scaled_outer(self):
f = self.function([self.A, self.x, self.y], f = self.function([self.A, self.x, self.y],
self.A + 0.1 * T.outer(self.x, self.y)) self.A + 0.1 * T.outer(self.x, self.y))
self.assertFunctionContains(f, self.ger) self.assertFunctionContains(f, self.ger)
f(numpy.random.rand(5, 4).astype(self.dtype),
numpy.random.rand(5).astype(self.dtype),
numpy.random.rand(4).astype(self.dtype))
def test_scaled_A_plus_scaled_outer(self): def test_scaled_A_plus_scaled_outer(self):
f = self.function([self.A, self.x, self.y], f = self.function([self.A, self.x, self.y],
...@@ -1410,3 +1354,49 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1410,3 +1354,49 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin):
# Why gemm? This make the graph simpler did we test that it # Why gemm? This make the graph simpler did we test that it
# make it faster? # make it faster?
self.assertFunctionContains(f, self.gemm) self.assertFunctionContains(f, self.gemm)
f(numpy.random.rand(5, 4).astype(self.dtype),
numpy.random.rand(5).astype(self.dtype),
numpy.random.rand(4).astype(self.dtype))
def given_dtype(self, dtype, M, N):
    """Compile ``A + 0.1 * outer(x, y)``, assert it optimizes to the
    Ger op, then execute it on random data of shapes (M, N), (M,), (N,).

    NOTE(review): the ``dtype`` argument is not used here -- the random
    data is generated with ``self.dtype`` and the graph is built from
    ``self.A/x/y``.  Confirm whether the per-dtype test names
    (test_f64_4_5, ...) are meant to rebuild the graph with ``dtype``.
    """
    expr = self.A + 0.1 * T.outer(self.x, self.y)
    f = self.function([self.A, self.x, self.y], expr)
    self.assertFunctionContains(f, self.ger)
    a_val = numpy.random.rand(M, N).astype(self.dtype)
    x_val = numpy.random.rand(M).astype(self.dtype)
    y_val = numpy.random.rand(N).astype(self.dtype)
    f(a_val, x_val, y_val)
def test_f32_0_0(self):
    """Corner case: float32 with an empty 0x0 result."""
    return self.given_dtype(dtype='float32', M=0, N=0)
def test_f32_1_0(self):
    """Corner case: float32 with a 1x0 result."""
    return self.given_dtype(dtype='float32', M=1, N=0)
def test_f32_0_1(self):
    """Corner case: float32 with a 0x1 result."""
    return self.given_dtype(dtype='float32', M=0, N=1)
def test_f32_1_1(self):
    """Corner case: float32 with a single-element 1x1 result."""
    return self.given_dtype(dtype='float32', M=1, N=1)
def test_f32_4_4(self):
    """float32 with a square 4x4 result."""
    return self.given_dtype(dtype='float32', M=4, N=4)
def test_f64_4_5(self):
    """float64 with a rectangular 4x5 result."""
    return self.given_dtype(dtype='float64', M=4, N=5)
def test_c64_7_1(self):
    """complex64 with a 7x1 result."""
    return self.given_dtype(dtype='complex64', M=7, N=1)
def test_c128_1_9(self):
    """complex128 with a 1x9 result."""
    return self.given_dtype(dtype='complex128', M=1, N=9)
def test_inplace(self):
    """Updating a shared variable with ``A + 0.1 * outer(x, y)`` should
    compile down to the destructive (in-place) Ger op, and the compiled
    function should run without error on random data.
    """
    shared_A = theano.shared(
        numpy.random.rand(4, 5).astype(self.dtype))
    alpha = T.constant(0.1, dtype=self.dtype)
    new_A = shared_A + alpha * T.outer(self.x, self.y)
    f = self.function([self.x, self.y], [], updates={shared_A: new_A})
    self.assertFunctionContains(f, self.ger_destructive)
    x_val = numpy.random.rand(4).astype(self.dtype)
    y_val = numpy.random.rand(5).astype(self.dtype)
    f(x_val, y_val)
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论