提交 6349790c,作者:Frederic Bastien

Make ScipyGer use prepare_node

上级 ca40ef22
...@@ -2172,8 +2172,8 @@ class GpuConv(GpuOp): ...@@ -2172,8 +2172,8 @@ class GpuConv(GpuOp):
bmode = 0 bmode = 0
if max_threads_dim0 is None: if max_threads_dim0 is None:
raise NotImplementedError("GpuConv.c_code should not be called " raise NotImplementedError("GpuConv.c_code should not be called "
"directly. It should be called by " "directly. It should be called after "
"make_thunk() that add some information " "prepare_node() that add some information "
"related to the selected GPU.") "related to the selected GPU.")
sub.update(locals()) sub.update(locals())
return """ return """
......
...@@ -22,46 +22,34 @@ if have_fblas: ...@@ -22,46 +22,34 @@ if have_fblas:
class ScipyGer(Ger):
    """Ger (rank-1 update) Op implemented via scipy's BLAS ``?ger`` routines.

    Computes ``Z = A + alpha * outer(x, y)``; when ``self.destructive`` is
    true, A may be overwritten in place.
    """

    def prepare_node(self, node, storage_map, compute_map, impl=None):
        """Cache the dtype-specific BLAS ger callable on ``node.tag``.

        Parameters
        ----------
        node : Apply
            The node this Op instance was applied to; its first input's
            dtype selects the BLAS routine (e.g. sger vs dger).
        storage_map, compute_map : dict
            Standard prepare_node arguments; unused here.
        impl : str or None
            Implementation selector. ``None`` keeps the call
            backward-compatible with 4-argument call sites.

        Notes
        -----
        BUG FIX: the previous code referenced an undefined name ``impl``
        (a guaranteed NameError). It is now an explicit parameter with a
        default so existing callers keep working, and the BLAS callable is
        cached whenever the Python implementation (``perform``) may run.
        """
        if impl is None or impl == 'py':
            node.tag.local_ger = _blas_ger_fns[numpy.dtype(
                node.inputs[0].type.dtype)]

    def perform(self, node, inputs, output_storage):
        """Python implementation: rank-1 update through scipy BLAS."""
        cA, calpha, cx, cy = inputs
        cZ, = output_storage
        # N.B. some versions of scipy (e.g. mine) don't actually work
        # in-place on a, even when I tell it to.
        A = cA
        local_ger = node.tag.local_ger  # set by prepare_node
        if A.size == 0:
            # We don't have to compute anything, A is empty.
            # We need this special case because Numpy considers it
            # C-contiguous, which is confusing.
            if not self.destructive:
                # Sometimes numpy thinks empty matrices can share memory,
                # so here to stop DebugMode from complaining.
                A = A.copy()
        elif A.flags['C_CONTIGUOUS']:
            # BLAS ger works on Fortran-ordered arrays; a C-contiguous A
            # is the transpose of an F-contiguous array, so update A.T
            # with x and y swapped, then transpose back.
            A = local_ger(calpha, cy, cx, a=A.T,
                          overwrite_a=int(self.destructive)).T
        else:
            A = local_ger(calpha, cx, cy, a=A,
                          overwrite_a=int(self.destructive))
        cZ[0] = A


scipy_ger_no_inplace = ScipyGer(False)
scipy_ger_inplace = ScipyGer(True)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论