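Fix remaining problems in GpuCusolverSolve.

Add 'inplace' to __props__, check cuSOLVER availability in make_node,
keep the cuSOLVER handle on the GPU context instead of in a module-level
global, and replace the custom make_thunk with prepare_node/perform.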

5622fc2a · Arnaud Bergeron · 86be5809 · 5622fc2a
--- a/theano/gpuarray/linalg.py
+++ b/theano/gpuarray/linalg.py
@@ -18,8 +18,6 @@ try:
 except (ImportError, OSError, RuntimeError, pkg_resources.DistributionNotFound):
    pass
-cusolver_handle = None
 class GpuCusolverSolve(Op):
    """
@@ -32,7 +30,7 @@ class GpuCusolverSolve(Op):
    """
-    __props__ = ('trans',)
+    __props__ = ('trans', 'inplace')
    def __init__(self, trans='N', inplace=False):
        self.trans = trans
@@ -42,10 +40,13 @@ class GpuCusolverSolve(Op):
        super(GpuCusolverSolve, self).__init__()
    def make_node(self, inp1, inp2):
-        self.context = basic_ops.infer_context_name(inp1, inp2)
+        if not cusolver_available:
+            raise RuntimeError('CUSOLVER is not available and '
+                               'GpuCusolverSolve Op can not be constructed.')
+        context_name = basic_ops.infer_context_name(inp1, inp2)
-        inp1 = basic_ops.as_gpuarray_variable(inp1, self.context)
+        inp1 = basic_ops.as_gpuarray_variable(inp1, context_name)
-        inp2 = basic_ops.as_gpuarray_variable(inp2, self.context)
+        inp2 = basic_ops.as_gpuarray_variable(inp2, context_name)
        inp1 = basic_ops.gpu_contiguous(inp1)
        inp2 = basic_ops.gpu_contiguous(inp2)
@@ -62,33 +63,24 @@ class GpuCusolverSolve(Op):
                          broadcastable=inp1.broadcastable,
                          context_name=self.context)()])
-    def make_thunk(self,
+    def prepare_node(self, node, storage_map, compute_map, impl):
-                   node,
+        ctx = node.inputs[0].type.context
-                   storage_map, _,
+        handle = getattr(ctx, 'cusolver_handle', None)
-                   no_recycling=[],
+        if handle is None:
-                   impl=None):
+            with ctx:
-        if not cusolver_available:
+                ctx.cusolver_handle = cusolver.cusolverDnCreate()
-            raise RuntimeError('CUSOLVER is not available and '
-                               'GpuCusolverSolve Op can not be constructed.')
-        inputs = [storage_map[v] for v in node.inputs]
-        outputs = [storage_map[v] for v in node.outputs]
-        global cusolver_handle
-        if cusolver_handle is None:
-            cusolver_handle = cusolver.cusolverDnCreate()
-        def thunk():
+    def perform(self, node, inputs, outputs):
        context = inputs[0][0].context
        # Size of the matrices to invert.
        z = outputs[0]
        # Matrix.
-            A = inputs[0][0]
+        A = inputs[0]
        # Solution vectors.
-            b = inputs[1][0]
+        b = inputs[1]
        assert(len(A.shape) == 2)
        assert(len(b.shape) == 2)
@@ -124,29 +116,22 @@ class GpuCusolverSolve(Op):
        if A.flags['C_CONTIGUOUS']:
            trans = 1 - trans
+        with context:
            workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
                cusolver_handle, n, n, A_ptr, lda)
-            if (thunk.workspace is None or
+        workspace = pygpu.zeros(workspace_size, dtype='float32',
-                    thunk.workspace.size != workspace_size):
-                thunk.workspace = pygpu.zeros(workspace_size,
-                                              dtype='float32',
                                context=context)
-            if thunk.pivots is None or thunk.pivots.size != min(n, n):
+        pivots = pygpu.zeros(n, dtype='int32', context=context)
-                thunk.pivots = pygpu.zeros(n,
-                                           dtype='int32',
-                                           context=context)
-            if thunk.dev_info is None:
+        dev_info = pygpu.zeros((1,), dtype='int32', context=context)
-                thunk.dev_info = pygpu.zeros((1,),
-                                             dtype='int32',
-                                             context=context)
        workspace_ptr = thunk.workspace.gpudata
        pivots_ptr = thunk.pivots.gpudata
        dev_info_ptr = thunk.dev_info.gpudata
+        with context:
            cusolver.cusolverDnSgetrf(
                cusolver_handle, n, n, A_ptr, lda, workspace_ptr,
                pivots_ptr, dev_info_ptr)
@@ -157,16 +142,6 @@ class GpuCusolverSolve(Op):
        z[0] = b
-        thunk.inputs = inputs
-        thunk.outputs = outputs
-        thunk.lazy = False
-        thunk.workspace = None
-        thunk.pivots = None
-        thunk.dev_info = None
-        return thunk
 def gpu_solve(A, b, trans='N'):
    return GpuCusolverSolve(trans)(A, b)