Small fixes on dimshuffle and reshape. tests pass.

a0febb21 · Pascal Lamblin · Caglar · 9895e2e2 · a0febb21
--- a/theano/sandbox/cuda/cula.py
+++ b/theano/sandbox/cuda/cula.py
@@ -6,7 +6,8 @@ from theano.sandbox.cuda import GpuOp

 from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable

-cula_available = False
+dimshuffle = cuda_ndarray.cuda_ndarray.dimshuffle
+
 try:
    from scikits.cuda import cula
    cula_available = False
@@ -74,11 +75,13 @@ class GpuSolve(GpuOp):
            # Solution vectors
            b = inputs[1][0]

-            A_cpy = A.copy()
-            b_cpy = b.copy()

-            # Convert b to F-order from c-order.
-            b_cpy = b_cpy.dimshuffle(1, 0).reshape((b.shape[0], b.shape[1]))
+            b_cpy = dimshuffle(b, (1, 0)).reshape((b.shape[0], b.shape[1]))
+
+            # This copy forces allocation of a new C-contiguous buffer
+            # and returns it.
+            A_cpy = A.copy()
+            b_cpy = b_cpy.copy()

            A_pycuda = to_gpuarray(A_cpy)
            b_pycuda = to_gpuarray(b_cpy)
@@ -117,8 +120,10 @@ class GpuSolve(GpuOp):

            A_pycuda, b_pycuda = cula_gpu_solve(A_pycuda, b_pycuda, self.trans)

-            # Convert b to F-order from c-order and assign it to output:
-            z[0] = b_cpy.reshape((b.shape[0], b.shape[1])).dimshuffle(1, 0)
+            #Convert b to F-order from c-order and assign it to output:
+            b_cpy = b_cpy.reshape(b.shape[::-1])
+            b_cpy = dimshuffle(b_cpy, (1, 0))
+            z[0] = b_cpy

        thunk.inputs = inputs
        thunk.outputs = outputs