提交 5f1e372d authored 作者: Alexander Matyasko's avatar Alexander Matyasko

Fix make node for gpu svd and add infer shape

上级 625e75cd
......@@ -8,7 +8,7 @@ import numpy as np
from numpy.linalg.linalg import LinAlgError
import theano
from theano import Op, config
from theano import Op, config, tensor
from theano.gof import COp
from theano.gpuarray import GpuArrayType
......@@ -382,20 +382,39 @@ class GpuMagmaSVD(COp):
A = as_gpuarray_variable(A, ctx_name)
if A.ndim != 2:
raise LinAlgError("Matrix rank error")
return theano.Apply(self, [A],
[A.type(),
GpuArrayType(A.dtype, broadcastable=[False],
context_name=ctx_name)(),
A.type()])
if self.compute_uv:
return theano.Apply(self, [A],
[A.type(),
GpuArrayType(A.dtype, broadcastable=[False],
context_name=ctx_name)(),
A.type()])
else:
return theano.Apply(self, [A],
[GpuArrayType(A.dtype, broadcastable=[False],
context_name=ctx_name)()])
def get_params(self, node):
    """Return the GPU context of the node's first input.

    The context object is handed to the COp at runtime as its params.
    """
    first_input = node.inputs[0]
    return first_input.type.context
def get_op_params(self):
    """Return the compile-time ``#define`` parameters for the C code.

    ``COMPUTE_UV`` and ``FULL_MATRICES`` are defined only when the
    corresponding flag is set on the op, so the C implementation can
    select code paths with ``#ifdef`` rather than testing a value
    (the C section of this op uses ``#ifdef COMPUTE_UV``).

    Returns
    -------
    list of (str, str)
        Macro name/value pairs; empty when neither flag is set.
    """
    # NOTE(review): the diff hunk contained both the old body
    # (unconditional int-valued defines) and this new conditional
    # body; the conditional form is the one the C code expects.
    params = []
    if self.compute_uv:
        params.append(('COMPUTE_UV', '1'))
    if self.full_matrices:
        params.append(('FULL_MATRICES', '1'))
    return params
def infer_shape(self, node, shapes):
    """Symbolically infer the output shapes from the input shape.

    For an (M, N) input the singular values S have shape (K,) with
    K = min(M, N).  When ``compute_uv`` is set, U is (M, M) and VT is
    (N, N) for ``full_matrices``, otherwise they are truncated to
    (M, K) and (K, N) respectively.
    """
    input_shape, = shapes
    rows, cols = input_shape
    k = tensor.minimum(rows, cols)
    if not self.compute_uv:
        return [(k,)]
    u_shape = (rows, rows) if self.full_matrices else (rows, k)
    vt_shape = (cols, cols) if self.full_matrices else (k, cols)
    return [u_shape, (k,), vt_shape]
def gpu_svd(a, full_matrices=1, compute_uv=1):
......
......@@ -4,8 +4,14 @@ setup_ext_cuda();
#section support_code_struct
int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U,
PyGpuArrayObject **S, PyGpuArrayObject **VT,
int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
#ifdef COMPUTE_UV
PyGpuArrayObject **U,
#endif
PyGpuArrayObject **S,
#ifdef COMPUTE_UV
PyGpuArrayObject **VT,
#endif
PyGpuContextObject *c) {
magma_int_t M, N, K, ldu, ldv, M_U, N_VT, info;
magma_vec_t jobu, jobv;
......@@ -56,37 +62,35 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U,
goto fail;
}
if (COMPUTE_UV) {
if (FULL_MATRICES) {
jobu = MagmaAllVec;
jobv = MagmaAllVec;
}
else {
jobu = MagmaSomeVec;
jobv = MagmaSomeVec;
}
M_U = (jobu == MagmaAllVec ? M : K);
N_VT = (jobv == MagmaAllVec ? N : K);
ldu = M;
ldv = N_VT;
if (MAGMA_SUCCESS != magma_smalloc_pinned(&u_data, M_U * M)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate memory");
goto fail;
}
if (MAGMA_SUCCESS != magma_smalloc_pinned(&vt_data, N * N_VT)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate memory");
goto fail;
}
#ifdef COMPUTE_UV
#ifdef FULL_MATRICES
jobu = MagmaAllVec;
jobv = MagmaAllVec;
#else
jobu = MagmaSomeVec;
jobv = MagmaSomeVec;
#endif
M_U = (jobu == MagmaAllVec ? M : K);
N_VT = (jobv == MagmaAllVec ? N : K);
ldu = M;
ldv = N_VT;
if (MAGMA_SUCCESS != magma_smalloc_pinned(&u_data, M_U * M)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate memory");
goto fail;
}
else {
jobu = MagmaNoVec;
jobv = MagmaNoVec;
ldu = M;
ldv = N;
if (MAGMA_SUCCESS != magma_smalloc_pinned(&vt_data, N * N_VT)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate memory");
goto fail;
}
#else
jobu = MagmaNoVec;
jobv = MagmaNoVec;
ldu = M;
ldv = N;
#endif
// query for workspace size
magma_sgesvd(jobu, jobv, M, N, NULL, M, NULL, NULL, ldu, NULL, ldv,
......@@ -124,6 +128,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U,
cudaMemcpy(PyGpuArray_DEV_DATA(*S), s_data, K * sizeof(float),
cudaMemcpyDeviceToDevice);
#ifdef COMPUTE_UV
u_dims[0] = N; u_dims[1] = N_VT;
if (theano_prep_output(U, 2, u_dims, A->ga.typecode, GA_C_ORDER, c) != 0){
PyErr_SetString(PyExc_RuntimeError,
......@@ -145,7 +150,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U,
// to match numpy.linalg.svd output
cudaMemcpy(PyGpuArray_DEV_DATA(*VT), u_data, M_U * M * sizeof(float),
cudaMemcpyDeviceToDevice);
#endif
res = 0;
fail:
if (a_data != NULL)
......
......@@ -292,7 +292,7 @@ class TestMagma(unittest.TestCase):
mode=mode_with_gpu.including('magma'))
A_val = rand(50, 100)
utt.assert_allclose(f_cpu(A_val), f_gpu(A_val)[1])
utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
A_val = rand(100, 50)
utt.assert_allclose(f_cpu(A_val), f_gpu(A_val)[1])
utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论