Merge pull request #5286 from harmdevries89/gemm_zeros

Uninitialized memory returned from empty GpuGemv dot-product

Merge pull request #5286 from harmdevries89/gemm_zeros
236cc634 · Frédéric Bastien · GitHub · 53b8093e · 4080eb96 · 236cc634
--- a/theano/gpuarray/blas.py
+++ b/theano/gpuarray/blas.py
@@ -92,7 +92,14 @@ class GpuGemv(BlasOp):
                   }
                   """ % vars
        code += """
-        if (pygpu_blas_rgemv(cb_no_trans,
+        if (PyGpuArray_DIM(%(A)s, 1) == 0) {
+          int code;
+          code = GpuArray_memset(&%(out)s->ga, 0);
+          if (code != GA_NO_ERROR) {
+            PyErr_SetString(PyExc_RuntimeError, "Memset failed");
+            %(fail)s
+          }
+        } else if (pygpu_blas_rgemv(cb_no_trans,
                             ((dtype_%(alpha)s *)PyArray_DATA(%(alpha)s))[0],
                             %(A)s, %(x)s,
                             ((dtype_%(beta)s *)PyArray_DATA(%(beta)s))[0],
@@ -107,7 +114,7 @@ class GpuGemv(BlasOp):
        return code
    def c_code_cache_version(self):
-        return (4,)
+        return (5,)
 gpugemv_no_inplace = GpuGemv(inplace=False)
 gpugemv_inplace = GpuGemv(inplace=True)

--- a/theano/gpuarray/tests/test_blas.py
+++ b/theano/gpuarray/tests/test_blas.py
@@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function, division
 from unittest import TestCase
 from nose.plugins.skip import SkipTest
 import itertools
+import numpy
 import theano
 from theano import tensor
@@ -128,3 +129,17 @@ GpuDot22Tester = makeTester(
        # test9=[rand(0, 0), rand(0, 0)],
    )
 )
+def test_gemv_zeros():
+    W = tensor.matrix()
+    v = tensor.vector()
+    f = theano.function([W, v], W.dot(v), mode=mode_with_gpu)
+    # Apply to an empty (dim, 0) matrix and an empty (0,) vector.
+    dim = 1000
+    A = numpy.zeros((dim, 0), dtype=theano.config.floatX)
+    b = numpy.zeros((0,), dtype=theano.config.floatX)
+    tmp = f(A, b)
+    assert numpy.allclose(tmp,
+                          numpy.zeros((dim,)))