提交 0c354ad5 authored 作者: Frederic's avatar Frederic

Fix gh-400: remove GpuOuter completely. It is replaced by GpuGer.

I checked and everything is fine after this removal.
上级 a22a1212
...@@ -497,101 +497,6 @@ gpu_ger_no_inplace = GpuGer(inplace=False) ...@@ -497,101 +497,6 @@ gpu_ger_no_inplace = GpuGer(inplace=False)
gpu_ger_inplace = GpuGer(inplace=True) gpu_ger_inplace = GpuGer(inplace=True)
class GpuOuter(GpuOp):
    """Compute the outer product of two float32 vectors on the GPU.

    Given 1-d inputs x (length m) and y (length n), produces the m x n
    matrix A = x * y' by delegating to CudaNdarray_sger (BLAS sger with
    alpha=1 into a zeroed output).
    """

    def make_node(self, x, y):
        # we suppose type checking has been done, but make sure.
        assert (x.type.ndim == 1 and y.type.ndim == 1 and
                x.type.dtype == 'float32' and y.type.dtype == 'float32')
        # Output broadcastable pattern is (x_bc, y_bc): a dimension of the
        # result is broadcastable iff the corresponding input vector is.
        bz = [x.type.broadcastable[0], y.type.broadcastable[0]]
        outputs = [CudaNdarrayType(dtype='float32', broadcastable=bz)()]
        return Apply(self, [x, y], outputs)

    # The op carries no parameters, so all instances compare/hash equal.
    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def c_code_cache_version(self):
        return (4,)

    def c_code(self, node, name, inputs, outputs, sub):
        # A = x * y'
        x, y = inputs
        A, = outputs
        fail = sub['fail']
        # Generated C:
        #  1. Copy x (resp. y) if it has a negative stride — sger requires
        #    non-negative strides; otherwise borrow it with an INCREF.
        #  2. Reuse the output A only if it already has the right shape and
        #    is C-contiguous; in that case memset it to zero because sger
        #    ACCUMULATES into A.  Otherwise allocate a fresh zeroed array.
        #  3. Run sger(1.0, x, y, A), then drop the temporary references.
        return """
        CudaNdarray *%(name)sx = NULL, *%(name)sy = NULL;
        int %(name)sres;
        if (CudaNdarray_HOST_STRIDES(%(x)s)[0] < 0) {
            %(name)sx = (CudaNdarray *)CudaNdarray_Copy(%(x)s);
            if (!%(name)sx) {
                %(fail)s;
            }
        } else {
            %(name)sx = %(x)s;
            Py_INCREF(%(name)sx);
        }
        if (CudaNdarray_HOST_STRIDES(%(y)s)[0] < 0) {
            %(name)sy = (CudaNdarray *)CudaNdarray_Copy(%(y)s);
            if (!%(name)sy) {
                Py_DECREF(%(name)sx);
                %(fail)s;
            }
        } else {
            %(name)sy = %(y)s;
            Py_INCREF(%(name)sy);
        }
        if (!(%(A)s &&
              CudaNdarray_HOST_DIMS(%(A)s)[0] ==
              CudaNdarray_HOST_DIMS(%(x)s)[0] &&
              CudaNdarray_HOST_DIMS(%(A)s)[1] ==
              CudaNdarray_HOST_DIMS(%(y)s)[0] &&
              CudaNdarray_is_c_contiguous(%(A)s))) {
            Py_XDECREF(%(A)s);
            int dims[2];
            dims[0] = CudaNdarray_HOST_DIMS(%(x)s)[0];
            dims[1] = CudaNdarray_HOST_DIMS(%(y)s)[0];
            %(A)s = (CudaNdarray *)CudaNdarray_ZEROS(2, dims);
            if (!%(A)s) {
                Py_DECREF(%(name)sy);
                Py_DECREF(%(name)sx);
                %(fail)s;
            }
        }
        else
        {
            // sger accumulates into A. We need to zero it first.
            int total_size = (sizeof(real) *
                              CudaNdarray_HOST_DIMS(%(A)s)[0] *
                              CudaNdarray_HOST_DIMS(%(A)s)[1]);
            if (cudaSuccess != cudaMemset(%(A)s->devdata, 0, total_size))
            {
                PyErr_Format(PyExc_MemoryError,
                             "GpuOuter: Error memsetting %%d bytes of device memory.",
                             total_size);
                Py_DECREF(%(name)sy);
                Py_DECREF(%(name)sx);
                %(fail)s;
            }
        }
        %(name)sres = CudaNdarray_sger(1.0, %(name)sx, %(name)sy, %(A)s);
        Py_DECREF(%(name)sy);
        Py_DECREF(%(name)sx);
        if (%(name)sres) {
            %(fail)s;
        }
        """ % dict(x=x, y=y, A=A, fail=fail, name=name)


# Module-level singleton used by the graph optimizers.
gpu_outer = GpuOuter()
## ##
# Not really a BLAS operation, but whatever. # Not really a BLAS operation, but whatever.
# #
......
...@@ -21,7 +21,7 @@ from theano.gof.python25 import all, any ...@@ -21,7 +21,7 @@ from theano.gof.python25 import all, any
from theano.sandbox.cuda.basic_ops import * from theano.sandbox.cuda.basic_ops import *
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.blas import (gpu_dot22, gpu_dot22scalar, from theano.sandbox.cuda.blas import (gpu_dot22, gpu_dot22scalar,
gpu_gemm_inplace, gpu_gemm_no_inplace, gpu_outer, GpuConv) gpu_gemm_inplace, gpu_gemm_no_inplace, GpuConv)
from theano.sandbox.cuda.blas import gpu_gemv_inplace from theano.sandbox.cuda.blas import gpu_gemv_inplace
from theano.sandbox.cuda.blas import gpu_gemv_no_inplace from theano.sandbox.cuda.blas import gpu_gemv_no_inplace
from theano.sandbox.cuda.blas import gpu_ger_inplace from theano.sandbox.cuda.blas import gpu_ger_inplace
...@@ -579,26 +579,6 @@ def local_gpu_gemm(node): ...@@ -579,26 +579,6 @@ def local_gpu_gemm(node):
return False return False
@register_opt()
@local_optimizer([])
def local_gpu_outer(node):
    """Rewrite ``gpu_dot22(col, row)`` into a single ``gpu_outer`` node.

    A dot of a column (m x 1) with a row (1 x n) is exactly an outer
    product, so the dedicated op can be used instead.
    """
    if node.op != gpu_dot22:
        return False
    left, right = node.inputs
    # Only fires when the shapes prove col @ row: left's second dim and
    # right's first dim are both broadcastable (i.e. of length 1).
    if not (left.type.broadcastable[1] and right.type.broadcastable[0]):
        return False
    # TODO: we would like to remove the double-dimshuffle when
    # left or right is already the output of a GpuDimshuffle. To do
    # this, refactor the logic in tensor/opt.py that collapses
    # dimshuffle chains so that we can call it from here.
    col_as_vec = GpuDimShuffle(left.broadcastable, [0])(left)
    row_as_vec = GpuDimShuffle(right.broadcastable, [1])(right)
    return [gpu_outer(col_as_vec, row_as_vec)]
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([])
def local_gpu_sum(node): def local_gpu_sum(node):
......
...@@ -200,31 +200,6 @@ class TestBlasStridesGpu(TestBlasStrides): ...@@ -200,31 +200,6 @@ class TestBlasStridesGpu(TestBlasStrides):
mode = mode_with_gpu mode = mode_with_gpu
def test_outer():
    """Check ``tensor.outer`` on the GPU against ``numpy.outer``.

    Covers contiguous inputs plus strided and reversed views of either
    operand, since negative/odd strides exercise the copy paths of the
    GPU implementation.
    """
    x = tcn.shared_constructor(my_rand(8,), 'x')
    y = tcn.shared_constructor(my_rand(6,), 'y')
    x_val = x.get_value().copy()
    y_val = y.get_value().copy()

    # (x_slice, y_slice) pairs; None means "use the variable unsliced".
    slice_cases = [
        (None, None),
        (slice(None, None, 2), None),
        (None, slice(None, None, 3)),
        (slice(None, None, 2), slice(None, None, 3)),
        (slice(None, None, -1), None),
        (None, slice(None, None, -1)),
    ]
    for sx, sy in slice_cases:
        xs = x if sx is None else x[sx]
        ys = y if sy is None else y[sy]
        xv = x_val if sx is None else x_val[sx]
        yv = y_val if sy is None else y_val[sy]
        f = pfunc([], tensor.outer(xs, ys), mode=mode_with_gpu)
        assert numpy.allclose(numpy.outer(xv, yv), f())
if 0: if 0:
# This is commented out because it doesn't make sense... # This is commented out because it doesn't make sense...
# tcn.blas has no op called DownsampleFactorMax # tcn.blas has no op called DownsampleFactorMax
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论