提交 0c354ad5 authored 作者: Frederic's avatar Frederic

Fix gh-400: remove GpuOuter completely. It is replaced by GpuGer.

I checked and everything is fine after this removal.
上级 a22a1212
...@@ -497,101 +497,6 @@ gpu_ger_no_inplace = GpuGer(inplace=False) ...@@ -497,101 +497,6 @@ gpu_ger_no_inplace = GpuGer(inplace=False)
gpu_ger_inplace = GpuGer(inplace=True) gpu_ger_inplace = GpuGer(inplace=True)
class GpuOuter(GpuOp):
    """Compute the outer product of two float32 vectors on the GPU.

    Given 1-d inputs x (length m) and y (length n), produces the m x n
    matrix A = x * y' by delegating to CudaNdarray_sger (BLAS sger with
    alpha=1 into a zeroed output).
    """

    def make_node(self, x, y):
        # we suppose type checking has been done, but make sure.
        assert (x.type.ndim == 1 and y.type.ndim == 1 and
                x.type.dtype == 'float32' and y.type.dtype == 'float32')
        # Output broadcastable pattern is (x_bc, y_bc): a dimension of the
        # result is broadcastable iff the corresponding input vector is.
        bz = [x.type.broadcastable[0], y.type.broadcastable[0]]
        outputs = [CudaNdarrayType(dtype='float32', broadcastable=bz)()]
        return Apply(self, [x, y], outputs)

    # The op carries no parameters, so all instances compare/hash equal.
    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def c_code_cache_version(self):
        return (4,)

    def c_code(self, node, name, inputs, outputs, sub):
        # A = x * y'
        x, y = inputs
        A, = outputs
        fail = sub['fail']
        # Generated C:
        #  1. Copy x (resp. y) if it has a negative stride — sger requires
        #    non-negative strides; otherwise borrow it with an INCREF.
        #  2. Reuse the output A only if it already has the right shape and
        #    is C-contiguous; in that case memset it to zero because sger
        #    ACCUMULATES into A.  Otherwise allocate a fresh zeroed array.
        #  3. Run sger(1.0, x, y, A), then drop the temporary references.
        return """
        CudaNdarray *%(name)sx = NULL, *%(name)sy = NULL;
        int %(name)sres;
        if (CudaNdarray_HOST_STRIDES(%(x)s)[0] < 0) {
            %(name)sx = (CudaNdarray *)CudaNdarray_Copy(%(x)s);
            if (!%(name)sx) {
                %(fail)s;
            }
        } else {
            %(name)sx = %(x)s;
            Py_INCREF(%(name)sx);
        }
        if (CudaNdarray_HOST_STRIDES(%(y)s)[0] < 0) {
            %(name)sy = (CudaNdarray *)CudaNdarray_Copy(%(y)s);
            if (!%(name)sy) {
                Py_DECREF(%(name)sx);
                %(fail)s;
            }
        } else {
            %(name)sy = %(y)s;
            Py_INCREF(%(name)sy);
        }
        if (!(%(A)s &&
              CudaNdarray_HOST_DIMS(%(A)s)[0] ==
              CudaNdarray_HOST_DIMS(%(x)s)[0] &&
              CudaNdarray_HOST_DIMS(%(A)s)[1] ==
              CudaNdarray_HOST_DIMS(%(y)s)[0] &&
              CudaNdarray_is_c_contiguous(%(A)s))) {
            Py_XDECREF(%(A)s);
            int dims[2];
            dims[0] = CudaNdarray_HOST_DIMS(%(x)s)[0];
            dims[1] = CudaNdarray_HOST_DIMS(%(y)s)[0];
            %(A)s = (CudaNdarray *)CudaNdarray_ZEROS(2, dims);
            if (!%(A)s) {
                Py_DECREF(%(name)sy);
                Py_DECREF(%(name)sx);
                %(fail)s;
            }
        }
        else
        {
            // sger accumulates into A. We need to zero it first.
            int total_size = (sizeof(real) *
                              CudaNdarray_HOST_DIMS(%(A)s)[0] *
                              CudaNdarray_HOST_DIMS(%(A)s)[1]);
            if (cudaSuccess != cudaMemset(%(A)s->devdata, 0, total_size))
            {
                PyErr_Format(PyExc_MemoryError,
                             "GpuOuter: Error memsetting %%d bytes of device memory.",
                             total_size);
                Py_DECREF(%(name)sy);
                Py_DECREF(%(name)sx);
                %(fail)s;
            }
        }
        %(name)sres = CudaNdarray_sger(1.0, %(name)sx, %(name)sy, %(A)s);
        Py_DECREF(%(name)sy);
        Py_DECREF(%(name)sx);
        if (%(name)sres) {
            %(fail)s;
        }
        """ % dict(x=x, y=y, A=A, fail=fail, name=name)


# Module-level singleton used by the graph optimizers.
gpu_outer = GpuOuter()
## ##
# Not really a BLAS operation, but whatever. # Not really a BLAS operation, but whatever.
# #
......
...@@ -21,7 +21,7 @@ from theano.gof.python25 import all, any ...@@ -21,7 +21,7 @@ from theano.gof.python25 import all, any
from theano.sandbox.cuda.basic_ops import * from theano.sandbox.cuda.basic_ops import *
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.blas import (gpu_dot22, gpu_dot22scalar, from theano.sandbox.cuda.blas import (gpu_dot22, gpu_dot22scalar,
gpu_gemm_inplace, gpu_gemm_no_inplace, gpu_outer, GpuConv) gpu_gemm_inplace, gpu_gemm_no_inplace, GpuConv)
from theano.sandbox.cuda.blas import gpu_gemv_inplace from theano.sandbox.cuda.blas import gpu_gemv_inplace
from theano.sandbox.cuda.blas import gpu_gemv_no_inplace from theano.sandbox.cuda.blas import gpu_gemv_no_inplace
from theano.sandbox.cuda.blas import gpu_ger_inplace from theano.sandbox.cuda.blas import gpu_ger_inplace
...@@ -579,26 +579,6 @@ def local_gpu_gemm(node): ...@@ -579,26 +579,6 @@ def local_gpu_gemm(node):
return False return False
@register_opt()
@local_optimizer([])
def local_gpu_outer(node):
    """Rewrite ``gpu_dot22(col, row)`` into a single ``gpu_outer`` node.

    A dot of a column (m x 1) with a row (1 x n) is exactly an outer
    product, so the dedicated op can be used instead.
    """
    if node.op != gpu_dot22:
        return False
    left, right = node.inputs
    # Only fires when the shapes prove col @ row: left's second dim and
    # right's first dim are both broadcastable (i.e. of length 1).
    if not (left.type.broadcastable[1] and right.type.broadcastable[0]):
        return False
    # TODO: we would like to remove the double-dimshuffle when
    # left or right is already the output of a GpuDimshuffle. To do
    # this, refactor the logic in tensor/opt.py that collapses
    # dimshuffle chains so that we can call it from here.
    col_as_vec = GpuDimShuffle(left.broadcastable, [0])(left)
    row_as_vec = GpuDimShuffle(right.broadcastable, [1])(right)
    return [gpu_outer(col_as_vec, row_as_vec)]
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([])
def local_gpu_sum(node): def local_gpu_sum(node):
......
...@@ -200,31 +200,6 @@ class TestBlasStridesGpu(TestBlasStrides): ...@@ -200,31 +200,6 @@ class TestBlasStridesGpu(TestBlasStrides):
mode = mode_with_gpu mode = mode_with_gpu
def test_outer():
    """Check ``tensor.outer`` on the GPU against ``numpy.outer``.

    Covers contiguous inputs plus strided and reversed views of either
    operand, since negative/odd strides exercise the copy paths of the
    GPU implementation.
    """
    x = tcn.shared_constructor(my_rand(8,), 'x')
    y = tcn.shared_constructor(my_rand(6,), 'y')
    x_val = x.get_value().copy()
    y_val = y.get_value().copy()

    # (x_slice, y_slice) pairs; None means "use the variable unsliced".
    slice_cases = [
        (None, None),
        (slice(None, None, 2), None),
        (None, slice(None, None, 3)),
        (slice(None, None, 2), slice(None, None, 3)),
        (slice(None, None, -1), None),
        (None, slice(None, None, -1)),
    ]
    for sx, sy in slice_cases:
        xs = x if sx is None else x[sx]
        ys = y if sy is None else y[sy]
        xv = x_val if sx is None else x_val[sx]
        yv = y_val if sy is None else y_val[sy]
        f = pfunc([], tensor.outer(xs, ys), mode=mode_with_gpu)
        assert numpy.allclose(numpy.outer(xv, yv), f())
if 0: if 0:
# This is commented out because it doesn't make sense... # This is commented out because it doesn't make sense...
# tcn.blas has no op called DownsampleFactorMax # tcn.blas has no op called DownsampleFactorMax
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论