Commit f8b8c31a authored by Arnaud Bergeron

Add GpuGer to gpuarray.

Parent 9a44f9bf
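For context (not part of this commit): GER is the BLAS rank-1 update A <- alpha * x * y^T + A. A minimal NumPy reference for the computation GpuGer brings to the gpuarray backend:

import numpy as np

def ger_reference(alpha, x, y, A, overwrite_a=False):
    # Rank-1 update: A <- alpha * outer(x, y) + A (BLAS ?GER).
    out = A if overwrite_a else A.copy()
    out += alpha * np.outer(x, y)
    return out

A = np.random.rand(4, 5)
x, y = np.random.rand(4), np.random.rand(5)
assert np.allclose(ger_reference(0.6, x, y, A),
                   A + 0.6 * np.outer(x, y))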
from theano import Op, Apply, config
from theano.tensor.blas import Dot22, Gemv, Gemm, Ger
from theano.sandbox.gpuarray.basic_ops import (HideC, as_gpuarray_variable)
try:
@@ -135,6 +135,60 @@ gpugemm_no_inplace = GpuGemm(inplace=False)
gpugemm_inplace = GpuGemm(inplace=True)
class GpuGer(BlasOp, Ger):
    def make_node(self, A, alpha, x, y):
        # Run the parent class checks; the Apply it returns is discarded
        # and rebuilt below with GPU variables.
        res = Ger.make_node(self, A, alpha, x, y)
        A = as_gpuarray_variable(A)
        x = as_gpuarray_variable(x)
        y = as_gpuarray_variable(y)
        assert A.dtype == x.dtype == y.dtype == alpha.dtype
        return Apply(self, [A, alpha, x, y], [A.type()])

    def perform(self, node, inp, out):
        A, alpha, x, y = inp
        inplace = self.destructive
        # Only overwrite A when it is C- or F-contiguous.
        if inplace and not A.flags.forc:
            inplace = False
        # `blas` here is presumably pygpu.blas, imported in the
        # truncated try block above.
        out[0][0] = blas.ger(alpha, x, y, A,
                             overwrite_a=inplace)
    def c_code(self, node, name, inp, out, sub):
        vars = dict(out=out[0], A=inp[0], alpha=inp[1], x=inp[2], y=inp[3],
                    fail=sub['fail'], name=name)
        if self.destructive:
            code = """
        Py_XDECREF(%(out)s);
        %(out)s = %(A)s;
        Py_INCREF(%(out)s);
        """ % vars
        else:
            code = """
        Py_XDECREF(%(out)s);
        %(out)s = pygpu_copy(%(A)s, GA_ANY_ORDER);
        if (%(out)s == NULL) {
            %(fail)s
        }
        """ % vars
        code += """
        if (pygpu_blas_rger(((dtype_%(alpha)s *)PyArray_DATA(%(alpha)s))[0],
                            %(x)s, %(y)s, %(out)s, 0) == -1) {
            %(fail)s
        }
        """ % vars
        if config.gpuarray.sync:
            code += """
        GpuArray_sync(&%(out)s->ga);
        """ % vars
        return code

    def c_code_cache_version(self):
        return (0,)
gpuger_no_inplace = GpuGer(destructive=False)
gpuger_inplace = GpuGer(destructive=True)
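A minimal usage sketch (not in the commit; assumes a working pygpu context): make_node moves A, x and y to the GPU through as_gpuarray_variable, so the op can be applied to host tensors directly.

import theano.tensor as T
from theano.sandbox.gpuarray.blas import gpuger_no_inplace

A = T.fmatrix('A')
alpha = T.fscalar('alpha')
x, y = T.fvector('x'), T.fvector('y')
# Builds an Apply node whose output has A's GPU type; the destructive
# variant (gpuger_inplace) would additionally overwrite A's storage.
out = gpuger_no_inplace(A, alpha, x, y)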
class GpuDot22(BlasOp, Dot22):
    def make_node(self, x, y):
        res = Dot22.make_node(self, x, y)
@@ -211,6 +265,11 @@ def local_inplace_gpuagemm(node):
    if node.op == gpugemm_no_inplace:
        return [gpugemm_inplace(*node.inputs)]
@local_optimizer([gpuger_no_inplace], inplace=True)
def local_inplace_gpuager(node):
    if node.op == gpuger_no_inplace:
        return [gpuger_inplace(*node.inputs)]
gpuablas_opt_inplace = in2out(LocalOptGroup(
    local_inplace_gpuagemv, local_inplace_gpuagemm, local_inplace_gpuager),
    name='gpuablas_opt_inplace')
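For illustration (not in the commit): the rewrite only changes where the result is stored, never its value. The destructive/non-destructive contract, in NumPy terms:

import numpy as np

A = np.ones((2, 3), dtype='float32')
x = np.ones(2, dtype='float32')
y = np.ones(3, dtype='float32')

# Non-destructive: the update goes into a fresh copy, A is untouched.
out = A + 0.5 * np.outer(x, y)
assert not np.shares_memory(out, A)

# Destructive: the update is written into A's own storage.
A += 0.5 * np.outer(x, y)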
......
@@ -17,7 +17,7 @@ from theano.sandbox.gpuarray.basic_ops import (host_from_gpu,
                                               GpuAlloc,
                                               GpuReshape,
                                               GpuEye)
from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
from theano.sandbox.gpuarray.conv import GpuConv
from theano.sandbox.gpuarray.nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias,
                                          GpuCrossentropySoftmax1HotWithBiasDx,
@@ -302,23 +302,23 @@ def local_gpua_careduce(node):
@register_opt()
@op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv])
def local_gpua_gemv(node):
    return GpuGemv(inplace=node.op.inplace)
@register_opt()
@op_lifter([tensor.blas.Gemm])
def local_gpua_gemm(node):
    return GpuGemm(inplace=node.op.inplace)
@register_opt()
@op_lifter([tensor.blas.Ger, tensor.blas_c.CGer])
def local_gpua_ger(node):
    return GpuGer(destructive=node.op.destructive)
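A hedged sketch (not in the commit) of what this lifter achieves: with a gpuarray device configured and these optimizations active, a host-side rank-1 update should compile down to a GpuGer node. Whether a Ger node appears at all still depends on Theano's regular BLAS rewrites firing first:

import theano
import theano.tensor as T
from theano.sandbox.gpuarray.blas import GpuGer

A = T.fmatrix('A')
x, y = T.fvector('x'), T.fvector('y')
f = theano.function([A, x, y], A + 0.5 * T.outer(x, y))
# Hypothetical check: a GpuGer node shows up in the optimized graph.
assert any(isinstance(n.op, GpuGer) for n in f.maker.fgraph.toposort())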
@register_opt()
@op_lifter([tensor.blas.Dot22])
def local_gpua_dot22(node):
......
from unittest import TestCase

import theano
from theano.tensor.blas import (gemv_inplace, gemm_inplace, ger_destructive,
                                _dot22)
from theano.sandbox.gpuarray.tests.test_basic_ops import makeTester, rand
from theano.sandbox.gpuarray.blas import (gpugemv_inplace,
                                          gpugemm_inplace, gpuger_inplace,
                                          gpu_dot22)
GpuGemvTester = makeTester('GpuGemvTester',
@@ -40,6 +42,18 @@ GpuGemmTester = makeTester('GpuGemmTester',
    )
)
GpuGerTester = makeTester(
    'GpuGerTester',
    op=ger_destructive, gpu_op=gpuger_inplace,
    cases=dict(
        test1=[rand(4, 5), 1.0, rand(4), rand(5)],
        test2=[rand(4, 5), 0.6, rand(4), rand(5)],
        test3=[rand(4, 5), -1.0, rand(4), rand(5)],
        test4=[rand(4, 5), -0.6, rand(4), rand(5)],
        test5=[rand(4, 5), 0.0, rand(4), rand(5)],
    )
)
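One remark on the cases (not in the commit): test5 exercises alpha = 0.0, where the rank-1 term vanishes and the op must return A numerically unchanged. A quick NumPy check of that expectation:

import numpy as np

A = np.random.rand(4, 5).astype('float32')
x = np.random.rand(4).astype('float32')
y = np.random.rand(5).astype('float32')
# With alpha == 0 the update is a no-op on the values of A.
assert np.allclose(A + 0.0 * np.outer(x, y), A)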
GpuDot22Tester = makeTester(
    'GpuDot22Tester',
......