提交 b457bb54 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #1800 from abergeron/gpuarray_ger

Add GpuGer to gpuarray.
from theano import Op, Apply, config
from theano.tensor.blas import Dot22, Gemv, Gemm
from theano.tensor.blas import Dot22, Gemv, Gemm, Ger
from theano.sandbox.gpuarray.basic_ops import (HideC, as_gpuarray_variable)
try:
......@@ -28,7 +28,7 @@ class GpuGemv(BlasOp, Gemv):
A = as_gpuarray_variable(A)
x = as_gpuarray_variable(x)
y = as_gpuarray_variable(y)
assert A.dtype == x.dtype == y.dtype == alpha.dtype == beta.dtype
assert A.dtype == x.dtype == y.dtype
return Apply(self, [y, alpha, A, x, beta], [y.type()])
def perform(self, node, inputs, out_storage):
......@@ -45,8 +45,15 @@ class GpuGemv(BlasOp, Gemv):
if self.inplace:
code = """
Py_XDECREF(%(out)s);
%(out)s = %(y)s;
Py_INCREF(%(out)s);
if (%(y)s->ga.strides[0] <= 0) {
%(out)s = pygpu_copy(%(y)s, GA_ANY_ORDER);
if (%(out)s == NULL) {
%(fail)s
}
} else {
%(out)s = %(y)s;
Py_INCREF(%(out)s);
}
""" % vars
else:
code = """
......@@ -72,7 +79,7 @@ class GpuGemv(BlasOp, Gemv):
return code
def c_code_cache_version(self):
return (1,)
return (2,)
gpugemv_no_inplace = GpuGemv(inplace=False)
gpugemv_inplace = GpuGemv(inplace=True)
......@@ -84,7 +91,7 @@ class GpuGemm(BlasOp, Gemm):
A = as_gpuarray_variable(A)
B = as_gpuarray_variable(B)
C = as_gpuarray_variable(C)
assert A.dtype == B.dtype == C.dtype == alpha.dtype == beta.dtype
assert A.dtype == B.dtype == C.dtype
return Apply(self, [C, alpha, A, B, beta], [C.type()])
def perform(self, node, inputs, outputs):
......@@ -101,8 +108,15 @@ class GpuGemm(BlasOp, Gemm):
if self.inplace:
code = """
Py_XDECREF(%(out)s);
%(out)s = %(C)s;
Py_INCREF(%(out)s);
if (!GpuArray_ISONESEGMENT(&%(C)s->ga)) {
%(out)s = pygpu_copy(%(C)s, GA_ANY_ORDER);
if (%(out)s == NULL) {
%(fail)s
}
} else {
%(out)s = %(C)s;
Py_INCREF(%(out)s);
}
""" % vars
else:
code = """
......@@ -128,13 +142,74 @@ class GpuGemm(BlasOp, Gemm):
return code
def c_code_cache_version(self):
return (1,)
return (2,)
gpugemm_no_inplace = GpuGemm(inplace=False)
gpugemm_inplace = GpuGemm(inplace=True)
class GpuGer(BlasOp, Ger):
    """Rank-1 update (A + alpha * outer(x, y)) running on gpuarray data.

    Mirrors ``theano.tensor.blas.Ger``; ``destructive=True`` allows the
    C implementation to overwrite ``A`` in place when its buffer is one
    contiguous segment.
    """

    def make_node(self, A, alpha, x, y):
        # Run the CPU Ger.make_node purely for its validation side
        # effects (dtype/rank checks); its result node is discarded.
        res = Ger.make_node(self, A, alpha, x, y)
        A = as_gpuarray_variable(A)
        x = as_gpuarray_variable(x)
        y = as_gpuarray_variable(y)
        assert A.dtype == x.dtype == y.dtype
        return Apply(self, [A, alpha, x, y], [A.type()])

    def perform(self, node, inp, out):
        """Python fallback: delegate to BLAS ger.

        NOTE(review): ``blas`` must be imported at module level
        (presumably ``scipy.linalg.blas``) — not visible in this chunk,
        confirm against the full file.
        """
        A, alpha, x, y = inp
        inplace = self.destructive
        # Only overwrite A when it is contiguous (C- or Fortran-ordered).
        if inplace and not A.flags.forc:
            inplace = False
        # BUG FIX: the original assigned to ``outputs[0][0]`` but the
        # output-storage parameter is named ``out``, which raised a
        # NameError whenever the Python implementation ran.
        out[0][0] = blas.ger(alpha, x, y, A,
                             overwrite_a=inplace)

    def c_code(self, node, name, inp, out, sub):
        """Generate C code calling pygpu's BLAS ger kernel."""
        vars = dict(out=out[0], A=inp[0], alpha=inp[1], x=inp[2], y=inp[3],
                    fail=sub['fail'], name=name)
        if self.destructive:
            # Destructive path: reuse A's buffer when it is a single
            # contiguous segment, otherwise fall back to a copy.
            code = """
                   Py_XDECREF(%(out)s);
                   if (!GpuArray_ISONESEGMENT(&%(A)s->ga)) {
                     %(out)s = pygpu_copy(%(A)s, GA_ANY_ORDER);
                     if (%(out)s == NULL) {
                       %(fail)s
                     }
                   } else {
                     %(out)s = %(A)s;
                     Py_INCREF(%(out)s);
                   }
                   """ % vars
        else:
            # Non-destructive path: always work on a fresh copy of A.
            code = """
                   Py_XDECREF(%(out)s);
                   %(out)s = pygpu_copy(%(A)s, GA_ANY_ORDER);
                   if (%(out)s == NULL) {
                     %(fail)s
                   }
                   """ % vars
        # alpha lives on the host as a 0-d ndarray; read its scalar value.
        code += """
                if (pygpu_blas_rger(((dtype_%(alpha)s *)PyArray_DATA(%(alpha)s))[0],
                                    %(x)s, %(y)s, %(out)s, 0) == -1) {
                  %(fail)s
                }
                """ % vars
        if config.gpuarray.sync:
            code += """
                    GpuArray_sync(&%(out)s->ga);
                    """ % vars
        return code

    def c_code_cache_version(self):
        # perform() fix is Python-only; the generated C is unchanged.
        return (1,)
# Module-level singletons; the inplace-substitution optimizer swaps the
# non-destructive op for the destructive one once overwriting A is safe.
gpuger_no_inplace = GpuGer(destructive=False)
gpuger_inplace = GpuGer(destructive=True)
class GpuDot22(BlasOp, Dot22):
def make_node(self, x, y):
res = Dot22.make_node(self, x, y)
......@@ -211,8 +286,13 @@ def local_inplace_gpuagemm(node):
if node.op == gpugemm_no_inplace:
return [gpugemm_inplace(*node.inputs)]
@local_optimizer([gpuger_no_inplace], inplace=True)
def local_inplace_gpuager(node):
    """Swap a non-destructive GpuGer for its inplace counterpart."""
    if node.op != gpuger_no_inplace:
        return
    return [gpuger_inplace(*node.inputs)]
gpuablas_opt_inplace = in2out(LocalOptGroup(
local_inplace_gpuagemv, local_inplace_gpuagemm),
local_inplace_gpuagemv, local_inplace_gpuagemm, local_inplace_gpuager),
name='gpuablas_opt_inplace')
optdb.register('InplaceGpuaBlasOpt',
gpuablas_opt_inplace,
......
......@@ -17,7 +17,7 @@ from theano.sandbox.gpuarray.basic_ops import (host_from_gpu,
GpuAlloc,
GpuReshape,
GpuEye)
from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm
from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
from theano.sandbox.gpuarray.conv import GpuConv
from theano.sandbox.gpuarray.nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias,
GpuCrossentropySoftmax1HotWithBiasDx,
......@@ -302,23 +302,23 @@ def local_gpua_careduce(node):
@register_opt()
@op_lifter([tensor.blas.Gemv])
@op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv])
def local_gpua_gemv(node):
return GpuGemv(inplace=node.op.inplace)
@register_opt()
@op_lifter([tensor.blas_c.CGemv])
def local_gpua_gemv2(node):
    # Lift the C-accelerated CGemv to the gpuarray GpuGemv, preserving
    # its inplace flag.
    return GpuGemv(inplace=node.op.inplace)
@register_opt()
@op_lifter([tensor.blas.Gemm])
def local_gpua_gemm(node):
    # Lift the CPU Gemm to the gpuarray GpuGemm, preserving its
    # inplace flag.
    return GpuGemm(inplace=node.op.inplace)
@register_opt()
@op_lifter([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer])
def local_gpua_ger(node):
    # Lift any of the CPU Ger variants (reference, C, scipy) to the
    # gpuarray GpuGer, preserving the destructive (inplace) flag.
    return GpuGer(destructive=node.op.destructive)
@register_opt()
@op_lifter([tensor.blas.Dot22])
def local_gpua_dot22(node):
......
from unittest import TestCase
from nose.plugins.skip import SkipTest
import theano
from theano.tensor.blas import gemv_inplace, gemm_inplace, _dot22
from theano import tensor
from theano.tests import unittest_tools
from theano.tensor.blas import (gemv_inplace, gemm_inplace, ger_destructive,
_dot22)
from theano.tensor.tests.test_blas import TestGer, BaseGemv
from theano.sandbox.gpuarray.tests.test_basic_ops import makeTester, rand
from theano.sandbox.gpuarray import gpuarray_shared_constructor
from theano.sandbox.gpuarray.tests.test_basic_ops import (makeTester, rand,
mode_with_gpu)
from theano.sandbox.gpuarray.blas import (gpugemv_inplace,
gpugemm_inplace, gpu_dot22)
from theano.sandbox.gpuarray.blas import (gpugemv_inplace, gpugemv_no_inplace,
gpugemm_inplace, gpugemm_no_inplace,
gpuger_inplace, gpuger_no_inplace,
GpuGer, gpu_dot22)
GpuGemvTester = makeTester('GpuGemvTester',
......@@ -21,6 +30,21 @@ GpuGemvTester = makeTester('GpuGemvTester',
)
)
class TestGpuSgemv(TestCase, BaseGemv, unittest_tools.TestOptimizationMixin):
    """Run the generic Gemv test battery against the gpuarray ops."""
    mode = mode_with_gpu
    dtype = 'float32'
    # Ops the BaseGemv battery exercises.
    gemv = gpugemv_no_inplace
    gemv_inplace = gpugemv_inplace

    @staticmethod
    def shared(val):
        # Prefer a GPU-backed shared variable; fall back to the default
        # constructor for values the gpuarray backend cannot hold.
        try:
            return gpuarray_shared_constructor(val)
        except TypeError:
            return theano.shared(val)
GpuGemmTester = makeTester('GpuGemmTester',
op=gemm_inplace, gpu_op=gpugemm_inplace,
cases=dict(
......@@ -37,9 +61,40 @@ GpuGemmTester = makeTester('GpuGemmTester',
# test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
# test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
# test13=[rand(0, 0), -1.0, rand(0, 0), rand(0, 0), -1.1],
)
)
)
class TestGpuSger(TestGer):
    """Drive the generic Ger test battery against the gpuarray op."""

    def setUp(self):
        self.mode = mode_with_gpu
        # The lifting optimization is not dtype-dependent, so a single
        # float32 configuration is enough.
        self.dtype = 'float32'
        dt = self.dtype
        self.A = tensor.tensor(dtype=dt, broadcastable=(False, False))
        self.a = tensor.tensor(dtype=dt, broadcastable=())
        self.x = tensor.tensor(dtype=dt, broadcastable=(False,))
        self.y = tensor.tensor(dtype=dt, broadcastable=(False,))
        # Once the data lives on the GPU the op is always applied in
        # place, so both attributes point at the inplace variant.
        self.ger = gpuger_inplace
        self.ger_destructive = gpuger_inplace
        self.gemm = gpugemm_inplace

    def test_f32_0_0(self):
        raise SkipTest('0-sized objects not supported')

    def test_f32_1_0(self):
        raise SkipTest('0-sized objects not supported')

    def test_f32_0_1(self):
        raise SkipTest('0-sized objects not supported')
class TestGpuSgerNoTransfer(TestGpuSger):
    # Same Ger battery, but shared values are constructed directly on
    # the GPU so no host<->device transfer is optimized away.
    shared = staticmethod(gpuarray_shared_constructor)
class TestGpuGer_OpContract(TestCase, unittest_tools.T_OpContractMixin):
    """Check GpuGer satisfies the generic Op contract (eq/hash/clone)."""

    def setUp(self):
        self.ops = [gpuger_no_inplace, gpuger_inplace]

    def clone(self, op):
        # Rebuild an equivalent op from its only distinguishing flag.
        return GpuGer(destructive=op.destructive)
GpuDot22Tester = makeTester(
'GpuGemmTester',
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论