提交 26941db0 authored 作者: Frederic's avatar Frederic

Add GpuDot22

上级 b7b88b1c
from theano import Op, Apply, config from theano import Op, Apply, config
from theano.tensor.blas import Gemv, Gemm from theano.tensor.blas import Dot22, Gemv, Gemm
from theano.sandbox.gpuarray.basic_ops import (HideC, as_gpuarray_variable) from theano.sandbox.gpuarray.basic_ops import (HideC, as_gpuarray_variable)
try: try:
...@@ -128,12 +128,73 @@ class GpuGemm(BlasOp, Gemm): ...@@ -128,12 +128,73 @@ class GpuGemm(BlasOp, Gemm):
return code return code
def c_code_cache_version(self): def c_code_cache_version(self):
return
return (0,) return (0,)
gpugemm_no_inplace = GpuGemm(inplace=False) gpugemm_no_inplace = GpuGemm(inplace=False)
gpugemm_inplace = GpuGemm(inplace=True) gpugemm_inplace = GpuGemm(inplace=True)
class GpuDot22(BlasOp, Dot22):
    """GPU matrix-matrix product of two 2d tensors (the gpuarray
    counterpart of ``theano.tensor.blas.Dot22``).

    Inputs are converted to GpuArray variables; the output is a new GPU
    matrix of shape ``(x.shape[0], y.shape[1])`` with the inputs' dtype.
    """

    def make_node(self, x, y):
        # Call the parent's make_node purely for its input validation
        # (it raises on non-matrix / mismatched inputs); its Apply node
        # is discarded in favour of one built on GPU variables.
        Dot22.make_node(self, x, y)
        x = as_gpuarray_variable(x)
        y = as_gpuarray_variable(y)
        assert x.dtype == y.dtype
        return Apply(self, [x, y], [x.type()])

    def perform(self, node, inputs, outputs):
        # Python fallback: out = 1.0 * (x . y) + 0.0 * out, computed by
        # the pygpu BLAS gemm wrapper into a freshly allocated GPU array.
        x, y = inputs
        out = pygpu.empty((x.shape[0], y.shape[1]), dtype=x.dtype)
        outputs[0][0] = blas.gemm(1., x, y, 0., out,
                                  overwrite_c=True)

    def c_code(self, node, name, inputs, outputs, sub):
        # Generate C code that allocates the output and calls the
        # generic pygpu rgemm (no transposition, alpha=1, beta=0).
        dtype = node.inputs[0].dtype
        typecode = pygpu.gpuarray.dtype_to_typecode(dtype)
        vars = dict(A=inputs[0], B=inputs[1], dtype=dtype, out=outputs[0],
                    typecode=typecode,
                    fail=sub['fail'], name=name)
        code = """
        double one = 1.;
        double zero = 0.;
        size_t dims[] = {PyGpuArray_DIMS(%(A)s)[0], PyGpuArray_DIMS(%(B)s)[1]};
        %(out)s = pygpu_empty(2, dims,
                              %(typecode)s,
                              GA_C_ORDER,
                              pygpu_default_context(), Py_None);
        if (!%(out)s) {
            %(fail)s
        }
        if (pygpu_blas_rgemm(cb_no_trans, cb_no_trans,
                             one,
                             %(A)s, %(B)s,
                             zero,
                             %(out)s) == NULL) {
            %(fail)s
        }
        """ % vars
        if config.gpuarray.sync:
            # Optional synchronization point for easier debugging/timing.
            code += """
            GpuArray_sync(&%(out)s->ga);
            """ % vars
        return code

    def c_code_cache_version(self):
        # Was: a bare `return` (None) followed by an unreachable
        # `return (0,)` — a leftover dev hack that silently disabled the
        # C-code cache. Return the real version tuple; bump it whenever
        # the generated C code changes.
        return (0,)

    def c_headers(self):
        ret = super(GpuDot22, self).c_headers()
        return ret + ['<compyte/numpy_compat.h>']
gpu_dot22 = GpuDot22()
from theano.compile import optdb from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.opt import in2out from theano.tensor.opt import in2out
......
...@@ -18,7 +18,7 @@ from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, ...@@ -18,7 +18,7 @@ from theano.sandbox.gpuarray.basic_ops import (host_from_gpu,
from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar, from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar,
GpuDimShuffle, GpuCAReduce) GpuDimShuffle, GpuCAReduce)
from theano.sandbox.gpuarray.subtensor import GpuSubtensor from theano.sandbox.gpuarray.subtensor import GpuSubtensor
from theano.sandbox.gpuarray.blas import GpuGemv, GpuGemm from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm
gpu_optimizer = EquilibriumDB() gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB() gpu_cut_copies = EquilibriumDB()
...@@ -238,6 +238,12 @@ def local_gpua_gemm(node): ...@@ -238,6 +238,12 @@ def local_gpua_gemm(node):
return GpuGemm(inplace=node.op.inplace) return GpuGemm(inplace=node.op.inplace)
@register_opt()
@op_lifter([tensor.blas.Dot22])
def local_gpua_dot22(node):
    # Rewrite a host-side Dot22 node into its GPU counterpart.
    # op_lifter handles moving the inputs/outputs between host and GPU;
    # we only have to name the replacement op.
    return gpu_dot22
@register_opt() @register_opt()
@op_lifter([tensor.basic.Eye]) @op_lifter([tensor.basic.Eye])
def local_gpua_eye(node): def local_gpua_eye(node):
......
from unittest import TestCase from unittest import TestCase
from theano.tensor.blas import gemv_inplace, gemm_inplace import theano
from theano.tensor.blas import gemv_inplace, gemm_inplace, _dot22
from theano.sandbox.gpuarray.tests.test_basic_ops import makeTester, rand from theano.sandbox.gpuarray.tests.test_basic_ops import makeTester, rand
from theano.sandbox.gpuarray.blas import (gpugemv_inplace, from theano.sandbox.gpuarray.blas import (gpugemv_inplace,
gpugemm_inplace) gpugemm_inplace, gpu_dot22)
GpuGemvTester = makeTester('GpuGemvTester', GpuGemvTester = makeTester('GpuGemvTester',
op=gemv_inplace, gpu_op=gpugemv_inplace, op=gemv_inplace, gpu_op=gpugemv_inplace,
...@@ -29,7 +31,28 @@ GpuGemmTester = makeTester('GpuGemmTester', ...@@ -29,7 +31,28 @@ GpuGemmTester = makeTester('GpuGemmTester',
test5=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), 0.6], test5=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), 0.6],
test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0], test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0], test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.0], test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
test9=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), -1.0], test9=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), -1.1],
) # test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
# test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
# test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
# test13=[rand(0, 0), -1.0, rand(0, 0), rand(0, 0), -1.1],
)
)
# Compare the host _dot22 op against gpu_dot22 on a few matrix shapes.
# NOTE: the name string was 'GpuGemmTester' (copy-paste bug), which
# collided with the real GpuGemmTester's generated class name.
GpuDot22Tester = makeTester(
    'GpuDot22Tester',
    op=_dot22, gpu_op=gpu_dot22,
    cases=dict(
        test1=[rand(3, 4), rand(4, 5)],
        test2=[rand(1, 4), rand(4, 5)],
        test3=[rand(3, 1), rand(1, 5)],
        test4=[rand(3, 4), rand(4, 1)],
        # Zero-sized cases disabled pending empty-array support:
        # test5=[rand(0, 4), rand(4, 5)],
        # test6=[rand(3, 0), rand(0, 5)],
        # test7=[rand(3, 4), rand(4, 0)],
        # test8=[rand(0, 4), rand(4, 0)],
        # test9=[rand(0, 0), rand(0, 0)],
    )
)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论