提交 26941db0 作者: Frederic

Add GpuDot22

上级 b7b88b1c
from theano import Op, Apply, config
from theano.tensor.blas import Gemv, Gemm
from theano.tensor.blas import Dot22, Gemv, Gemm
from theano.sandbox.gpuarray.basic_ops import (HideC, as_gpuarray_variable)
try:
......@@ -128,12 +128,73 @@ class GpuGemm(BlasOp, Gemm):
return code
def c_code_cache_version(self):
    """Return the version tuple ``(0,)`` for this op's generated C code.

    A bare ``return`` used to sit in front of this statement, which made
    the method return ``None`` and left the tuple unreachable.
    """
    return (0,)
gpugemm_no_inplace = GpuGemm(inplace=False)
gpugemm_inplace = GpuGemm(inplace=True)
class GpuDot22(BlasOp, Dot22):
    """Matrix-matrix product of two GPU arrays, computed with BLAS gemm.

    Both the Python (`perform`) and the C (`c_code`) implementations
    allocate a fresh ``(x.shape[0], y.shape[1])`` output and fill it with
    ``1 * x . y + 0 * out``.
    """

    def make_node(self, x, y):
        """Build the Apply node computing the product of `x` and `y`.

        `x` and `y` are converted with ``as_gpuarray_variable``.  The single
        output has the dtype of `x`; its broadcastable pattern is the first
        entry of `x`'s pattern followed by the second entry of `y`'s.
        """
        # Called for its side effects only.
        # NOTE(review): presumably this validates ndim/dtype of the inputs
        # the same way the CPU op does -- confirm against Dot22.make_node.
        Dot22.make_node(self, x, y)
        x = as_gpuarray_variable(x)
        y = as_gpuarray_variable(y)
        assert x.dtype == y.dtype
        # The result has the rows of x and the columns of y, so reusing
        # x.type unchanged would give the output x's column pattern.
        # NOTE(review): assumes the type's constructor accepts `dtype` and
        # `broadcastable` keywords -- confirm against GpuArrayType.
        out_type = x.type.__class__(
            dtype=x.dtype,
            broadcastable=(x.type.broadcastable[0],
                           y.type.broadcastable[1]))
        return Apply(self, [x, y], [out_type()])

    def perform(self, node, inputs, outputs):
        """Compute the product in Python and store it in ``outputs[0][0]``."""
        x, y = inputs
        out = pygpu.empty((x.shape[0], y.shape[1]), dtype=x.dtype)
        # beta is 0, so the uninitialised content of `out` is not used.
        outputs[0][0] = blas.gemm(1., x, y, 0., out,
                                  overwrite_c=True)

    def c_code(self, node, name, inputs, outputs, sub):
        """Return the C source that allocates the output and calls rgemm."""
        dtype = node.inputs[0].dtype
        typecode = pygpu.gpuarray.dtype_to_typecode(dtype)
        subs = dict(A=inputs[0], B=inputs[1], out=outputs[0],
                    typecode=typecode,
                    fail=sub['fail'], name=name)
        # Py_XDECREF releases whatever the output variable already holds
        # before it is overwritten by the new allocation; without it the
        # previous reference would be lost.
        code = """
        double one = 1.;
        double zero = 0.;
        size_t dims[] = {PyGpuArray_DIMS(%(A)s)[0], PyGpuArray_DIMS(%(B)s)[1]};
        Py_XDECREF(%(out)s);
        %(out)s = pygpu_empty(2, dims,
                            %(typecode)s,
                            GA_C_ORDER,
                            pygpu_default_context(), Py_None);
        if (!%(out)s) {
            %(fail)s
        }
        if (pygpu_blas_rgemm(cb_no_trans, cb_no_trans,
                             one,
                             %(A)s, %(B)s,
                             zero,
                             %(out)s) == NULL) {
            %(fail)s
        }
        """ % subs
        if config.gpuarray.sync:
            code += """
            GpuArray_sync(&%(out)s->ga);
            """ % subs
        return code

    def c_code_cache_version(self):
        """Return the version tuple ``(0,)`` for the generated C code.

        A bare ``return`` used to precede this statement, so the method
        returned ``None`` and the tuple was never reached.
        """
        return (0,)

    def c_headers(self):
        """Return the parent's header list plus the numpy compat header."""
        ret = super(GpuDot22, self).c_headers()
        return ret + ['<compyte/numpy_compat.h>']
gpu_dot22 = GpuDot22()
from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.opt import in2out
......
......@@ -18,7 +18,7 @@ from theano.sandbox.gpuarray.basic_ops import (host_from_gpu,
from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar,
GpuDimShuffle, GpuCAReduce)
from theano.sandbox.gpuarray.subtensor import GpuSubtensor
from theano.sandbox.gpuarray.blas import GpuGemv, GpuGemm
from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm
gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()
......@@ -238,6 +238,12 @@ def local_gpua_gemm(node):
return GpuGemm(inplace=node.op.inplace)
@register_opt()
@op_lifter([tensor.blas.Dot22])
def local_gpua_dot22(node):
    """Return the GPU op to use in place of ``Dot22`` for `node`.

    `node` is not inspected: the shared ``gpu_dot22`` instance is returned
    unconditionally.
    """
    replacement = gpu_dot22
    return replacement
@register_opt()
@op_lifter([tensor.basic.Eye])
def local_gpua_eye(node):
......
from unittest import TestCase
from theano.tensor.blas import gemv_inplace, gemm_inplace
import theano
from theano.tensor.blas import gemv_inplace, gemm_inplace, _dot22
from theano.sandbox.gpuarray.tests.test_basic_ops import makeTester, rand
from theano.sandbox.gpuarray.blas import (gpugemv_inplace,
gpugemm_inplace)
gpugemm_inplace, gpu_dot22)
GpuGemvTester = makeTester('GpuGemvTester',
op=gemv_inplace, gpu_op=gpugemv_inplace,
......@@ -29,7 +31,28 @@ GpuGemmTester = makeTester('GpuGemmTester',
test5=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), 0.6],
test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.0],
test9=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), -1.0],
)
test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
test9=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), -1.1],
# test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
# test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
# test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
# test13=[rand(0, 0), -1.0, rand(0, 0), rand(0, 0), -1.1],
)
)
# Test case comparing ``gpu_dot22`` against the CPU ``_dot22`` op on a few
# small matrix shapes.  The tester is registered under its own name; it used
# to carry the copy-pasted name 'GpuGemmTester'.
GpuDot22Tester = makeTester(
    'GpuDot22Tester',
    op=_dot22, gpu_op=gpu_dot22,
    cases=dict(
        test1=[rand(3, 4), rand(4, 5)],
        test2=[rand(1, 4), rand(4, 5)],
        test3=[rand(3, 1), rand(1, 5)],
        test4=[rand(3, 4), rand(4, 1)],
        # Zero-sized inputs are left disabled.
        # NOTE(review): presumably not supported by the GPU path yet --
        # confirm before enabling.
        # test5=[rand(0, 4), rand(4, 5)],
        # test6=[rand(3, 0), rand(0, 5)],
        # test7=[rand(3, 4), rand(4, 0)],
        # test8=[rand(0, 4), rand(4, 0)],
        # test9=[rand(0, 0), rand(0, 0)],
    )
)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论