Commit 5d7a1999, authored by Arnaud Bergeron

Add GpuGemm op.

Parent 5649f8b6
from theano import Op, Apply, config
from theano.tensor.blas import Gemv, Gemm
from theano.sandbox.gpuarray.basic_ops import (HideC, as_gpuarray_variable)

try:
...@@ -10,7 +10,7 @@
except ImportError, e:
    # To make sure theano is importable
    pass

class BlasOp(HideC):
    def c_headers(self):
        return ['<blas_api.h>']
...@@ -71,6 +71,58 @@ class GpuGemv(BlasOp, Gemv):

gpugemv_no_inplace = GpuGemv(inplace=False)
gpugemv_inplace = GpuGemv(inplace=True)
class GpuGemm(BlasOp, Gemm):
    """GEMM on the GPU: out = beta * C + alpha * dot(A, B).

    The ``inplace`` flag (inherited from ``Gemm``) selects whether C is
    overwritten or first copied into a fresh output array.
    """
    def make_node(self, C, alpha, A, B, beta):
        # Run the parent's make_node purely for its input validation; the
        # Apply it returns is discarded and rebuilt with GPU-side variables.
        Gemm.make_node(self, C, alpha, A, B, beta)
        A = as_gpuarray_variable(A)
        B = as_gpuarray_variable(B)
        C = as_gpuarray_variable(C)
        return Apply(self, [C, alpha, A, B, beta], [C.type()])

    def perform(self, node, inputs, outputs):
        # Python fallback; ``blas`` comes from the guarded pygpu import at
        # the top of this module.
        C, alpha, A, B, beta = inputs
        outputs[0][0] = blas.gemm(alpha, A, B, beta, C,
                                  overwrite_c=self.inplace)

    def c_code(self, node, name, inp, out, sub):
        vars = dict(out=out[0], C=inp[0], alpha=inp[1], A=inp[2], B=inp[3],
                    beta=inp[4], fail=sub['fail'], name=name)
        if self.inplace:
            # Destructive: the output aliases C.
            code = """
                   Py_XDECREF(%(out)s);
                   %(out)s = %(C)s;
                   Py_INCREF(%(out)s);
                   """ % vars
        else:
            # Non-destructive: work on a copy of C.
            code = """
                   Py_XDECREF(%(out)s);
                   %(out)s = pygpu_copy(%(C)s, GA_ANY_ORDER);
                   if (%(out)s == NULL) {
                       %(fail)s
                   }
                   """ % vars
        code += """
        if (pygpu_blas_rgemm(cb_no_trans, cb_no_trans,
                             ((dtype_%(alpha)s *)PyArray_DATA(%(alpha)s))[0],
                             %(A)s, %(B)s,
                             ((dtype_%(beta)s *)PyArray_DATA(%(beta)s))[0],
                             %(out)s) == NULL) {
            %(fail)s
        }
        """ % vars
        if config.gpuarray.sync:
            # BUG FIX: this snippet was previously appended without the
            # ``% vars`` interpolation, so the literal text "%(out)s" ended
            # up in the generated C source and broke compilation.
            code += """
            GpuArray_sync(&%(out)s->ga);
            """ % vars
        return code

    def c_code_cache_version(self):
        # Bumped from (0,): the sync snippet above is now interpolated,
        # which changes the generated C code.
        return (1,)
# Module-level singletons: the non-destructive variant and the one that
# overwrites its C input (installed later by the inplace optimizer).
gpugemm_no_inplace = GpuGemm(inplace=False)
gpugemm_inplace = GpuGemm(inplace=True)
from theano.compile import optdb
from theano.gof import local_optimizer, EquilibriumOptimizer

...@@ -79,10 +131,15 @@ def local_inplace_gpuagemv(node):
    if node.op == gpugemv_no_inplace:
        return [gpugemv_inplace(*node.inputs)]
@local_optimizer([gpugemm_no_inplace])
def local_inplace_gpuagemm(node):
    """Replace a non-destructive GpuGemm with its inplace counterpart."""
    if node.op == gpugemm_no_inplace:
        return [gpugemm_inplace(*node.inputs)]

# BUG FIX: this was spelled "EquilibriumOptimzer", which is a NameError at
# import time -- the class imported above is EquilibriumOptimizer.
gpuablas_opt_inplace = EquilibriumOptimizer(
    [local_inplace_gpuagemv, local_inplace_gpuagemm],
    failure_callback=EquilibriumOptimizer.warn_inplace,
    max_use_ratio=5)
optdb.register('InplaceGpuaBlasOpt',
               gpuablas_opt_inplace,
               70.0, 'fast_run', 'inplace', 'gpuarray')
...@@ -186,3 +186,8 @@ def local_gpua_careduce(node):

@op_lifter(tensor.blas.Gemv)
def local_gpua_gemv(node):
    return GpuGemv(inplace=node.op.inplace)
@register_opt()
# BUG FIX: the decorator was misspelled "op_listfer"; the sibling lifter
# for Gemv above uses op_lifter.
@op_lifter(tensor.blas.Gemm)
def local_gpua_gemm(node):
    """Lift a CPU Gemm node to GpuGemm, preserving its inplace flag."""
    return GpuGemm(inplace=node.op.inplace)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论