Commit a0921c99 authored by Arnaud Bergeron

Add GpuElemwise op (python-only for now).

Parent 43cc967e
import copy
import numpy
from theano import Op, Apply, scalar
from pygpu.tools import ScalarArg, ArrayArg
from pygpu.elemwise import ElemwiseKernel
from basic_ops import as_gpuarray_variable
from type import GpuArrayType
from theano.gof.utils import MethodNotDefined

def _is_scalar(v):
    # No scalar inputs are recognized yet; everything is treated as an array.
    return False


def make_argument(v, name):
    # Map a variable to the matching pygpu kernel argument wrapper.
    if _is_scalar(v):
        return ScalarArg(numpy.dtype(v.type.dtype), name)
    else:
        return ArrayArg(numpy.dtype(v.type.dtype), name)


def ensure_out(o, ref):
    # Reuse a preallocated output when given, otherwise allocate one like `ref`.
    if o is None:
        return ref._empty_like_me()
    else:
        return o

class GpuElemwise(Op):
nin = property(lambda self: self.scalar_op.nin)
nout = property(lambda self: self.scalar_op.nout)
def __init__(self, scalar_op):
self.scalar_op = scalar_op
self.destroy_map = {}
    def __getstate__(self):
        d = copy.copy(self.__dict__)
        d.pop('__epydoc_asRoutine', None)
        d.pop('_hashval', None)
        return d

    def __setstate__(self, d):
        # __hash__ is computed on demand, so there is no cached value to rebuild.
        self.__dict__.update(d)

def __eq__(self, other):
return (type(self) == type(other) and
self.scalar_op == other.scalar_op)
def __hash__(self):
return hash(type(self)) ^ hash(self.scalar_op)
def __str__(self):
return "GpuElemwise{%s}(gpuarray)" % (self.scalar_op,)
def make_node(self, *inputs):
_inputs = [as_gpuarray_variable(i) for i in inputs]
if self.nin > 0 and len(_inputs) != self.nin:
raise TypeError("Wrong argument count", (self.nin, len(_inputs)))
for i in _inputs[1:]:
if i.type.ndim != inputs[0].type.ndim:
raise TypeError('mismatched rank amongst inputs')
        # An output dimension is broadcastable only if it is broadcastable
        # in every input.
        broadcastable = []
        for d in xrange(_inputs[0].type.ndim):
            bcast_d = True
            for i in _inputs:
                if not i.type.broadcastable[d]:
                    bcast_d = False
                    break
            broadcastable.append(bcast_d)
assert len(broadcastable) == _inputs[0].type.ndim
assert self.nout > 0
inps = [make_argument(i, 'i%d' % (n,)) for n, i in
enumerate(inputs)]
scal_ins = [scalar.Scalar(i.dtype) for i in inputs]
res = Apply(self, _inputs,
[GpuArrayType(o.dtype, broadcastable)()
for o in self.scalar_op.output_types(scal_ins)])
outs = [make_argument(o, 'o%d' % (n,)) for n, o in
enumerate(res.outputs)]
scal_out = [scalar.Scalar(o.dtype) for o in res.outputs]
        # Generate the scalar op's C code against a fake scalar Apply node;
        # the resulting snippet becomes the body of the elementwise kernel.
        fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
                          [o() for o in scal_out])
        kcode = self.scalar_op.c_code(fake_node, 'kcode',
                                      [i.expr() for i in inps],
                                      [o.expr() for o in outs],
                                      sub=dict(fail='return;'))
        res.tag.kcode = kcode
support_code = ""
try:
support_code += self.scalar_op.c_support_code_apply(fake_node, 'kcode')
except MethodNotDefined:
pass
try:
support_code += self.scalar_op.c_support_code()
except MethodNotDefined:
pass
        # Build the pygpu kernel once at graph-construction time and stash it
        # on the Apply node for perform() to reuse.
        k = ElemwiseKernel(None, inps + outs, kcode, preamble=support_code)
        res.tag.kernel = k
return res
    def perform(self, node, inps, out):
        k = node.tag.kernel
        # Allocate any missing output buffers like the first input, then run
        # the precompiled broadcasting kernel.
        outs = [ensure_out(o[0], inps[0]) for o in out]
        k.call_dimspec(*(inps + outs), broadcast=True)
        for o, og in zip(out, outs):
            o[0] = og
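For context, a minimal usage sketch of the new op (not part of the commit), placed before the optimizer changes below. It assumes a working pygpu backend, an initializable GPU context, and the theano.sandbox.gpuarray module layout used in this diff; GpuElemwise wraps a theano.scalar op and compiles a pygpu ElemwiseKernel for it when the node is built.

# Not part of the commit: rough usage sketch, assuming pygpu is installed
# and a GPU context can be initialized.
import theano
import theano.tensor as T
from theano import scalar
from theano.sandbox.gpuarray.basic_ops import gpu_from_host, host_from_gpu
from theano.sandbox.gpuarray.elemwise import GpuElemwise

x = T.fmatrix('x')
y = T.fmatrix('y')
# Wrap scalar addition; the kernel source is generated in make_node().
gpu_add = GpuElemwise(scalar.add)
z = host_from_gpu(gpu_add(gpu_from_host(x), gpu_from_host(y)))
f = theano.function([x, y], z)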
import theano, numpy
from theano import tensor
from theano.compile import optdb
from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB,
@@ -9,6 +9,7 @@ from theano.gof.python25 import all, any
from theano.sandbox.gpuarray.type import GpuArrayType
from basic_ops import host_from_gpu, gpu_from_host, gpu_alloc
from elemwise import GpuElemwise, _is_scalar
gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()
@@ -111,3 +112,37 @@ def local_gpualloc(node):
            new_out = tensor.patternbroadcast(new_out, old_out.broadcastable)
        return [new_out]
@register_opt()
@local_optimizer([])
def local_gpu_elemwise(node):
do_replace = False
gpu_out = False
    # check for gpu_from_host(Elemwise(...)) and extract the Elemwise node
if node.op == gpu_from_host:
host_i, = node.inputs
if (host_i.owner and
isinstance(host_i.owner.op, tensor.Elemwise) and
len(host_i.clients) == 1):
node = host_i.owner
do_replace = True
gpu_out = True
# check for elemwise(..., host_from_gpu, ...)
if isinstance(node.op, tensor.Elemwise):
if numpy.any([i.owner and
i.owner.op == host_from_gpu
for i in node.inputs]):
do_replace = True
if numpy.all([_is_scalar(i)
for i in node.inputs]):
do_replace = False
if do_replace:
new_op = GpuElemwise(node.op.scalar_op)
gpu_elemwise = new_op(*(gpu_from_host(i) for i in node.inputs))
if gpu_out:
return [gpu_elemwise]
else:
return [host_from_gpu(gpu_elemwise)]
else:
return False
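For reviewers, a rough illustration (not part of the commit) of the graph rewrite performed by local_gpu_elemwise, assuming the gpu_cut_copies database declared above cleans up redundant transfers afterwards:

    before: gpu_from_host(Elemwise{add}(a, b))
    after:  GpuElemwise{add}(gpu_from_host(a), gpu_from_host(b))

    before: Elemwise{mul}(host_from_gpu(ga), b)
    after:  host_from_gpu(GpuElemwise{mul}(gpu_from_host(host_from_gpu(ga)),
                                           gpu_from_host(b)))

In the second case the inner gpu_from_host(host_from_gpu(...)) round trip is expected to be removed by the copy-cutting optimizations registered in gpu_cut_copies.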