提交 3fbcbf65 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Fix GpuIncSubtensor since it relied on the internals of GpuElemwise which have changed.

上级 59466ca0
...@@ -18,10 +18,9 @@ try: ...@@ -18,10 +18,9 @@ try:
except ImportError: except ImportError:
pass pass
from .type import GpuArrayType from .type import GpuArrayType, gpu_context_type
from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel, from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel,
infer_context_name) infer_context_name)
from .elemwise import GpuElemwise
class GpuSubtensor(HideC, Subtensor): class GpuSubtensor(HideC, Subtensor):
...@@ -168,7 +167,7 @@ class GpuSubtensor(HideC, Subtensor): ...@@ -168,7 +167,7 @@ class GpuSubtensor(HideC, Subtensor):
return (6,) return (6,)
class GpuIncSubtensor(GpuKernelBase, IncSubtensor): class GpuIncSubtensor(IncSubtensor):
""" """
Implement IncSubtensor on the gpu. Implement IncSubtensor on the gpu.
...@@ -181,45 +180,20 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -181,45 +180,20 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
:meth:`copy_of_x`, etc. specialize the c_code for this Op. :meth:`copy_of_x`, etc. specialize the c_code for this Op.
""" """
_f16_ok = True
@property params_type = gpu_context_type
def _f16_ok(self):
return self.iadd_node.op._f16_ok
def c_headers(self):
return self.iadd_node.op.c_headers()
def c_init_code(self):
return self.iadd_node.op.c_init_code()
def gpu_kernels(self, node, nodename):
subname = nodename + "_add_to_zview"
return self.iadd_node.op.gpu_kernels(self.iadd_node, subname)
def make_node(self, x, y, *inputs): def make_node(self, x, y, *inputs):
ctx_name = infer_context_name(x, y) ctx_name = infer_context_name(x, y)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name) y = as_gpuarray_variable(y, ctx_name)
rval = tensor.IncSubtensor.make_node(self, x, y, *inputs) rval = tensor.IncSubtensor.make_node(self, x, y, *inputs)
op = copy.copy(self) ret = gof.Apply(self, [x, y] + rval.inputs[2:], [x.type()])
ret = gof.Apply(op, [x, y] + rval.inputs[2:], [x.type()])
op.create_iadd_node(ret)
return ret return ret
def get_params(self, node): def get_params(self, node):
return node.outputs[0].type.context return node.outputs[0].type.context
def create_iadd_node(self, node):
# We store a iadd_node in the op that contain the info needed
# for the inplace add.
cop = theano.tensor.inplace.add_inplace
gop = GpuElemwise(cop.scalar_op, copy.copy(cop.inplace_pattern),
"Gpu" + cop.name, cop.nfunc_spec)
y = node.inputs[1]
xview = y.type()
iadd_node = gop(xview, y).owner
self.iadd_node = iadd_node
def perform(self, node, inputs, out_, ctx): def perform(self, node, inputs, out_, ctx):
out, = out_ out, = out_
x, y = inputs[:2] x, y = inputs[:2]
...@@ -261,18 +235,6 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -261,18 +235,6 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
x.__setitem__(cdata, y) x.__setitem__(cdata, y)
out[0] = x out[0] = x
def __setstate__(self, d):
self.__dict__.update(d)
owner = getattr(self, "owner", None)
if owner:
self.create_iadd_node(owner)
def __getstate__(self):
d = copy.copy(self.__dict__)
if "iadd_node" in d:
d.pop('iadd_node')
return d
def do_type_checking(self, node): def do_type_checking(self, node):
""" """
Should raise NotImplementedError if c_code does not support Should raise NotImplementedError if c_code does not support
...@@ -365,47 +327,54 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -365,47 +327,54 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
""" """
return """GpuArray_setarray(&%(view)s->ga, &%(source)s->ga)""" % locals() return """GpuArray_setarray(&%(view)s->ga, &%(source)s->ga)""" % locals()
def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/error.h>', '<gpuarray/array.h>',
'<gpuarray/elemwise.h>']
def c_support_code_struct(self, node, nodename): def c_support_code_struct(self, node, nodename):
gop = self.iadd_node.op return "\nGpuElemwise *iadd;\n"
sub_name = nodename + "_add_to_zview"
ret = gop.c_support_code_struct(self.iadd_node, sub_name)
ret += """
PyGpuArrayObject* inc_sub_iadd_%(nodename)s(PyGpuArrayObject* dst,
PyGpuArrayObject* src){
PyGpuArrayObject* ret = NULL;
""" % locals()
inputs = ["dst", "src"]
outputs = ["ret"]
sub = {"fail": "return NULL;", "params": "dst->context"}
ret += gop.c_code(self.iadd_node, sub_name, inputs, outputs, sub)
ret += """
return ret;
def c_init_code_struct(self, node, name, sub):
return """
gpuelemwise_arg args[2] = {{0}};
args[0].name = "a";
args[0].typecode = %(type1)s;
args[0].flags = GE_READ|GE_WRITE;
args[0].nd = %(nd)s;
args[1].name = "b";
args[1].typecode = %(type2)s;
args[1].flags = GE_READ;
args[1].nd = %(nd)s;
iadd = GpuElemwise_new(%(ctx)s->ops, %(ctx)s->ctx, "", "a += b",
2, args, 0);
if (iadd == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not intialize inplace add support");
%(fail)s
} }
""" """ % dict(ctx=sub['params'], fail=sub['fail'],
return ret type1=node.inputs[0].type.typecode,
type2=node.inputs[1].type.typecode,
nd=node.inputs[1].ndim)
def add_to_zview(self, nodename, x, fail): def add_to_zview(self, nodename, x, fail):
return """ return """
PyGpuArrayObject * add_result = inc_sub_iadd_%(nodename)s(zview, %(x)s);
if (! add_result )
{ {
void *args[2];
args[0] = &zview->ga;
args[1] = &%(x)s->ga;
if (GpuElemwise_call(iadd, args, GE_BROADCAST) != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Error doing inplace add");
Py_DECREF(zview); Py_DECREF(zview);
%(fail)s; %(fail)s
} }
else
{
Py_DECREF(add_result);
} }
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
parent_version = super(GpuIncSubtensor, self).c_code_cache_version() parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
elemwise_version = self.iadd_node.c_code_cache_version() if not parent_version:
if not parent_version or not elemwise_version:
return return
return parent_version + elemwise_version + (3,) return parent_version + (4,)
class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1): class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论