提交 c0342e58 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add GpuAdvancedSubtensor1, based on libgpuarray _take1().

上级 5ecbbde2
...@@ -35,6 +35,7 @@ from .nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias, ...@@ -35,6 +35,7 @@ from .nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias,
from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda, from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
GpuCAReduceCPY) GpuCAReduceCPY)
from .subtensor import (GpuIncSubtensor, GpuSubtensor, from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20) GpuAdvancedIncSubtensor1_dev20)
...@@ -488,6 +489,12 @@ def local_gpua_incsubtensor(node): ...@@ -488,6 +489,12 @@ def local_gpua_incsubtensor(node):
node.op.destroyhandler_tolerate_aliased) node.op.destroyhandler_tolerate_aliased)
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedSubtensor1])
def local_gpua_advanced_subtensor(node):
return GpuAdvancedSubtensor1()
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor1]) @op_lifter([tensor.AdvancedIncSubtensor1])
def local_gpua_advanced_incsubtensor(node): def local_gpua_advanced_incsubtensor(node):
......
...@@ -405,6 +405,77 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -405,6 +405,77 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
return parent_version + elemwise_version + (2,) return parent_version + elemwise_version + (2,)
class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
    """
    Implement AdvancedSubtensor1 on the GPU.

    Gathers the rows of ``x`` selected by the integer vector ``ilist``,
    delegating the actual copy to libgpuarray's ``GpuArray_take1``.
    There is no Python implementation: this op is C-code only.
    """
    def make_node(self, x, ilist):
        """Build the Apply node; validates and upcasts the index vector.

        Parameters
        ----------
        x : tensor-like
            The array to index into; moved to the GPU. Must have ndim >= 1.
        ilist : tensor-like
            Vector of integer indices; upcast to int64 and moved to the GPU.

        Raises
        ------
        TypeError
            If ``ilist`` is not an integer vector or ``x`` is a scalar.
        """
        x_ = as_gpuarray_variable(x)
        ilist__ = tensor.as_tensor_variable(ilist)
        # Only integer (signed or unsigned) indices make sense; reject
        # float/complex dtypes early with a clear error.
        if ilist__.type.dtype[:3] not in ('int', 'uin'):
            raise TypeError('index must be integers')
        # GpuArray_take1 expects int64 indices; upcast smaller int types.
        if ilist__.type.dtype != 'int64':
            ilist__ = tensor.cast(ilist__, 'int64')
        ilist_ = as_gpuarray_variable(ilist__)
        if ilist_.type.dtype != 'int64':
            raise TypeError('index must be int64')
        if ilist_.type.ndim != 1:
            raise TypeError('index must be a vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
        # Output shape: leading dim comes from the index vector, the
        # remaining dims come from x.
        bcast = ilist_.broadcastable + x_.broadcastable[1:]
        return gof.Apply(self, [x_, ilist_],
                         [GpuArrayType(dtype=x.dtype,
                                       broadcastable=bcast)()])

    def perform(self, node, inp, out_):
        # Deliberately unimplemented: only the C (GPU) path exists.
        raise NotImplementedError()

    def c_support_code(self):
        # Helper: true iff `a` and `v` have the same rank and the same
        # dimensions on every axis except axis 0 (which is checked
        # against the index length by the caller).
        return """
        int take1_match_dims(GpuArray *a, GpuArray *v) {
          if (a->nd != v->nd) return 0;
          for (unsigned int i = 1; i < v->nd; i++) {
            if (a->dimensions[i] != v->dimensions[i]) return 0;
          }
          return 1;
        }
        """

    def c_code(self, node, name, inputs, outputs, sub):
        return """
        int err;
        /* Reuse the previous output buffer only if it is C-contiguous
           and already has the right shape; otherwise allocate anew. */
        if (%(out)s == NULL || !GpuArray_IS_C_CONTIGUOUS(&%(out)s->ga) ||
            %(out)s->ga.dimensions[0] != %(idx)s->ga.dimensions[0] ||
            !take1_match_dims(&%(out)s->ga, &%(v)s->ga)) {
          size_t tmp;
          Py_XDECREF(%(out)s);

          /* This is a dirty hack to avoid an extra alloc: temporarily
             overwrite v's leading dimension with the index length so we
             can pass v's dims array directly to pygpu_empty. */
          tmp = %(v)s->ga.dimensions[0];
          %(v)s->ga.dimensions[0] = %(idx)s->ga.dimensions[0];
          %(out)s = pygpu_empty(%(v)s->ga.nd, %(v)s->ga.dimensions, %(v)s->ga.typecode,
                                GA_C_ORDER, %(v)s->context, Py_None);
          %(v)s->ga.dimensions[0] = tmp; // Don't remove this line
          if (%(out)s == NULL) {
            /* pygpu_empty has already set a Python exception; without
               this check a failed allocation would be dereferenced
               below and crash the interpreter. */
            %(fail)s
          }
        }
        err = GpuArray_take1(&%(out)s->ga, &%(v)s->ga, &%(idx)s->ga, 1);
        if (err != GA_NO_ERROR) {
          if (err == GA_VALUE_ERROR) {
            PyErr_SetString(PyExc_IndexError, "Index out of bounds.");
          } else {
            PyErr_SetString(PyExc_RuntimeError, Gpu_error(%(v)s->context->ops,
                                                          %(v)s->context->ctx, err));
          }
          %(fail)s
        }
        """ % dict(out=outputs[0], v=inputs[0], idx=inputs[1], fail=sub['fail'])

    def c_code_cache_version(self):
        # Bumped to (1,) because the generated C code changed (NULL
        # check after allocation).
        return (1,)
class GpuAdvancedIncSubtensor1(HideC, tensor.AdvancedIncSubtensor1): class GpuAdvancedIncSubtensor1(HideC, tensor.AdvancedIncSubtensor1):
""" """
Implement AdvancedIncSubtensor1 on the gpu. Implement AdvancedIncSubtensor1 on the gpu.
......
...@@ -7,6 +7,7 @@ from theano.tensor.tests import test_subtensor ...@@ -7,6 +7,7 @@ from theano.tensor.tests import test_subtensor
from ..basic_ops import HostFromGpu, GpuFromHost from ..basic_ops import HostFromGpu, GpuFromHost
from ..subtensor import (GpuIncSubtensor, GpuSubtensor, from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1) GpuAdvancedIncSubtensor1)
from ..type import gpuarray_shared_constructor from ..type import gpuarray_shared_constructor
...@@ -24,6 +25,7 @@ class G_subtensor(test_subtensor.T_subtensor): ...@@ -24,6 +25,7 @@ class G_subtensor(test_subtensor.T_subtensor):
shared=gpuarray_shared_constructor, shared=gpuarray_shared_constructor,
sub=GpuSubtensor, sub=GpuSubtensor,
inc_sub=GpuIncSubtensor, inc_sub=GpuIncSubtensor,
adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1, adv_incsub1=GpuAdvancedIncSubtensor1,
mode=mode_with_gpu, mode=mode_with_gpu,
# avoid errors with limited devices # avoid errors with limited devices
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论