提交 2fa3cecb 作者: Frédéric Bastien 提交者: GitHub

Merge pull request #5321 from ReyhaneAskari/fix_5253

unstable commit-fixed perform on gpujoin
...@@ -1245,8 +1245,19 @@ class GpuJoin(HideC, Join): ...@@ -1245,8 +1245,19 @@ class GpuJoin(HideC, Join):
""" """
_f16_ok = True _f16_ok = True
__props__ = ("view",)
params_type = gpu_context_type params_type = gpu_context_type
def __init__(self, view=-1):
    """Create the op, optionally letting the output alias one input.

    `view` is the position, among the joined tensors, of the input that
    the output may be a view of.  With the default of -1 no `view_map`
    attribute is set on the instance.
    """
    self.view = view
    if view == -1:
        return
    # Input 0 of the node is always the axis, so the joined tensors are
    # numbered from 1 in the node's input list.
    self.view_map = {0: [view + 1]}
def __str__(self):
    """Return the text that ``Join.__str__`` produces for this op."""
    label = Join.__str__(self)
    return label
def make_node(self, axis, *tensors): def make_node(self, axis, *tensors):
node = Join.make_node(self, axis, *tensors) node = Join.make_node(self, axis, *tensors)
...@@ -1265,61 +1276,98 @@ class GpuJoin(HideC, Join): ...@@ -1265,61 +1276,98 @@ class GpuJoin(HideC, Join):
def perform(self, node, axis_and_tensors, out_, ctx): def perform(self, node, axis_and_tensors, out_, ctx):
out, = out_ out, = out_
view = self.view
axis = int(axis_and_tensors[0]) axis = int(axis_and_tensors[0])
tensors = axis_and_tensors[1:]
if axis < -axis_and_tensors[1].ndim: if axis < -axis_and_tensors[1].ndim:
raise IndexError raise IndexError
if axis < 0: if axis < 0:
axis += axis_and_tensors[1].ndim axis += axis_and_tensors[1].ndim
tensors = axis_and_tensors[1:] # we check these tensors for being empty.
out[0] = pygpu.concatenate(tensors, axis=axis, context=ctx).astype( if (view != -1) and numpy.all(
node.outputs[0].dtype) [tensor.shape[axis] == 0 for tensor in
tensors[0:view] + tensors[view + 1:]]):
out[0] = tensors[view]
else:
out[0] = pygpu.concatenate(tensors, axis=axis, context=ctx).astype(
node.outputs[0].dtype)
def c_code_cache_version(self): def c_code_cache_version(self):
return (2,) return (3,)
def c_support_code(self): def c_support_code(self):
return """ return """
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
#define PyInt_AsLong PyLong_AsLong #define PyInt_AsLong PyLong_AsLong
#endif #endif
""" """
def c_headers(self):
    """Return the C header names for this op: only ``<numpy_compat.h>``."""
    headers = ['<numpy_compat.h>']
    return headers
def c_code(self, node, name, inputs, out_, sub): def c_code(self, node, name, inputs, out_, sub):
axis, tensors = inputs[0], inputs[1:]
copy_to_list = [] copy_to_list = []
restype = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype) restype = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
for i, inp in enumerate(inputs[1:]): view = self.view
non_empty_tensor = tensors[view]
for i, inp in enumerate(tensors):
copy_to_list.append("als[%s] = &%s->ga;" % (i, inp)) copy_to_list.append("als[%s] = &%s->ga;" % (i, inp))
return """
const GpuArray **als = (const GpuArray **)PyMem_Malloc(sizeof(GpuArray *) * n = len(tensors)
fail = sub['fail']
out = out_[0]
copy_inputs_to_list = '\n'.join(copy_to_list)
restype = restype
ctx = sub['params']
code = """
const GpuArray **als = (const GpuArray **)PyMem_Malloc(sizeof(GpuArray *) *
%(n)s); %(n)s);
if (als == NULL) { if (als == NULL) {
PyErr_NoMemory(); PyErr_NoMemory();
%(fail)s %(fail)s
} }
%(copy_inputs_to_list)s %(copy_inputs_to_list)s
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
{ {
int axis = PyInt_AsLong((PyObject *)%(axis)s); int axis = PyInt_AsLong((PyObject *)%(axis)s);
if (axis < 0) { if (axis < 0) {
if (axis == -1 && PyErr_Occurred()) { if (axis == -1 && PyErr_Occurred()) {
%(fail)s %(fail)s
} }
axis += als[0]->nd; axis += als[0]->nd;
if (axis < 0) { if (axis < 0) {
PyErr_SetString(PyExc_IndexError, "invalid axis"); PyErr_SetString(PyExc_IndexError, "invalid axis");
%(fail)s %(fail)s
} }
} }
%(out)s = pygpu_concatenate(als, %(n)s, axis,
%(restype)s, (PyObject *)&PyGpuArrayType, int tensors_lens_sum;
%(ctx)s); if(%(view)s != -1) {
} tensors_lens_sum = 0;
PyMem_Free(als); for(int i=0; i < %(n)s; i++){
if (%(out)s == NULL) tensors_lens_sum += als[i]->dimensions[axis];
%(fail)s }
""" % dict(n=len(inputs[1:]), fail=sub['fail'], out=out_[0], tensors_lens_sum -= PyGpuArray_DIM(%(non_empty_tensor)s, axis);
axis=inputs[0], copy_inputs_to_list='\n'.join(copy_to_list), }
restype=restype, ctx=sub['params'])
if(%(view)s != -1 && tensors_lens_sum == 0) {
Py_INCREF(%(non_empty_tensor)s);
%(out)s = %(non_empty_tensor)s;
}else{
%(out)s = pygpu_concatenate(als, %(n)s, axis,
%(restype)s, (PyObject *)&PyGpuArrayType,
%(ctx)s);
}
}
PyMem_Free(als);
if (%(out)s == NULL)
%(fail)s
""" % locals()
return code
gpu_join = GpuJoin() gpu_join = GpuJoin()
......
...@@ -453,3 +453,24 @@ def test_hostfromgpu_shape_i(): ...@@ -453,3 +453,24 @@ def test_hostfromgpu_shape_i():
assert isinstance(topo[1].op, theano.compile.Shape_i) assert isinstance(topo[1].op, theano.compile.Shape_i)
assert isinstance(topo[2].op, theano.tensor.opt.MakeVector) assert isinstance(topo[2].op, theano.tensor.opt.MakeVector)
assert tuple(f(cv)) == (5, 4) assert tuple(f(cv)) == (5, 4)
def test_Gpujoin_inplace():
    """Check that GpuJoin returns a view of its only non-empty input.

    A three-element shared GPU vector is joined along axis 0 with a vector
    whose length is a symbolic scalar, using ``GpuJoin(view=0)``.  When that
    length is 0, the compiled function's result must be the very object held
    by the shared variable, and it must still contain the original values.
    """
    length = T.lscalar()
    values = numpy.array([3, 4, 5], dtype=theano.config.floatX)
    shared_x = gpuarray_shared_constructor(values, borrow=True)
    empty_part = T.zeros((length,))

    joined = GpuJoin(view=0)(0, shared_x, empty_part)
    f = theano.function([length], theano.Out(joined, borrow=True))

    # Identity, not equality: the output has to alias the shared storage.
    assert shared_x.get_value(borrow=True, return_internal_type=True) is f(0)
    assert numpy.allclose(f(0), [3, 4, 5])
...@@ -3896,9 +3896,12 @@ class Join(Op): ...@@ -3896,9 +3896,12 @@ class Join(Op):
def __str__(self): def __str__(self):
if self.view == -1: if self.view == -1:
return "Join" return self.__class__.__name__
else: else:
return super(Join, self).__str__() return "%s{%s}" % (
self.__class__.__name__,
", ".join("%s=%r" % (p, getattr(self, p))
for p in self.__props__))
def __setstate__(self, d): def __setstate__(self, d):
self.__dict__.update(d) self.__dict__.update(d)
...@@ -4044,28 +4047,37 @@ class Join(Op): ...@@ -4044,28 +4047,37 @@ class Join(Op):
out, = outputs out, = outputs
fail = sub['fail'] fail = sub['fail']
adtype = node.inputs[0].type.dtype_specs()[1] adtype = node.inputs[0].type.dtype_specs()[1]
code = """ copy_to_list = []
int axis = ((%(adtype)s *)PyArray_DATA(%(axis)s))[0];
int tensors_lens_sum = 0""" % locals()
for i, inp in enumerate(tensors): for i, inp in enumerate(tensors):
code += """ + PyArray_DIM(%(inp)s, axis) """ % locals() copy_to_list.append(
code += """;\n """Py_INCREF(%s);
tensors_lens_sum -= PyArray_DIM(%(non_empty_tensor)s, axis); PyList_SetItem(list, %s, (PyObject*)%s);"""
% (inp, i, inp))
copy_inputs_to_list = '\n'.join(copy_to_list)
n = len(tensors)
khar = "printf(\"tensors_lens_sum: %d\", tensors_lens_sum);"
if(%(view)s != -1 && tensors_lens_sum == 0){ code = """
int axis = ((%(adtype)s *)PyArray_DATA(%(axis)s))[0];
PyObject* list = PyList_New(%(l)s);
%(copy_inputs_to_list)s
int tensors_lens_sum;
if(%(view)s != -1) {
tensors_lens_sum = 0;
for(int i=0; i < %(n)s; i++){
tensors_lens_sum += PyArray_DIM((PyArrayObject *)(PyList_GetItem(list, i)), axis);
}
%(khar)s
tensors_lens_sum -= PyArray_DIM(%(non_empty_tensor)s, axis);
}
if(%(view)s != -1 && tensors_lens_sum == 0) {
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
Py_INCREF(%(non_empty_tensor)s); Py_INCREF(%(non_empty_tensor)s);
%(out)s = %(non_empty_tensor)s; %(out)s = %(non_empty_tensor)s;
} }else{
else{
PyObject* list = PyList_New(%(l)s);
""" % locals()
for i, inp in enumerate(tensors):
code += """
Py_INCREF(%(inp)s);
PyList_SetItem(list, %(i)s, (PyObject*)%(inp)s);
""" % locals()
code += """
//PyObject* PyArray_Concatenate(PyObject* obj, int axis) //PyObject* PyArray_Concatenate(PyObject* obj, int axis)
int ndim = PyArray_NDIM(%(input_1)s); int ndim = PyArray_NDIM(%(input_1)s);
if( axis < -ndim ){ if( axis < -ndim ){
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论