Commit 16c6f70b authored by Reyhane Askari

GpuJoin perform and C function work inplace

Parent 46eaf9df
......@@ -1255,16 +1255,14 @@ class GpuJoin(HideC, Join):
# start from index 1.
self.view_map = {0: [1 + view]}
# def __str__(self):
# if self.view == -1:
# return "Join"
# else:
# return super(Join, self).__str__()
# def __setstate__(self, d):
# self.__dict__.update(d)
# if not hasattr(self, "view"):
# self.view = -1
def __str__(self):
    """Readable op name: bare class name, or class name plus props when
    an in-place view is configured (``view != -1``)."""
    cls_name = self.__class__.__name__
    if self.view == -1:
        return cls_name
    props = ", ".join(
        "%s=%r" % (prop, getattr(self, prop)) for prop in self.__props__)
    return "%s{%s}" % (cls_name, props)
def make_node(self, axis, *tensors):
node = Join.make_node(self, axis, *tensors)
......@@ -1296,59 +1294,85 @@ class GpuJoin(HideC, Join):
# In-place case: a view input is designated and every OTHER input is
# empty along the join axis, so the join result is exactly that input;
# return it directly instead of copying.
# NOTE(review): removed a leftover debugger breakpoint
# (`import ipdb; ipdb.set_trace()`) that would halt execution here.
if (view != -1) and numpy.all(
        [tensor.shape[axis] == 0 for tensor in
         tensors[0:view] + tensors[view + 1:]]):
    out[0] = tensors[view]
else:
    # General case: concatenate on the GPU, casting to the declared
    # output dtype.
    out[0] = pygpu.concatenate(tensors, axis=axis, context=ctx).astype(
        node.outputs[0].dtype)
def c_code_cache_version(self):
    """Version tag for Theano's C-code cache.

    Bumped to (3,) for the in-place c_code implementation; the dead
    `return` / `return (2,)` statements left over from earlier edits
    are removed (they made the real version unreachable).
    """
    return (3,)
def c_support_code(self):
    """Support code injected before the op's generated C code.

    Python 3 removed ``PyInt_AsLong``; alias it to ``PyLong_AsLong``
    there so the generated code compiles on both major versions.
    (The stray duplicate of this method under the misspelled name
    ``c_support_code_`` — which Theano would never call — is dropped.)
    """
    return """
#if PY_MAJOR_VERSION >= 3
#define PyInt_AsLong PyLong_AsLong
#endif
"""
def c_headers(self):
    """Headers required by the generated C code (numpy compatibility
    shims used by the pygpu C API). The dangling ``def c_code_`` diff
    residue line that preceded this method is removed.
    """
    return ['<numpy_compat.h>']
def c_code(self, node, name, inputs, out_, sub):
    """Generate the C implementation of GpuJoin.

    The emitted C code concatenates the input GpuArrays along ``axis``.
    When ``self.view`` designates an input and every other input turns
    out to be empty along the join axis at runtime, the code instead
    returns that input itself (incref'd), making the op work in place.

    Fixes relative to the pasted diff: the superseded ``c_code_``
    implementation that was interleaved with this one is removed, and
    the in-place branch no longer calls ``Py_XDECREF(out)`` a second
    time — ``out`` was already XDECREF'ed above and not reset to NULL,
    so the extra decref risked a double free.
    """
    axis, tensors = inputs[0], inputs[1:]
    view = self.view
    # Input the output will alias in the in-place case. With view == -1
    # this is just tensors[-1]; the generated C condition then never
    # takes the in-place branch, so the value is irrelevant.
    non_empty_tensor = tensors[view]
    copy_to_list = []
    for i, inp in enumerate(tensors):
        copy_to_list.append("als[%s] = &%s->ga;" % (i, inp))
    n = len(tensors)
    fail = sub['fail']
    out = out_[0]
    copy_inputs_to_list = '\n'.join(copy_to_list)
    restype = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
    ctx = sub['params']
    code = """
    const GpuArray **als = (const GpuArray **)PyMem_Malloc(sizeof(GpuArray *) *
                                                           %(n)s);
    if (als == NULL) {
        PyErr_NoMemory();
        %(fail)s
    }
    %(copy_inputs_to_list)s
    Py_XDECREF(%(out)s);
    {
        int axis = PyInt_AsLong((PyObject *)%(axis)s);
        if (axis < 0) {
            if (axis == -1 && PyErr_Occurred()) {
                %(fail)s
            }
            axis += als[0]->nd;
            if (axis < 0) {
                PyErr_SetString(PyExc_IndexError, "invalid axis");
                %(fail)s
            }
        }
        int tensors_lens_sum = 0""" % locals()
    # Accumulate the join-axis length of every input; subtracting the
    # designated view input below leaves the total length of all OTHER
    # inputs, which must be zero for the in-place path.
    for inp in tensors:
        code += """ + PyGpuArray_DIM(%(inp)s, axis)""" % locals()
    code += """;
        tensors_lens_sum -= PyGpuArray_DIM(%(non_empty_tensor)s, axis);
        if (%(view)s != -1 && tensors_lens_sum == 0) {
            /* All other inputs are empty: output is the view input
               itself (old output was already XDECREF'ed above). */
            Py_INCREF(%(non_empty_tensor)s);
            %(out)s = %(non_empty_tensor)s;
        }
        else {
            %(out)s = pygpu_concatenate(als, %(n)s, axis,
                                        %(restype)s,
                                        (PyObject *)&PyGpuArrayType,
                                        %(ctx)s);
        }
        PyMem_Free(als);
    }
    if (%(out)s == NULL)
        %(fail)s
    """ % locals()
    return code
gpu_join = GpuJoin()
......
......@@ -453,3 +453,24 @@ def test_hostfromgpu_shape_i():
assert isinstance(topo[1].op, theano.compile.Shape_i)
assert isinstance(topo[2].op, theano.tensor.opt.MakeVector)
assert tuple(f(cv)) == (5, 4)
def test_Gpujoin_inplace():
    """Test Gpujoin to work inplace.

    Joins a shared 3-element vector with a zero-length vector using
    GpuJoin(view=0); since every input except input 0 is empty, the
    compiled function must hand back the shared variable's own internal
    storage (a view), not a copy.
    """
    length = T.lscalar()
    values = numpy.array([3, 4, 5], dtype=theano.config.floatX)
    shared_x = theano.shared(values, borrow=True)
    empty = T.zeros((length,))
    inplace_join = GpuJoin(view=0)
    joined = inplace_join(0, shared_x, empty)
    fn = theano.function([length], theano.Out(joined, borrow=True))
    assert shared_x.get_value(borrow=True, return_internal_type=True) is fn(0)
    assert numpy.allclose(fn(0), [3, 4, 5])
......@@ -3897,9 +3897,12 @@ class Join(Op):
def __str__(self):
    """Readable op name: bare class name when no in-place view is set,
    otherwise class name plus the op's props.

    The superseded lines from the old implementation (`return "Join"`,
    `return super(Join, self).__str__()`) that the diff interleaved
    here are removed — they were dead/duplicated code.
    """
    if self.view == -1:
        return self.__class__.__name__
    else:
        return "%s{%s}" % (
            self.__class__.__name__,
            ", ".join("%s=%r" % (p, getattr(self, p))
                      for p in self.__props__))
def __setstate__(self, d):
self.__dict__.update(d)
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment