提交 16c6f70b，作者：Reyhane Askari

GpuJoin perform and C function work inplace

上级 46eaf9df
...@@ -1255,16 +1255,14 @@ class GpuJoin(HideC, Join): ...@@ -1255,16 +1255,14 @@ class GpuJoin(HideC, Join):
# start from index 1. # start from index 1.
self.view_map = {0: [1 + view]} self.view_map = {0: [1 + view]}
# def __str__(self): def __str__(self):
# if self.view == -1: if self.view == -1:
# return "Join" return self.__class__.__name__
# else: else:
# return super(Join, self).__str__() return "%s{%s}" % (
self.__class__.__name__,
# def __setstate__(self, d): ", ".join("%s=%r" % (p, getattr(self, p))
# self.__dict__.update(d) for p in self.__props__))
# if not hasattr(self, "view"):
# self.view = -1
def make_node(self, axis, *tensors): def make_node(self, axis, *tensors):
node = Join.make_node(self, axis, *tensors) node = Join.make_node(self, axis, *tensors)
...@@ -1296,59 +1294,85 @@ class GpuJoin(HideC, Join): ...@@ -1296,59 +1294,85 @@ class GpuJoin(HideC, Join):
if (view != -1) and numpy.all( if (view != -1) and numpy.all(
[tensor.shape[axis] == 0 for tensor in [tensor.shape[axis] == 0 for tensor in
tensors[0:view] + tensors[view + 1:]]): tensors[0:view] + tensors[view + 1:]]):
import ipdb; ipdb.set_trace()
out[0] = tensors[view] out[0] = tensors[view]
else: else:
out[0] = pygpu.concatenate(tensors, axis=axis, context=ctx).astype( out[0] = pygpu.concatenate(tensors, axis=axis, context=ctx).astype(
node.outputs[0].dtype) node.outputs[0].dtype)
def c_code_cache_version(self): def c_code_cache_version(self):
return return (3,)
return (2,)
def c_support_code_(self): def c_support_code(self):
return """ return """
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
#define PyInt_AsLong PyLong_AsLong #define PyInt_AsLong PyLong_AsLong
#endif #endif
""" """
def c_code_(self, node, name, inputs, out_, sub): def c_headers(self):
return ['<numpy_compat.h>']
def c_code(self, node, name, inputs, out_, sub):
axis, tensors = inputs[0], inputs[1:]
copy_to_list = [] copy_to_list = []
restype = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype) restype = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
for i, inp in enumerate(inputs[1:]): view = self.view
non_empty_tensor = tensors[view]
for i, inp in enumerate(tensors):
copy_to_list.append("als[%s] = &%s->ga;" % (i, inp)) copy_to_list.append("als[%s] = &%s->ga;" % (i, inp))
return """
const GpuArray **als = (const GpuArray **)PyMem_Malloc(sizeof(GpuArray *) * n = len(tensors)
fail = sub['fail']
out = out_[0]
copy_inputs_to_list = '\n'.join(copy_to_list)
restype = restype
ctx = sub['params']
code = """
const GpuArray **als = (const GpuArray **)PyMem_Malloc(sizeof(GpuArray *) *
%(n)s); %(n)s);
if (als == NULL) { if (als == NULL) {
PyErr_NoMemory(); PyErr_NoMemory();
%(fail)s %(fail)s
} }
%(copy_inputs_to_list)s %(copy_inputs_to_list)s
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
{ {
int axis = PyInt_AsLong((PyObject *)%(axis)s); int axis = PyInt_AsLong((PyObject *)%(axis)s);
if (axis < 0) { if (axis < 0) {
if (axis == -1 && PyErr_Occurred()) { if (axis == -1 && PyErr_Occurred()) {
%(fail)s %(fail)s
} }
axis += als[0]->nd; axis += als[0]->nd;
if (axis < 0) { if (axis < 0) {
PyErr_SetString(PyExc_IndexError, "invalid axis"); PyErr_SetString(PyExc_IndexError, "invalid axis");
%(fail)s %(fail)s
} }
} }
%(out)s = pygpu_concatenate(als, %(n)s, axis,
%(restype)s, (PyObject *)&PyGpuArrayType, int tensors_lens_sum = 0""" % locals()
%(ctx)s);
} for inp in tensors:
PyMem_Free(als); code += """ + PyGpuArray_DIM(%(inp)s, axis)""" % locals()
if (%(out)s == NULL) code += """;\n
%(fail)s tensors_lens_sum -= PyGpuArray_DIM(%(non_empty_tensor)s, axis);
""" % dict(n=len(inputs[1:]), fail=sub['fail'], out=out_[0], if(%(view)s != -1 && tensors_lens_sum == 0){
axis=inputs[0], copy_inputs_to_list='\n'.join(copy_to_list), Py_XDECREF(%(out)s);
restype=restype, ctx=sub['params']) Py_INCREF(%(non_empty_tensor)s);
%(out)s = %(non_empty_tensor)s;
}
else{
%(out)s = pygpu_concatenate(als, %(n)s, axis,
%(restype)s, (PyObject *)&PyGpuArrayType,
%(ctx)s);
}
PyMem_Free(als);
}
if (%(out)s == NULL)
%(fail)s
""" % locals()
return code
gpu_join = GpuJoin() gpu_join = GpuJoin()
......
...@@ -453,3 +453,24 @@ def test_hostfromgpu_shape_i(): ...@@ -453,3 +453,24 @@ def test_hostfromgpu_shape_i():
assert isinstance(topo[1].op, theano.compile.Shape_i) assert isinstance(topo[1].op, theano.compile.Shape_i)
assert isinstance(topo[2].op, theano.tensor.opt.MakeVector) assert isinstance(topo[2].op, theano.tensor.opt.MakeVector)
assert tuple(f(cv)) == (5, 4) assert tuple(f(cv)) == (5, 4)
def test_Gpujoin_inplace():
    """Check that GpuJoin(view=0) returns a view of its non-empty input.

    Every input except the first is empty along the join axis, so the op
    should work in place: the function output must be the very internal
    storage object of the shared variable, not a freshly allocated copy.
    """
    length = T.lscalar()
    values = numpy.array([3, 4, 5], dtype=theano.config.floatX)
    shared_x = theano.shared(values, borrow=True)
    empty = T.zeros((length,))

    # view=0 tells GpuJoin that input 0 may be returned as-is when all
    # the other inputs contribute zero elements along the join axis.
    joined = GpuJoin(view=0)(0, shared_x, empty)
    fn = theano.function([length], theano.Out(joined, borrow=True))

    internal = shared_x.get_value(borrow=True, return_internal_type=True)
    assert internal is fn(0)
    assert numpy.allclose(fn(0), [3, 4, 5])
...@@ -3897,9 +3897,12 @@ class Join(Op): ...@@ -3897,9 +3897,12 @@ class Join(Op):
def __str__(self): def __str__(self):
if self.view == -1: if self.view == -1:
return "Join" return self.__class__.__name__
else: else:
return super(Join, self).__str__() return "%s{%s}" % (
self.__class__.__name__,
", ".join("%s=%r" % (p, getattr(self, p))
for p in self.__props__))
def __setstate__(self, d): def __setstate__(self, d):
self.__dict__.update(d) self.__dict__.update(d)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论