提交 7767d3d1 authored 作者: Étienne Simon's avatar Étienne Simon

Add C code for GpuReshape (fixes #2369)

上级 d9fc9d73
......@@ -2437,6 +2437,102 @@ class GpuReshape(tensor.Reshape, GpuOp):
out[0] = x.reshape(tuple(shp))
def c_code_cache_version(self):
return (1,)
def c_code(self, node, name, inputs, outputs, sub):
x, shape = inputs
output, = outputs
new_ndim = self.ndim
sdtype = node.inputs[1].type.dtype_specs()[1]
fail = sub['fail']
return """
PyObject *new_shape = PyTuple_New(%(new_ndim)s);
size_t total = 1;
int compute_axis = -1;
assert (PyArray_NDIM(%(shape)s) == 1);
if (PyArray_DIM(%(shape)s, 0) != %(new_ndim)s)
{
PyErr_Format(PyExc_ValueError,
"GpuReshape: given shape is of incorrect "
"length (%%d should be %%d).",
PyArray_DIM(%(shape)s, 0), %(new_ndim)s);
%(fail)s;
}
for (size_t i = 0; i < %(new_ndim)s; ++i)
{
long dimension = ((%(sdtype)s*)(
PyArray_BYTES(%(shape)s) +
i * PyArray_STRIDES(%(shape)s)[0]))[0];
if (dimension == -1)
{
if (compute_axis != -1)
{
PyErr_Format(PyExc_ValueError,
"GpuReshape: only one -1 is accepted "
"in the new shape, but got two at "
"indices %%d and %%zu.",
compute_axis, i);
%(fail)s;
}
compute_axis = i;
}
else
{
total *= dimension;
PyObject *py_dimension = PyInt_FromLong(dimension);
PyTuple_SetItem(new_shape, i, py_dimension);
}
}
if (compute_axis != -1)
{
long dimension = CudaNdarray_SIZE(%(x)s) / total;
total *= dimension;
PyObject *py_dimension = PyInt_FromLong(dimension);
PyTuple_SetItem(new_shape, compute_axis, py_dimension);
}
if (total != CudaNdarray_SIZE(%(x)s))
{
const int *shape_from_py = CudaNdarray_HOST_DIMS(%(x)s);
char shape_from[128];
size_t offset = 0;
for (size_t i = 0; i < %(x)s->nd; ++i)
{
int ws = snprintf(shape_from + offset, 128 - offset,
" %%d,", shape_from_py[i]);
offset += ws;
if ( ws < 0 || offset >= 128 )
break;
}
shape_from[0]='(';
if(offset < 128)
shape_from[offset>0 ? offset-1 : 1] = ')';
else
for(size_t i=124; i<127; ++i)
shape_from[i] = '.';
PyObject *shape_to_py = PyObject_Str(new_shape);
const char *shape_to = PyString_AsString(shape_to_py);
PyErr_Format(PyExc_ValueError,
"GpuReshape: cannot reshape input of shape "
"%%s to shape %%s.", shape_from, shape_to);
%(fail)s;
}
Py_XDECREF(%(output)s);
%(output)s = (CudaNdarray*) CudaNdarray_Reshape(%(x)s, new_shape);
if (%(output)s == NULL)
{
%(fail)s;
}
""" % locals()
class GpuSubtensor(GpuOp, tensor.Subtensor):
"""
......
......@@ -503,6 +503,11 @@ DllExport PyObject * CudaNdarray_Copy(const CudaNdarray * self);
*/
DllExport PyObject * CudaNdarray_ReduceSum(CudaNdarray * self, PyObject * py_reduce_mask);
/**
* Reshape self to the new shape gived by the tuple shape.
*/
DllExport PyObject * CudaNdarray_Reshape(CudaNdarray * self, PyObject * shape);
/**
* Transfer the contents of numpy array `obj` to `self`.
*
......
......@@ -881,6 +881,82 @@ class GpuReshape(HideC, tensor.Reshape):
raise ValueError("total size of new array must be unchanged")
out[0] = x.reshape(tuple(shp))
def c_code_cache_version(self):
return (1,)
def c_code(self, node, name, inputs, outputs, sub):
x, shape = inputs
output, = outputs
new_ndim = self.ndim
sdtype = node.inputs[1].type.dtype_specs()[1]
fail = sub['fail']
return """
size_t old_size = 1, new_size = 1;
size_t new_dims[%(new_ndim)s];
int compute_axis = -1;
assert (PyArray_NDIM(%(shape)s) == 1);
if (PyArray_DIM(%(shape)s, 0) != %(new_ndim)s)
{
PyErr_Format(PyExc_ValueError,
"GpuReshape: given shape is of incorrect "
"length (%%d should be %%d).",
PyArray_DIM(%(shape)s, 0), %(new_ndim)s);
%(fail)s;
}
for (size_t i = 0; i < %(x)s->ga.nd; ++i)
old_size *= %(x)s->ga.dimensions[i];
for (size_t i = 0; i < %(new_ndim)s; ++i)
{
new_dims[i] = ((%(sdtype)s*)(
PyArray_BYTES(%(shape)s) +
i * PyArray_STRIDES(%(shape)s)[0]))[0];
if (new_dims[i] == -1)
{
if (compute_axis != -1)
{
PyErr_Format(PyExc_ValueError,
"GpuReshape: only one -1 is accepted "
"in the new shape, but got two at "
"indices %%d and %%zu.",
compute_axis, i);
%(fail)s;
}
compute_axis = i;
}
else
new_size *= new_dims[i];
}
if (compute_axis == -1 && new_size != old_size)
{
PyErr_Format(PyExc_ValueError,
"GpuReshape: trying to reshape an array of "
"total size %%zu into an array of total size "
"%%zu.", old_size, new_size);
%(fail)s;
}
else if (compute_axis != -1 && old_size %% new_size != 0)
{
PyErr_Format(PyExc_ValueError,
"GpuReshape: -1 axis found at index %%d in "
"new shape but the total size of the array "
"(%%zu) is not divisible by the given shapes "
"(%%zu).", compute_axis, old_size, new_size);
%(fail)s;
}
Py_XDECREF(%(output)s);
%(output)s = pygpu_reshape(%(x)s, %(new_ndim)s, new_dims,
GA_C_ORDER, 0, compute_axis);
if (%(output)s == NULL)
{
%(fail)s;
}
""" % locals()
class GpuJoin(HideC, Join):
_f16_ok = True
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论