Commit 173f72bf authored by Pascal Lamblin, committed by GitHub

Merge pull request #6174 from notoraptor/op-params-dimshuffle

Wrap op params for [Gpu]DimShuffle.
#section support_code_apply
/* Perform a GpuDimShuffle of `input` into `*out`.
 *
 * params->transposition gives the axis permutation applied first via
 * pygpu_transpose; params->_new_order then drives a reshape in which each
 * -1 entry (an 'x' in the Python-level new_order) inserts a length-1
 * broadcastable dimension.  When params->inplace is false the result is a
 * fresh copy instead of a view on the input.
 *
 * Returns 0 on success; returns 1 and sets a Python exception on error.
 */
int gpu_dimshuffle(PyGpuArrayObject* input, PyGpuArrayObject** out, PARAMS_TYPE* params) {
PyGpuArrayObject *tmp = NULL;
/* Expected input rank / produced output rank, taken from the sizes of the
 * param vectors. */
npy_intp nd_in = PyArray_SIZE(params->input_broadcastable);
npy_intp nd_out = PyArray_SIZE(params->_new_order);
npy_int64* new_order = NULL;
unsigned int* transposition = NULL;
size_t* sh = NULL;
int e;
if (input->ga.nd != nd_in) {
PyErr_SetString(PyExc_TypeError, "input nd");
return 1;
}
/* The param vectors are read through raw data pointers below, so they
 * must be C-contiguous. */
if (!PyArray_IS_C_CONTIGUOUS(params->_new_order)) {
PyErr_SetString(PyExc_RuntimeError, "DimShuffle: param _new_order must be C-contiguous.");
return 1;
}
if (!PyArray_IS_C_CONTIGUOUS(params->transposition)) {
PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: param transposition must be C-contiguous.");
return 1;
}
/* Drop any previous output; *out is rebuilt below. */
Py_XDECREF(*out);
/** Do shuffle. **/
new_order = (npy_int64*) PyArray_DATA(params->_new_order);
/* Type of params->transposition (npy_uint32) should be an alias of unsigned int
 * on platforms supported by Theano. */
transposition = (unsigned int*) PyArray_DATA(params->transposition);
sh = (size_t*) malloc(nd_out * sizeof(size_t));
if (sh == NULL) {
PyErr_NoMemory();
return 1;
}
tmp = pygpu_transpose(input, transposition);
if (!tmp) {
free(sh);
return 1;
}
/* Build the output shape: 1 for inserted broadcastable dims (-1 in
 * new_order), otherwise consume the transposed dims in order. */
e = 0;
for (npy_intp i = 0; i < nd_out; ++i) {
if (new_order[i] == -1) {
sh[i] = 1;
} else {
sh[i] = tmp->ga.dimensions[e];
++e;
}
}
*out = pygpu_reshape(tmp, nd_out, sh, GA_ANY_ORDER, 1, -1);
Py_DECREF(tmp);
free(sh);
if (*out == NULL) {
return 1;
}
/** End shuffle. **/
if (!params->inplace) {
/* Not inplace: return a copy so the output does not alias the input. */
tmp = pygpu_copy(*out, GA_ANY_ORDER);
Py_DECREF(*out);
if (!tmp) {
*out = NULL;
return 1;
}
*out = tmp;
}
return 0;
}
......@@ -407,12 +407,13 @@ class SupportCodeError(Exception):
"""
class GpuDimShuffle(HideC, DimShuffle):
class GpuDimShuffle(DimShuffle):
"""
DimShuffle on the GPU.
"""
_f16_ok = True
c_func_name = 'gpu_dimshuffle'
def make_node(self, input):
ctx_name = infer_context_name(input)
......@@ -430,7 +431,7 @@ class GpuDimShuffle(HideC, DimShuffle):
s = "GpuDimShuffle{%s}"
return s % (','.join(str(x) for x in self.new_order))
def perform(self, node, inp, out):
def perform(self, node, inp, out, params):
input, = inp
storage, = out
......@@ -448,66 +449,6 @@ class GpuDimShuffle(HideC, DimShuffle):
storage[0] = res
def c_support_code_apply(self, node, name):
# Emit a per-apply C helper `<name>_f` that performs the dimshuffle on a
# PyGpuArrayObject: transpose with a static axis array, then reshape to
# insert the broadcastable ('x') dimensions.
def copy_shape(nd_out):
# Build C statements filling sh[0..nd_out-1]: 1 for augmented
# (inserted 'x') dimensions, otherwise the next dimension of the
# transposed temporary, consumed in order.
stmts = []
e = 0
for d in range(nd_out):
if d in self.augment:
stmts.append("sh[%s] = 1;" % (d,))
else:
stmts.append("sh[%s] = tmp->ga.dimensions[%s];" % (d, e))
e += 1
return '\n '.join(stmts)
# NOTE(review): the static axis array is self.shuffle + self.drop, i.e.
# the full transposition applied before the reshape.
return """
static const unsigned int %(name)s_ax[] = {%(shuffle)s};
static PyGpuArrayObject *%(name)s_f(PyGpuArrayObject *a) {
PyGpuArrayObject *res, *tmp;
size_t sh[%(nd_out)s];
tmp = pygpu_transpose(a, %(name)s_ax);
if (!tmp) return NULL;
%(copy_shape)s
res = pygpu_reshape(tmp, %(nd_out)s, sh, GA_ANY_ORDER, 1, -1);
Py_DECREF(tmp);
return res;
}
""" % dict(shuffle=', '.join(str(a) for a in (self.shuffle + self.drop)),
name=name, nd_out=len(self.new_order),
copy_shape=copy_shape(len(self.new_order)))
def c_code(self, node, name, inputs, outputs, sub):
# Generate the apply C code: check the input rank, then call the
# per-apply helper emitted by c_support_code_apply; when the op is not
# inplace, replace the resulting view with a fresh copy.
d = dict(name=name, fail=sub['fail'], inp=inputs[0], out=outputs[0],
nd=len(self.input_broadcastable))
process = """
PyGpuArrayObject *tmp = NULL;
if (%(inp)s->ga.nd != %(nd)s) {
PyErr_SetString(PyExc_TypeError, "input nd");
%(fail)s
}
Py_XDECREF(%(out)s);
%(out)s = %(name)s_f(%(inp)s);
if (%(out)s == NULL) {%(fail)s}
""" % d
if not self.inplace:
# Non-inplace: the output must not alias the input buffer.
process += """
tmp = pygpu_copy(%(out)s, GA_ANY_ORDER);
Py_DECREF(%(out)s);
if (!tmp) {
%(out)s = NULL;
%(fail)s
}
%(out)s = tmp;
""" % d
return process
def c_code_cache_version(self):
# Version tag for the generated C code; bump whenever c_code or
# c_support_code_apply changes so cached compiled modules are rebuilt.
return (5,)
class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
"""
......
#section support_code_apply
/* DimShuffle on the CPU: build a view of `input` (or of a fresh aligned copy
 * when not inplace) whose dimensions and strides realize the axis permutation,
 * the dropped broadcastable axes and the inserted broadcastable axes described
 * by the op params (`params->_new_order`, where -1 marks an inserted 'x' axis).
 *
 * Returns 0 on success.  Returns 1 with a Python exception set on error; in
 * that case no references are leaked.
 */
int cpu_dimshuffle(PyArrayObject* input, PyArrayObject** res, PARAMS_TYPE* params) {
    npy_int64* new_order;
    npy_intp nd_in;
    npy_intp nd_out;
    PyArrayObject* basename;
    npy_intp* dimensions;
    npy_intp* strides;

    /* The param vectors are read through raw data pointers, so they must be
     * C-contiguous. */
    if (!PyArray_IS_C_CONTIGUOUS(params->input_broadcastable)) {
        PyErr_SetString(PyExc_RuntimeError, "DimShuffle: param input_broadcastable must be C-contiguous.");
        return 1;
    }
    if (!PyArray_IS_C_CONTIGUOUS(params->_new_order)) {
        PyErr_SetString(PyExc_RuntimeError, "DimShuffle: param _new_order must be C-contiguous.");
        return 1;
    }
    new_order = (npy_int64*) PyArray_DATA(params->_new_order);
    nd_in = PyArray_SIZE(params->input_broadcastable);
    nd_out = PyArray_SIZE(params->_new_order);

    /* check_input_nd */
    if (PyArray_NDIM(input) != nd_in) {
        PyErr_SetString(PyExc_NotImplementedError, "input nd");
        return 1;
    }

    /* clear_output (Py_XDECREF already handles NULL) */
    Py_XDECREF(*res);
    *res = NULL;

    /* get_base: view the input directly when inplace, otherwise work on an
     * aligned copy so the caller's buffer is never aliased. */
    if (params->inplace) {
        basename = input;
        Py_INCREF((PyObject*)basename);
    } else {
        basename =
            (PyArrayObject*)PyArray_FromAny((PyObject*)input,
                NULL, 0, 0, NPY_ARRAY_ALIGNED|NPY_ARRAY_ENSURECOPY, NULL);
        /* BUGFIX: PyArray_FromAny can return NULL (e.g. out of memory); the
         * original code dereferenced basename without checking. */
        if (basename == NULL) {
            return 1;
        }
    }

    /* shape_statements and strides_statements */
    dimensions = (npy_intp*) malloc(nd_out * sizeof(npy_intp));
    strides = (npy_intp*) malloc(nd_out * sizeof(npy_intp));
    if (dimensions == NULL || strides == NULL) {
        PyErr_NoMemory();
        free(dimensions);
        free(strides);
        Py_DECREF(basename);  /* BUGFIX: original leaked this reference. */
        return 1;
    }
    for (npy_intp i = 0; i < nd_out; ++i) {
        if (new_order[i] != -1) {
            /* Kept input axis: take its length; stride 0 when the length is 1
             * so the axis can broadcast. */
            dimensions[i] = PyArray_DIMS(basename)[new_order[i]];
            strides[i] = PyArray_DIMS(basename)[new_order[i]] == 1 ?
                         0 : PyArray_STRIDES(basename)[new_order[i]];
        } else {
            /* -1 marks an inserted broadcastable ('x') axis. */
            dimensions[i] = 1;
            strides[i] = 0;
        }
    }

    /* set the strides of the broadcasted dimensions.
     * This algorithm is from numpy: PyArray_Newshape() in
     * cvs/numpy/numpy/core/src/multiarraymodule.c */
    if (nd_out > 0) {
        if (strides[nd_out - 1] == 0)
            strides[nd_out - 1] = PyArray_DESCR(basename)->elsize;
        for (npy_intp i = nd_out - 2; i > -1; --i) {
            if (strides[i] == 0)
                strides[i] = strides[i + 1] * dimensions[i + 1];
        }
    }

    /* close_bracket: create the output as a view on basename's data.
     * Only the writable flag is borrowed from the base; NPY_OWNDATA
     * defaults to 0.  PyArray_New copies dimensions/strides, so our
     * buffers can be freed immediately afterwards. */
    *res = (PyArrayObject*)PyArray_New(&PyArray_Type, nd_out, dimensions,
                                       PyArray_TYPE(basename), strides,
                                       PyArray_DATA(basename), PyArray_ITEMSIZE(basename),
                                       (NPY_ARRAY_WRITEABLE * PyArray_ISWRITEABLE(basename)),
                                       NULL);
    free(strides);
    free(dimensions);
    if (*res == NULL) {
        Py_DECREF(basename);  /* BUGFIX: original leaked this reference. */
        return 1;
    }
    /* recalculate flags: CONTIGUOUS, FORTRAN, ALIGNED */
    PyArray_UpdateFlags(*res, NPY_ARRAY_UPDATE_ALL);
    /* We are making a view in both inplace and non-inplace cases.
     * PyArray_SetBaseObject steals our reference to basename (even on
     * failure, per the NumPy C-API docs). */
    if (PyArray_SetBaseObject(*res, (PyObject*)basename) < 0) {
        Py_DECREF(*res);
        *res = NULL;
        return 1;
    }
    return 0;
}
......@@ -9,7 +9,7 @@ import theano
from theano import gof
from theano.compat import izip
from theano.configparser import change_flags
from theano.gof import Apply, Op, OpenMPOp
from theano.gof import Apply, Op, COp, OpenMPOp, ParamsType
from theano import scalar
from theano.scalar import get_scalar_type
from theano.printing import pprint
......@@ -50,7 +50,7 @@ def TensorConstant(*inputs, **kwargs):
# DimShuffle #
##################
class DimShuffle(Op):
class DimShuffle(COp):
"""
Allows to reorder the dimensions of a tensor or insert or remove
broadcastable dimensions.
......@@ -130,12 +130,33 @@ class DimShuffle(Op):
_f16_ok = True
check_input = False
__props__ = ("input_broadcastable", "new_order", "inplace")
c_func_file = 'c_code/dimshuffle.c'
c_func_name = 'cpu_dimshuffle'
@property
def params_type(self):
# We can't directly create `params_type` as class attribute
# because of importation issues related to TensorType.
# Wraps the op attributes consumed by the C code (cpu_dimshuffle in
# c_code/dimshuffle.c): input_broadcastable, _new_order, transposition
# and inplace.
return ParamsType(input_broadcastable=TensorType(dtype='bool', broadcastable=(False,)),
_new_order=theano.tensor.lvector,
transposition=TensorType(dtype='uint32', broadcastable=(False,)),
inplace=theano.scalar.bool)
@property
def _new_order(self):
    """Integer form of ``new_order`` for the C code: 'x' mapped to -1."""
    # 'x' (insert a broadcastable dimension) is not a valid integer entry,
    # so the C side receives -1 in its place.
    order = []
    for axis in self.new_order:
        order.append(-1 if axis == 'x' else axis)
    return order
@property
def transposition(self):
    """Axis order handed to the C code: ``self.shuffle`` followed by ``self.drop``."""
    axes = self.shuffle + self.drop
    return axes
def __init__(self, input_broadcastable, new_order, inplace=True):
input_broadcastable = tuple(input_broadcastable)
self.input_broadcastable = input_broadcastable
new_order = tuple(new_order)
self.new_order = new_order
COp.__init__(self, [self.c_func_file], self.c_func_name)
self.input_broadcastable = tuple(input_broadcastable)
self.new_order = tuple(new_order)
if inplace is True:
self.inplace = inplace
else:
......@@ -185,6 +206,13 @@ class DimShuffle(Op):
if self.inplace:
self.view_map = {0: [0]}
def __setstate__(self, state):
# Restore pickled state, then make sure the COp machinery is initialized:
# pickles created before DimShuffle became a COp lack `func_files`.
self.__dict__.update(state)
if not hasattr(self, 'func_files'):
# Perhaps we are loading an old `Op` version of DimShuffle.
# Let's just build the COp.
COp.__init__(self, [self.c_func_file], self.c_func_name)
def make_node(self, _input):
input = as_tensor_variable(_input)
ib = tuple(input.type.broadcastable)
......@@ -222,7 +250,7 @@ class DimShuffle(Op):
else:
return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order)
def perform(self, node, inp, out):
def perform(self, node, inp, out, params):
input, = inp
storage, = out
# drop
......@@ -260,104 +288,6 @@ class DimShuffle(Op):
return [None]
return self(*eval_points, **dict(return_list=True))
def c_code(self, node, name, inp, out, sub):
# Build C code implementing the dimshuffle as a view: a new ndarray that
# shares the base's data, with dimensions/strides rearranged according to
# self.new_order ('x' entries become broadcastable, stride-0 axes).
input, = inp
res, = out
basename = input + '__view_or_copy'
def statements(lst):
# Join C statement fragments into a ';'-terminated block.
return ';\n'.join(lst) + ';'
nd_in = len(self.input_broadcastable)
nd_out = len(self.new_order)
check_input_nd = [('if (PyArray_NDIM(%(input)s) != ' + str(nd_in) + ')'
'{PyErr_SetString(PyExc_NotImplementedError, '
'"input nd"); %(fail)s;}')]
clear_output = ['if (%(res)s) {Py_XDECREF(%(res)s);}']
# get the copy / view of the input depending on whether we're doing
# things inplace or not.
if self.inplace:
get_base = ['{ PyArrayObject * %(basename)s = %(input)s',
'Py_INCREF((PyObject*)%(basename)s)']
else:
get_base = [
('{ PyArrayObject * %(basename)s = '
'(PyArrayObject*)PyArray_FromAny((PyObject*)%(input)s,'
' NULL, 0, 0, NPY_ARRAY_ALIGNED|NPY_ARRAY_ENSURECOPY,'
' NULL)')]
# Output dimensions: 1 for 'x', otherwise the selected input dim.
shape_statements = ['npy_intp dimensions[%i]' % nd_out]
for i, o in enumerate(self.new_order):
if o != 'x':
shape_statements += [('dimensions[' + str(
i) + '] = PyArray_DIMS(%(basename)s)[' + str(o) + ']')]
else:
shape_statements += [('dimensions[' + str(i) + '] = 1')]
strides_statements = ['npy_intp strides[%i]' % nd_out]
# set the strides of the non-broadcasted dimensions
for i, o in enumerate(self.new_order):
if o != 'x':
strides_statements += [('strides[' + str(i) +
'] = PyArray_DIMS(%(basename)s)[' +
str(o) +
'] == 1? 0 : '
'PyArray_STRIDES(%(basename)s)[' +
str(o) + ']')]
else:
strides_statements += [('strides[' + str(i) + '] = 0')]
# set the strides of the broadcasted dimensions
# this algorithm is from numpy: PyArray_Newshape() in
# cvs/numpy/numpy/core/src/multiarraymodule.c
if nd_out > 0:
strides_statements.append(
'if (strides[' +
str(nd_out) +
'-1] == 0) strides[' +
str(nd_out) +
'-1] = PyArray_DESCR(%(basename)s)->elsize'
)
for i in xrange(nd_out - 2, -1, -1):
strides_statements.append(
"if (strides[%(i)s] == 0) strides[%(i)s] = strides[%(i)s+1] * "
"dimensions[%(i)s+1]" % dict(i=str(i)))
close_bracket = [
# create a new array,
('%(res)s = (PyArrayObject*)PyArray_New(&PyArray_Type, '
'' + str(nd_out) + ', dimensions, '
'PyArray_TYPE(%(basename)s), strides, '
'PyArray_DATA(%(basename)s), PyArray_ITEMSIZE(%(basename)s), '
# borrow only the writable flag from the base
# the NPY_OWNDATA flag will default to 0.
'(NPY_ARRAY_WRITEABLE*PyArray_ISWRITEABLE(%(basename)s)), '
'NULL)'),
'if (%(res)s == NULL) %(fail)s;',
# recalculate flags: CONTIGUOUS, FORTRAN, ALIGNED
'PyArray_UpdateFlags(%(res)s, NPY_ARRAY_UPDATE_ALL)',
# we are making a view in both inplace and non-inplace cases
"""
PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s);
"""
'}']
# Assemble and fill in the %()s placeholders from local names and `sub`.
full_code = statements(check_input_nd +
clear_output +
get_base +
shape_statements +
strides_statements +
close_bracket)
return full_code % dict(locals(), **sub)
def c_code_cache_version(self):
# Version tag for the generated C code; bump when c_code changes so
# previously compiled modules are not reused.
return (3,)
def grad(self, inp, grads):
x, = inp
gz, = grads
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment