提交 b8f31218 authored 作者: notoraptor's avatar notoraptor

Wrap op params for theano.gpuarray.elemwise.GpuDimShuffle.

上级 f98e8f99
#section support_code
#section support_code_apply
/* Shuffle, augment and/or drop dimensions of `input`, storing the result in `*out`.
 *
 * Implements GpuDimShuffle via op params:
 *   - params->input_broadcastable: vector whose length is the expected input nd.
 *   - params->_new_order: int64 vector; output dim i takes the transposed input
 *     dim when _new_order[i] != -1, and becomes a new size-1 dim when it is -1.
 *   - params->transposition: int64 vector (shuffle + drop), the axis permutation
 *     applied to the input before reshaping.
 *   - params->inplace: when false, a fresh copy is returned so the output does
 *     not alias the input.
 *
 * Returns 0 on success, 1 on error with a Python exception set.
 */
int gpu_dimshuffle(PyGpuArrayObject* input, PyGpuArrayObject** out, PARAMS_TYPE* params) {
    PyGpuArrayObject* tmp = NULL;
    npy_intp nd_in = PyArray_SIZE(params->input_broadcastable);
    npy_intp nd_out = PyArray_SIZE(params->_new_order);
    npy_int64* new_order = NULL;
    npy_int64* transposition_data = NULL;
    unsigned int* transposition = NULL;
    size_t* sh = NULL;
    int e;

    if (input->ga.nd != nd_in) {
        PyErr_SetString(PyExc_TypeError, "input nd");
        return 1;
    }
    /* The raw-pointer loops below assume C-contiguous param arrays. */
    if (!PyArray_IS_C_CONTIGUOUS(params->_new_order)) {
        /* Message fixed to name the GPU op, consistent with the other errors here. */
        PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: param _new_order must be C-contiguous.");
        return 1;
    }
    if (!PyArray_IS_C_CONTIGUOUS(params->transposition)) {
        PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: param transposition must be C-contiguous.");
        return 1;
    }

    /* Release the previous output, and also reset the pointer: if we fail
     * below, *out must not be left dangling on the decref'd object (the
     * caller's fail path may decref it again). */
    Py_XDECREF(*out);
    *out = NULL;

    /** Do shuffle. **/
    new_order = (npy_int64*) PyArray_DATA(params->_new_order);
    transposition = (unsigned int*) malloc(nd_in * sizeof(unsigned int));
    sh = (size_t*) malloc(nd_out * sizeof(size_t));
    if (transposition == NULL || sh == NULL) {
        PyErr_NoMemory();
        free(transposition);
        free(sh);
        return 1;
    }
    /* pygpu_transpose() takes unsigned int axes; the params store int64. */
    transposition_data = (npy_int64*) PyArray_DATA(params->transposition);
    for (npy_intp i = 0; i < nd_in; ++i) {
        transposition[i] = (unsigned int) transposition_data[i];
    }
    tmp = pygpu_transpose(input, transposition);
    free(transposition);  /* consumed; free on every following path */
    if (!tmp) {
        PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: unable to transpose input.");
        free(sh);
        return 1;
    }
    /* Output shape: 1 for augmented (broadcastable) dims, otherwise the next
     * dimension of the transposed array. */
    e = 0;
    for (npy_intp i = 0; i < nd_out; ++i) {
        if (new_order[i] == -1) {
            sh[i] = 1;
        } else {
            sh[i] = tmp->ga.dimensions[e];
            ++e;
        }
    }
    *out = pygpu_reshape(tmp, nd_out, sh, GA_ANY_ORDER, 1, -1);
    Py_DECREF(tmp);
    free(sh);
    /** End shuffle. **/
    if (*out == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: unable to reshape output.");
        return 1;
    }
    if (!params->inplace) {
        /* Not inplace: return a fresh copy so the output does not share
         * storage with the input. */
        tmp = pygpu_copy(*out, GA_ANY_ORDER);
        Py_DECREF(*out);
        if (!tmp) {
            PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: unable to copy output.");
            *out = NULL;
            return 1;
        }
        *out = tmp;
    }
    return 0;
}
......@@ -407,12 +407,13 @@ class SupportCodeError(Exception):
"""
class GpuDimShuffle(HideC, DimShuffle):
class GpuDimShuffle(DimShuffle):
"""
DimShuffle on the GPU.
"""
_f16_ok = True
c_func_name = 'gpu_dimshuffle'
def make_node(self, input):
ctx_name = infer_context_name(input)
......@@ -448,66 +449,6 @@ class GpuDimShuffle(HideC, DimShuffle):
storage[0] = res
def c_support_code_apply(self, node, name):
    """Return per-apply C support code: a static helper ``%(name)s_f``.

    The helper transposes its argument with an axis permutation
    (``shuffle + drop``) baked in at code-generation time, then reshapes
    the result to the dimshuffled output shape.
    NOTE(review): leading whitespace inside the C template may have been
    lost in transcription — confirm against the repository file.
    """
    def copy_shape(nd_out):
        # Build one C assignment per output dim: augmented ('x') dims
        # get size 1, the others take the next transposed dimension.
        stmts = []
        e = 0
        for d in range(nd_out):
            if d in self.augment:
                stmts.append("sh[%s] = 1;" % (d,))
            else:
                stmts.append("sh[%s] = tmp->ga.dimensions[%s];" % (d, e))
                e += 1
        return '\n '.join(stmts)
    # %(shuffle)s expands to the comma-separated axis permutation that
    # becomes the static C array %(name)s_ax.
    return """
static const unsigned int %(name)s_ax[] = {%(shuffle)s};
static PyGpuArrayObject *%(name)s_f(PyGpuArrayObject *a) {
PyGpuArrayObject *res, *tmp;
size_t sh[%(nd_out)s];
tmp = pygpu_transpose(a, %(name)s_ax);
if (!tmp) return NULL;
%(copy_shape)s
res = pygpu_reshape(tmp, %(nd_out)s, sh, GA_ANY_ORDER, 1, -1);
Py_DECREF(tmp);
return res;
}
""" % dict(shuffle=', '.join(str(a) for a in (self.shuffle + self.drop)),
           name=name, nd_out=len(self.new_order),
           copy_shape=copy_shape(len(self.new_order)))
def c_code(self, node, name, inputs, outputs, sub):
    """Return the apply-time C code.

    Validates the input's nd, calls the ``%(name)s_f`` helper emitted by
    :meth:`c_support_code_apply`, and, when the op is not inplace,
    appends a copy so the output does not alias the input.
    NOTE(review): leading whitespace inside the C templates may have
    been lost in transcription — confirm against the repository file.
    """
    d = dict(name=name, fail=sub['fail'], inp=inputs[0], out=outputs[0],
             nd=len(self.input_broadcastable))
    process = """
PyGpuArrayObject *tmp = NULL;
if (%(inp)s->ga.nd != %(nd)s) {
PyErr_SetString(PyExc_TypeError, "input nd");
%(fail)s
}
Py_XDECREF(%(out)s);
%(out)s = %(name)s_f(%(inp)s);
if (%(out)s == NULL) {%(fail)s}
""" % d
    if not self.inplace:
        # Append C code that replaces the output with a fresh copy.
        process += """
tmp = pygpu_copy(%(out)s, GA_ANY_ORDER);
Py_DECREF(%(out)s);
if (!tmp) {
%(out)s = NULL;
%(fail)s
}
%(out)s = tmp;
""" % d
    return process
def c_code_cache_version(self):
    """Return the cache-version tuple for the generated C code."""
    cache_version = (5,)
    return cache_version
class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
"""
......@@ -563,7 +504,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_mask = tuple(reduce_mask)
self.reduce_mask = reduce_mask
# used to make sure that calls to scalar op
# used to make sure that calls to scalar op
# have unique name arguments
self._n_scalar_op_calls = 0
CAReduceDtype.__init__(self, scalar_op, axis=axis,
......
......@@ -139,6 +139,7 @@ class DimShuffle(COp):
# because of importation issues related to TensorType.
return ParamsType(input_broadcastable=TensorType(dtype='bool', broadcastable=(False,)),
_new_order=theano.tensor.lvector,
transposition=theano.tensor.lvector,
inplace=theano.scalar.bool)
@property
......@@ -148,6 +149,10 @@ class DimShuffle(COp):
# We replace it with -1.
return [(-1 if x == 'x' else x) for x in self.new_order]
@property
def transposition(self):
    """Axis permutation handed to the C code: kept axes, then dropped axes."""
    # NOTE(review): assumes self.shuffle and self.drop are sequences of the
    # same concatenable type — confirm in __init__.
    kept_then_dropped = self.shuffle + self.drop
    return kept_then_dropped
def __init__(self, input_broadcastable, new_order, inplace=True):
COp.__init__(self, [self.c_func_file], self.c_func_name)
self.input_broadcastable = tuple(input_broadcastable)
......@@ -206,8 +211,6 @@ class DimShuffle(COp):
if not hasattr(self, 'func_files'):
# Perhaps we are loading an old `Op` version of DimShuffle.
# Let's just build the COp.
self.c_func_file = 'c_code/dimshuffle.c'
self.c_func_name = 'cpu_dimshuffle'
COp.__init__(self, [self.c_func_file], self.c_func_name)
def make_node(self, _input):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论