提交 dc09e0fa，作者：lamblin

Merge pull request #1045 from nouiz/deepcopy

Made DeepCopyOp reuse pre-allocated output when possible.
......@@ -427,17 +427,32 @@ theano.compile.register_view_op_c_code(
theano.compile.register_deep_copy_op_c_code(
CudaNdarrayType,
"""
Py_XDECREF(%(oname)s);
%(oname)s = (CudaNdarray*)CudaNdarray_Copy(%(iname)s);
if (!%(oname)s)
{
PyErr_SetString(PyExc_ValueError, "DeepCopyOp: the copy failed!");
%(fail)s;
int alloc = %(oname)s == NULL;
for(int i=0; !alloc && i<CudaNdarray_NDIM(%(oname)s); i++) {
if(CudaNdarray_HOST_DIMS(%(iname)s)[i] !=
CudaNdarray_HOST_DIMS(%(oname)s)[i]) {
alloc = true;
break;
}
}
if(alloc) {
Py_XDECREF(%(oname)s);
%(oname)s = (CudaNdarray*)CudaNdarray_Copy(%(iname)s);
if (!%(oname)s)
{
PyErr_SetString(PyExc_ValueError,
"DeepCopyOp: the copy failed!");
%(fail)s;
}
} else {
if(!CudaNdarray_CopyFromCudaNdarray(%(oname)s, %(iname)s)) {
PyErr_SetString(PyExc_ValueError,
"DeepCopyOp: the copy failed into already allocated space!");
%(fail)s;
}
}
""",
version=1)
version=2)
# THIS WORKS But CudaNdarray instances don't compare equal to one
......
......@@ -1092,17 +1092,32 @@ theano.compile.register_view_op_c_code(
theano.compile.register_deep_copy_op_c_code(
TensorType,
"""
Py_XDECREF(%(oname)s);
%(oname)s = (PyArrayObject*)PyArray_NewCopy(%(iname)s,NPY_ANYORDER);
if (!%(oname)s)
{
PyErr_SetString(PyExc_ValueError, "DeepCopyOp: the copy failed!");
%(fail)s;
int alloc = %(oname)s == NULL;
for(int i=0; !alloc && i<PyArray_NDIM(%(oname)s); i++) {
if(PyArray_DIMS(%(iname)s)[i] != PyArray_DIMS(%(oname)s)[i]) {
alloc = true;
break;
}
}
if(alloc) {
Py_XDECREF(%(oname)s);
%(oname)s = (PyArrayObject*)PyArray_NewCopy(%(iname)s,
NPY_ANYORDER);
if (!%(oname)s)
{
PyErr_SetString(PyExc_ValueError,
"DeepCopyOp: the copy failed!");
%(fail)s;
}
} else {
if(PyArray_CopyInto(%(oname)s, %(iname)s)){
PyErr_SetString(PyExc_ValueError,
"DeepCopyOp: the copy failed into already allocated space!");
%(fail)s;
}
}
""",
version=1)
version=2)
# Easy constructors
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论