提交 21d2a0fd 作者: Frederic

Made DeepCopyOp reuse the pre-allocated output when possible.

fix gh-1028
上级 05e72586
...@@ -426,17 +426,32 @@ theano.compile.register_view_op_c_code( ...@@ -426,17 +426,32 @@ theano.compile.register_view_op_c_code(
theano.compile.register_deep_copy_op_c_code( theano.compile.register_deep_copy_op_c_code(
CudaNdarrayType, CudaNdarrayType,
""" """
int alloc = %(oname)s == NULL;
for(int i=0; !alloc && i<CudaNdarray_NDIM(%(oname)s); i++) {
if(CudaNdarray_HOST_DIMS(%(iname)s)[i] !=
CudaNdarray_HOST_DIMS(%(oname)s)[i]) {
alloc = true;
break;
}
}
if(alloc) {
Py_XDECREF(%(oname)s); Py_XDECREF(%(oname)s);
%(oname)s = (CudaNdarray*)CudaNdarray_Copy(%(iname)s); %(oname)s = (CudaNdarray*)CudaNdarray_Copy(%(iname)s);
if (!%(oname)s) if (!%(oname)s)
{ {
PyErr_SetString(PyExc_ValueError, "DeepCopyOp: the copy failed!"); PyErr_SetString(PyExc_ValueError,
"DeepCopyOp: the copy failed!");
%(fail)s; %(fail)s;
} }
} else {
if(!CudaNdarray_CopyFromCudaNdarray(%(oname)s, %(iname)s)) {
PyErr_SetString(PyExc_ValueError,
"DeepCopyOp: the copy failed into already allocated space!");
%(fail)s;
}
}
""", """,
version=1) version=2)
# THIS WORKS But CudaNdarray instances don't compare equal to one # THIS WORKS But CudaNdarray instances don't compare equal to one
......
...@@ -1091,17 +1091,32 @@ theano.compile.register_view_op_c_code( ...@@ -1091,17 +1091,32 @@ theano.compile.register_view_op_c_code(
theano.compile.register_deep_copy_op_c_code( theano.compile.register_deep_copy_op_c_code(
TensorType, TensorType,
""" """
int alloc = %(oname)s == NULL;
for(int i=0; !alloc && i<PyArray_NDIM(%(oname)s); i++) {
if(PyArray_DIMS(%(iname)s)[i] != PyArray_DIMS(%(oname)s)[i]) {
alloc = true;
break;
}
}
if(alloc) {
Py_XDECREF(%(oname)s); Py_XDECREF(%(oname)s);
%(oname)s = (PyArrayObject*)PyArray_NewCopy(%(iname)s,
%(oname)s = (PyArrayObject*)PyArray_NewCopy(%(iname)s,NPY_ANYORDER); NPY_ANYORDER);
if (!%(oname)s) if (!%(oname)s)
{ {
PyErr_SetString(PyExc_ValueError, "DeepCopyOp: the copy failed!"); PyErr_SetString(PyExc_ValueError,
"DeepCopyOp: the copy failed!");
%(fail)s; %(fail)s;
} }
} else {
if(PyArray_CopyInto(%(oname)s, %(iname)s)){
PyErr_SetString(PyExc_ValueError,
"DeepCopyOp: the copy failed into already allocated space!");
%(fail)s;
}
}
""", """,
version=1) version=2)
# Easy constructors # Easy constructors
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论