提交 1dcc6180 作者: nouiz

Merge pull request #1198 from lamblin/fix_gpucontiguous_output_mem

Make sure preallocated output is contiguous before using it
......@@ -2831,7 +2831,8 @@ class GpuContiguous(GpuOp):
} else if ((NULL == %(z)s)""" % locals()
for i in xrange(len(node.inputs[0].type.broadcastable)):
str += "\n|| (CudaNdarray_HOST_DIMS(%(input)s)[%(i)s] != CudaNdarray_HOST_DIMS(%(z)s)[%(i)s])" % locals()
str += """)
str += """
|| !CudaNdarray_is_c_contiguous(%(z)s))
{
Py_XDECREF(%(z)s);
%(z)s = (CudaNdarray*)CudaNdarray_Copy(%(input)s);
......@@ -2847,7 +2848,7 @@ class GpuContiguous(GpuOp):
return str
def c_code_cache_version(self):
return (1,)
return (2,)
gpu_contiguous = GpuContiguous()
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论