提交 5abcfd6a 作者: Pascal Lamblin

Make sure preallocated output is contiguous before using it

上级 95adacbd
...@@ -2831,7 +2831,8 @@ class GpuContiguous(GpuOp): ...@@ -2831,7 +2831,8 @@ class GpuContiguous(GpuOp):
} else if ((NULL == %(z)s)""" % locals() } else if ((NULL == %(z)s)""" % locals()
for i in xrange(len(node.inputs[0].type.broadcastable)): for i in xrange(len(node.inputs[0].type.broadcastable)):
str += "\n|| (CudaNdarray_HOST_DIMS(%(input)s)[%(i)s] != CudaNdarray_HOST_DIMS(%(z)s)[%(i)s])" % locals() str += "\n|| (CudaNdarray_HOST_DIMS(%(input)s)[%(i)s] != CudaNdarray_HOST_DIMS(%(z)s)[%(i)s])" % locals()
str += """) str += """
|| !CudaNdarray_is_c_contiguous(%(z)s))
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (CudaNdarray*)CudaNdarray_Copy(%(input)s); %(z)s = (CudaNdarray*)CudaNdarray_Copy(%(input)s);
...@@ -2847,7 +2848,7 @@ class GpuContiguous(GpuOp): ...@@ -2847,7 +2848,7 @@ class GpuContiguous(GpuOp):
return str return str
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (2,)
gpu_contiguous = GpuContiguous() gpu_contiguous = GpuContiguous()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论