提交 10aa713a 作者: Pascal Lamblin

When using memset in GpuAlloc, use only contiguous output

上级 08d46925
...@@ -2721,7 +2721,7 @@ class GpuAlloc(GpuOp): ...@@ -2721,7 +2721,7 @@ class GpuAlloc(GpuOp):
str += "if(%(out)s==NULL\n" % locals() str += "if(%(out)s==NULL\n" % locals()
for idx, sh in enumerate(shps): for idx, sh in enumerate(shps):
str += "||CudaNdarray_HOST_DIMS(%(out)s)[%(idx)s]!=dims[%(idx)s]" % locals() str += "||CudaNdarray_HOST_DIMS(%(out)s)[%(idx)s]!=dims[%(idx)s]" % locals()
str += """){ str += """||(%(memset_0)s && !CudaNdarray_is_c_contiguous(%(out)s))){
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s = (CudaNdarray*)CudaNdarray_New(); %(out)s = (CudaNdarray*)CudaNdarray_New();
if (!%(out)s) if (!%(out)s)
...@@ -2769,7 +2769,7 @@ class GpuAlloc(GpuOp): ...@@ -2769,7 +2769,7 @@ class GpuAlloc(GpuOp):
return [None for i in inputs] return [None for i in inputs]
def c_code_cache_version(self): def c_code_cache_version(self):
return (5,) return (6,)
def do_constant_folding(self, node): def do_constant_folding(self, node):
for client in node.outputs[0].clients: for client in node.outputs[0].clients:
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论