提交 4ba0c034 作者: Frédéric Bastien

Merge pull request #2961 from nouiz/fix_free

Fix free
...@@ -69,7 +69,7 @@ class GpuCumsum(CumsumOp, GpuOp): ...@@ -69,7 +69,7 @@ class GpuCumsum(CumsumOp, GpuOp):
return "%s{%s}" % (self.__class__.__name__, self.axis) return "%s{%s}" % (self.__class__.__name__, self.axis)
def c_code_cache_version(self): def c_code_cache_version(self):
return (8,) return (9,)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
return """ return """
...@@ -306,6 +306,7 @@ class GpuCumsum(CumsumOp, GpuOp): ...@@ -306,6 +306,7 @@ class GpuCumsum(CumsumOp, GpuOp):
if (dimGridX > 1) { if (dimGridX > 1) {
// Do a cumsum over the blockSum (recursive). // Do a cumsum over the blockSum (recursive).
if (cumSum_%(nodename)s(deviceBlockSum, deviceBlockSum, 0, maxThreads, maxGridY, maxGridZ) == -1){ if (cumSum_%(nodename)s(deviceBlockSum, deviceBlockSum, 0, maxThreads, maxGridY, maxGridZ) == -1){
Py_DECREF(deviceBlockSum);
return -1; return -1;
} }
...@@ -342,8 +343,7 @@ class GpuCumsum(CumsumOp, GpuOp): ...@@ -342,8 +343,7 @@ class GpuCumsum(CumsumOp, GpuOp):
} }
} }
} }
Py_DECREF(deviceBlockSum);
cudaFree(CudaNdarray_DEV_DATA(deviceBlockSum));
CNDA_THREAD_SYNC; CNDA_THREAD_SYNC;
return 0; return 0;
} }
......
...@@ -1874,7 +1874,8 @@ blas_optdb.register('local_gemm_to_gemv', ...@@ -1874,7 +1874,8 @@ blas_optdb.register('local_gemm_to_gemv',
local_gemm_to_ger, local_gemm_to_ger,
local_dot22_to_ger_or_gemv, local_dot22_to_ger_or_gemv,
local_dimshuffle_lift], local_dimshuffle_lift],
max_use_ratio=5), max_use_ratio=5,
ignore_newtrees=False),
15, 'fast_run') 15, 'fast_run')
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论