提交 3e40d56a 作者：Frederic

Use the CUDA CNMeM allocator (device_malloc/device_free) in one more place: the cuBLAS batched dot (BatchedDotOp)

上级 4e70bd89
...@@ -137,13 +137,9 @@ class BatchedDotOp(GpuOp): ...@@ -137,13 +137,9 @@ class BatchedDotOp(GpuOp):
host_z[i] = host_z[i - 1] + z_stride; host_z[i] = host_z[i - 1] + z_stride;
} }
err1 = cudaMalloc((void **)&gpu_x, ptr_array_size); gpu_x = (float **) device_malloc(ptr_array_size);
if (err1 != cudaSuccess) if (gpu_x == NULL){
{
CLEANUP();
PyErr_Format(PyExc_RuntimeError,
"%%s", "cudaMalloc failure");
%(fail)s; %(fail)s;
} }
...@@ -195,7 +191,7 @@ class BatchedDotOp(GpuOp): ...@@ -195,7 +191,7 @@ class BatchedDotOp(GpuOp):
do \ do \
{ \ { \
if (host_x) free (host_x); \ if (host_x) free (host_x); \
if (gpu_x) cudaFree(gpu_x); \ if (gpu_x) device_free(gpu_x); \
} while (0) } while (0)
""" """
...@@ -213,6 +209,9 @@ class BatchedDotOp(GpuOp): ...@@ -213,6 +209,9 @@ class BatchedDotOp(GpuOp):
return rval return rval
# Returns the constant version tuple (1,).
# This method appears only in the new side of the diff, i.e. it is added by
# this commit.
# NOTE(review): presumably this tuple keys the cache of the compiled C code for
# this op, so it would need to be bumped whenever the generated C source
# changes — confirm against the GpuOp/Op base-class contract.
def c_code_cache_version(self):
return (1,)
batched_dot = BatchedDotOp() batched_dot = BatchedDotOp()
class GpuDot22(GpuOp): class GpuDot22(GpuOp):
......
Markdown 格式
0%
您已将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论