提交 77f31aba · 作者: Frederic

Remove the pycuda import from the new back-end by inlining the CLUDA preamble.

上级 394844a7
......@@ -175,9 +175,34 @@ class GpuElemwise(HideC, Elemwise):
# implementation
k = self.generate_kernel(node, nodename)
nd = node.inputs[0].type.ndim
import pycuda._cluda
CLUDA_PREAMBLE = """
#define local_barrier() __syncthreads();
#define WITHIN_KERNEL __device__
#define KERNEL extern "C" __global__
#define GLOBAL_MEM /* empty */
#define LOCAL_MEM __shared__
#define LOCAL_MEM_ARG /* empty */
#define REQD_WG_SIZE(X,Y,Z) __launch_bounds__(X*Y*Z, 1)
#define LID_0 threadIdx.x
#define LID_1 threadIdx.y
#define LID_2 threadIdx.z
#define GID_0 blockIdx.x
#define GID_1 blockIdx.y
#define GID_2 blockIdx.z
#define LDIM_0 blockDim.x
#define LDIM_1 blockDim.y
#define LDIM_2 blockDim.z
#define GDIM_0 gridDim.x
#define GDIM_1 gridDim.y
#define GDIM_2 gridDim.z
"""
res = ["CUdeviceptr (*cuda_get_ptr)(gpudata *g);",
pycuda._cluda.CLUDA_PREAMBLE]
CLUDA_PREAMBLE]
for i in range(0, nd + 1):
res.append(k.render_basic(i, name="elem_" + str(i)) + ';')
res.append(k.contig_src + ';')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论