提交 a8c08487 作者: Pierre Luc Carrier

Convert output creation to use pygpu_empty and replace PyArray_DATA with updated…

Convert output creation to use pygpu_empty and replace PyArray_DATA with updated code in ops GpuSoftmax and GpuSoftmaxWithBias.
上级 b47b95c3
...@@ -469,9 +469,13 @@ class GpuSoftmax (Op): ...@@ -469,9 +469,13 @@ class GpuSoftmax (Op):
def c_compiler(self): def c_compiler(self):
return NVCC_compiler return NVCC_compiler
def c_init_code(self):
return ['cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr");']
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
dtype = self.dtype dtype = self.dtype
typecode = pygpu.gpuarray.dtype_to_typecode(dtype)
x, = inp x, = inp
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
...@@ -488,15 +492,13 @@ class GpuSoftmax (Op): ...@@ -488,15 +492,13 @@ class GpuSoftmax (Op):
PyGpuArray_DIMS(%(x)s)[1])) PyGpuArray_DIMS(%(x)s)[1]))
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (CudaNdarray*)CudaNdarray_New(); %(z)s = pygpu_empty(2, PyGpuArray_DIMS(%(x)s),
if ((NULL == %(z)s) %(typecode)s,
|| CudaNdarray_alloc_contiguous(%(z)s, 2, GA_C_ORDER,
PyGpuArray_DIMS(%(x)s))) pygpu_default_context(), Py_None);
{ if (!%(nll)s) {
Py_XDECREF(%(z)s); %(fail)s
%(z)s = NULL; }
%(fail)s;
}
} }
{ {
int n_blocks = std::min(PyGpuArray_DIMS(%(x)s)[0], int n_blocks = std::min(PyGpuArray_DIMS(%(x)s)[0],
...@@ -521,11 +523,13 @@ class GpuSoftmax (Op): ...@@ -521,11 +523,13 @@ class GpuSoftmax (Op):
PyGpuArray_DIMS(%(x)s)[0], PyGpuArray_DIMS(%(x)s)[0],
PyGpuArray_DIMS(%(x)s)[1], PyGpuArray_DIMS(%(x)s)[1],
PyArray_DATA(%(x)s), (dtype_%(x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) +
%(x)s->ga.offset),
PyGpuArray_STRIDES(%(x)s)[0], PyGpuArray_STRIDES(%(x)s)[0],
PyGpuArray_STRIDES(%(x)s)[1], PyGpuArray_STRIDES(%(x)s)[1],
PyArray_DATA(%(z)s), (dtype_%(z)s*)(((char *)cuda_get_ptr(%(z)s->ga.data)) +
%(z)s->ga.offset);
PyGpuArray_STRIDES(%(z)s)[0], PyGpuArray_STRIDES(%(z)s)[0],
PyGpuArray_STRIDES(%(z)s)[1] PyGpuArray_STRIDES(%(z)s)[1]
); );
...@@ -539,11 +543,13 @@ class GpuSoftmax (Op): ...@@ -539,11 +543,13 @@ class GpuSoftmax (Op):
PyGpuArray_DIMS(%(x)s)[0], PyGpuArray_DIMS(%(x)s)[0],
PyGpuArray_DIMS(%(x)s)[1], PyGpuArray_DIMS(%(x)s)[1],
PyArray_DATA(%(x)s), (dtype_%(x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) +
%(x)s->ga.offset),
PyGpuArray_STRIDES(%(x)s)[0], PyGpuArray_STRIDES(%(x)s)[0],
PyGpuArray_STRIDES(%(x)s)[1], PyGpuArray_STRIDES(%(x)s)[1],
PyArray_DATA(%(z)s), (dtype_%(z)s*)(((char *)cuda_get_ptr(%(z)s->ga.data)) +
%(z)s->ga.offset);
PyGpuArray_STRIDES(%(z)s)[0], PyGpuArray_STRIDES(%(z)s)[0],
PyGpuArray_STRIDES(%(z)s)[1] PyGpuArray_STRIDES(%(z)s)[1]
); );
...@@ -605,7 +611,8 @@ class GpuSoftmax (Op): ...@@ -605,7 +611,8 @@ class GpuSoftmax (Op):
"__syncthreads()", "__syncthreads()",
"}", "}",
]) ])
return ret1 + "\n" + ret2 ret3 = "CUdeviceptr (*cuda_get_ptr)(gpudata *g);"
return ret1 + "\n" + ret2 + "\n"= ret3
gpu_softmax = GpuSoftmax() gpu_softmax = GpuSoftmax()
...@@ -641,9 +648,13 @@ class GpuSoftmaxWithBias (GpuOp): ...@@ -641,9 +648,13 @@ class GpuSoftmaxWithBias (GpuOp):
def c_compiler(self): def c_compiler(self):
return NVCC_compiler return NVCC_compiler
def c_init_code(self):
return ['cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr");']
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
dtype = self.dtype dtype = self.dtype
typecode = pygpu.gpuarray.dtype_to_typecode(dtype)
x, b = inp x, b = inp
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
...@@ -675,15 +686,13 @@ class GpuSoftmaxWithBias (GpuOp): ...@@ -675,15 +686,13 @@ class GpuSoftmaxWithBias (GpuOp):
PyGpuArray_DIMS(%(x)s)[1])) PyGpuArray_DIMS(%(x)s)[1]))
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (CudaNdarray*)CudaNdarray_New(); %(z)s = pygpu_empty(2, PyGpuArray_DIMS(%(x)s),
if ((NULL == %(z)s) %(typecode)s,
|| CudaNdarray_alloc_contiguous(%(z)s, 2, GA_C_ORDER,
PyGpuArray_DIMS(%(x)s))) pygpu_default_context(), Py_None);
{ if (!%(nll)s) {
Py_XDECREF(%(z)s); %(fail)s
%(z)s = NULL; }
%(fail)s;
}
} }
{ {
int n_blocks = std::min(PyGpuArray_DIMS(%(x)s)[0],32*1024); int n_blocks = std::min(PyGpuArray_DIMS(%(x)s)[0],32*1024);
...@@ -703,14 +712,17 @@ class GpuSoftmaxWithBias (GpuOp): ...@@ -703,14 +712,17 @@ class GpuSoftmaxWithBias (GpuOp):
PyGpuArray_DIMS(%(x)s)[0], PyGpuArray_DIMS(%(x)s)[0],
PyGpuArray_DIMS(%(x)s)[1], PyGpuArray_DIMS(%(x)s)[1],
PyArray_DATA(%(x)s), (dtype_%(x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) +
%(x)s->ga.offset),
PyGpuArray_STRIDES(%(x)s)[0], PyGpuArray_STRIDES(%(x)s)[0],
PyGpuArray_STRIDES(%(x)s)[1], PyGpuArray_STRIDES(%(x)s)[1],
PyArray_DATA(%(b)s), (dtype_%(b)s*)(((char *)cuda_get_ptr(%(b)s->ga.data)) +
%(b)s->ga.offset),
PyGpuArray_STRIDES(%(b)s)[0], PyGpuArray_STRIDES(%(b)s)[0],
PyArray_DATA(%(z)s), (dtype_%(z)s*)(((char *)cuda_get_ptr(%(z)s->ga.data)) +
%(z)s->ga.offset),
PyGpuArray_STRIDES(%(z)s)[0], PyGpuArray_STRIDES(%(z)s)[0],
PyGpuArray_STRIDES(%(z)s)[1] PyGpuArray_STRIDES(%(z)s)[1]
); );
...@@ -724,14 +736,17 @@ class GpuSoftmaxWithBias (GpuOp): ...@@ -724,14 +736,17 @@ class GpuSoftmaxWithBias (GpuOp):
PyGpuArray_DIMS(%(x)s)[0], PyGpuArray_DIMS(%(x)s)[0],
PyGpuArray_DIMS(%(x)s)[1], PyGpuArray_DIMS(%(x)s)[1],
PyArray_DATA(%(x)s), (dtype_%(x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) +
%(x)s->ga.offset),
PyGpuArray_STRIDES(%(x)s)[0], PyGpuArray_STRIDES(%(x)s)[0],
PyGpuArray_STRIDES(%(x)s)[1], PyGpuArray_STRIDES(%(x)s)[1],
PyArray_DATA(%(b)s), (dtype_%(b)s*)(((char *)cuda_get_ptr(%(b)s->ga.data)) +
%(b)s->ga.offset),
PyGpuArray_STRIDES(%(b)s)[0], PyGpuArray_STRIDES(%(b)s)[0],
PyArray_DATA(%(z)s), (dtype_%(z)s*)(((char *)cuda_get_ptr(%(z)s->ga.data)) +
%(z)s->ga.offset),
PyGpuArray_STRIDES(%(z)s)[0], PyGpuArray_STRIDES(%(z)s)[0],
PyGpuArray_STRIDES(%(z)s)[1] PyGpuArray_STRIDES(%(z)s)[1]
); );
...@@ -799,6 +814,7 @@ class GpuSoftmaxWithBias (GpuOp): ...@@ -799,6 +814,7 @@ class GpuSoftmaxWithBias (GpuOp):
"__syncthreads()", "__syncthreads()",
"}", "}",
]) ])
return ret1 + "\n" + ret2 ret3 = "CUdeviceptr (*cuda_get_ptr)(gpudata *g);"
return ret1 + "\n" + ret2 + "\n"= ret3
gpu_softmax_with_bias = GpuSoftmaxWithBias() gpu_softmax_with_bias = GpuSoftmaxWithBias()
\ No newline at end of file
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论