提交 0cc49cc4 authored 作者: Frederic's avatar Frederic

Fix compilation crash and strides usage.

上级 1d2ac4e6
...@@ -111,8 +111,15 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -111,8 +111,15 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
return ['cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr");'] return ['cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr");']
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
dtype = self.dtype typecode_x = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
typecode = pygpu.gpuarray.dtype_to_typecode(dtype) typecode_b = pygpu.gpuarray.dtype_to_typecode(node.outputs[1].dtype)
typecode_y_idx = pygpu.gpuarray.dtype_to_typecode(node.outputs[2].dtype)
itemsize_x = numpy.dtype(node.inputs[0].dtype).itemsize
itemsize_b = numpy.dtype(node.inputs[1].dtype).itemsize
itemsize_y_idx = numpy.dtype(node.inputs[2].dtype).itemsize
itemsize_nll = numpy.dtype(node.outputs[0].dtype).itemsize
itemsize_sm = numpy.dtype(node.outputs[1].dtype).itemsize
itemsize_am = numpy.dtype(node.outputs[2].dtype).itemsize
x, b, y_idx = inp x, b, y_idx = inp
nll, sm, am = out nll, sm, am = out
classname = self.__class__.__name__ classname = self.__class__.__name__
...@@ -153,7 +160,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -153,7 +160,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
{ {
Py_XDECREF(%(nll)s); Py_XDECREF(%(nll)s);
%(nll)s = pygpu_empty(1, PyGpuArray_DIMS(%(y_idx)s), %(nll)s = pygpu_empty(1, PyGpuArray_DIMS(%(y_idx)s),
%(typecode)s, %(typecode_x)s,
GA_C_ORDER, GA_C_ORDER,
pygpu_default_context(), Py_None); pygpu_default_context(), Py_None);
if (!%(nll)s) { if (!%(nll)s) {
...@@ -168,7 +175,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -168,7 +175,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
{ {
Py_XDECREF(%(sm)s); Py_XDECREF(%(sm)s);
%(sm)s = pygpu_empty(2, PyGpuArray_DIMS(%(x)s), %(sm)s = pygpu_empty(2, PyGpuArray_DIMS(%(x)s),
%(typecode)s, %(typecode_b)s,
GA_C_ORDER, GA_C_ORDER,
pygpu_default_context(), Py_None); pygpu_default_context(), Py_None);
if(!%(sm)s) if(!%(sm)s)
...@@ -185,7 +192,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -185,7 +192,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
{ {
Py_XDECREF(%(am)s); Py_XDECREF(%(am)s);
%(am)s = pygpu_empty(1, PyGpuArray_DIMS(%(y_idx)s), %(am)s = pygpu_empty(1, PyGpuArray_DIMS(%(y_idx)s),
%(typecode)s, %(typecode_y_idx)s,
GA_C_ORDER, GA_C_ORDER,
pygpu_default_context(), Py_None); pygpu_default_context(), Py_None);
if(!%(am)s) if(!%(am)s)
...@@ -208,26 +215,25 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -208,26 +215,25 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
PyGpuArray_DIMS(%(x)s)[0], PyGpuArray_DIMS(%(x)s)[0],
PyGpuArray_DIMS(%(x)s)[1], PyGpuArray_DIMS(%(x)s)[1],
(dtype_%(x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) + (dtype_%(x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) +
%(x)s->ga.offset); %(x)s->ga.offset),
PyGpuArray_STRIDES(%(x)s)[0], PyGpuArray_STRIDES(%(x)s)[0] / %(itemsize_x)s,
PyGpuArray_STRIDES(%(x)s)[1], PyGpuArray_STRIDES(%(x)s)[1] / %(itemsize_x)s,
(dtype_%(b)s*)(((char *)cuda_get_ptr(%(b)s->ga.data)) + (dtype_%(b)s*)(((char *)cuda_get_ptr(%(b)s->ga.data)) +
%(b)s->ga.offset); %(b)s->ga.offset),
PyGpuArray_STRIDES(%(b)s)[0], PyGpuArray_STRIDES(%(b)s)[0] / %(itemsize_b)s,
(dtype_%(y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) + (dtype_%(y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) +
%(y_idx)s->ga.offset); %(y_idx)s->ga.offset),
PyGpuArray_STRIDES(%(y_idx)s)[0], PyGpuArray_STRIDES(%(y_idx)s)[0] / %(itemsize_y_idx)s,
(dtype_%(nll)s*)(((char *)cuda_get_ptr(%(nll)s->ga.data)) + (dtype_%(nll)s*)(((char *)cuda_get_ptr(%(nll)s->ga.data)) +
%(nll)s->ga.offset); %(nll)s->ga.offset),
PyGpuArray_STRIDES(%(nll)s)[0], PyGpuArray_STRIDES(%(nll)s)[0] / %(itemsize_nll)s,
(dtype_%(sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) + (dtype_%(sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) +
%(sm)s->ga.offset); %(sm)s->ga.offset),
PyGpuArray_STRIDES(%(sm)s)[0], PyGpuArray_STRIDES(%(sm)s)[0] / %(itemsize_sm)s,
PyGpuArray_STRIDES(%(sm)s)[1], PyGpuArray_STRIDES(%(sm)s)[1] / %(itemsize_sm)s,
(dtype_%(am)s*)(((char *)cuda_get_ptr(%(am)s->ga.data)) + (dtype_%(am)s*)(((char *)cuda_get_ptr(%(am)s->ga.data)) +
%(am)s->ga.offset); %(am)s->ga.offset),
PyGpuArray_STRIDES(%(am)s)[0]); PyGpuArray_STRIDES(%(am)s)[0] / %(itemsize_am)s);
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) if (cudaSuccess != err)
{ {
...@@ -291,7 +297,11 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -291,7 +297,11 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
return NVCC_compiler return NVCC_compiler
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
typecode = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype) typecode_dx = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
itemsize_dnll = numpy.dtype(node.inputs[0].dtype).itemsize
itemsize_sm = numpy.dtype(node.inputs[1].dtype).itemsize
itemsize_y_idx = numpy.dtype(node.inputs[2].dtype).itemsize
itemsize_dx = numpy.dtype(node.outputs[0].dtype).itemsize
dnll, sm, y_idx = inp dnll, sm, y_idx = inp
dx, = out dx, = out
fail = sub['fail'] fail = sub['fail']
...@@ -327,7 +337,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -327,7 +337,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
{ {
Py_XDECREF(%(dx)s); Py_XDECREF(%(dx)s);
%(dx)s = pygpu_empty(2, PyGpuArray_DIMS(%(sm)s), %(dx)s = pygpu_empty(2, PyGpuArray_DIMS(%(sm)s),
%(typecode)s, %(typecode_dx)s,
GA_C_ORDER, GA_C_ORDER,
pygpu_default_context(), Py_None); pygpu_default_context(), Py_None);
if (!%(dx)s) { if (!%(dx)s) {
...@@ -344,22 +354,22 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -344,22 +354,22 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
PyGpuArray_DIMS(%(dx)s)[1], PyGpuArray_DIMS(%(dx)s)[1],
(dtype_%(dnll)s*)(((char *)cuda_get_ptr(%(dnll)s->ga.data)) + (dtype_%(dnll)s*)(((char *)cuda_get_ptr(%(dnll)s->ga.data)) +
%(dnll)s->ga.offset); %(dnll)s->ga.offset),
PyGpuArray_STRIDES(%(dnll)s)[0], PyGpuArray_STRIDES(%(dnll)s)[0] / %(itemsize_dnll)s,
(dtype_%(sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) + (dtype_%(sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) +
%(sm)s->ga.offset); %(sm)s->ga.offset),
PyGpuArray_STRIDES(%(sm)s)[0], PyGpuArray_STRIDES(%(sm)s)[0] / %(itemsize_sm)s,
PyGpuArray_STRIDES(%(sm)s)[1], PyGpuArray_STRIDES(%(sm)s)[1] / %(itemsize_sm)s,
(dtype_%(y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) + (dtype_%(y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) +
%(y_idx)s->ga.offset); %(y_idx)s->ga.offset),
PyGpuArray_STRIDES(%(y_idx)s)[0], PyGpuArray_STRIDES(%(y_idx)s)[0] / %(itemsize_y_idx)s,
(dtype_%(dx)s*)(((char *)cuda_get_ptr(%(dx)s->ga.data)) + (dtype_%(dx)s*)(((char *)cuda_get_ptr(%(dx)s->ga.data)) +
%(dx)s->ga.offset); %(dx)s->ga.offset),
PyGpuArray_STRIDES(%(dx)s)[0], PyGpuArray_STRIDES(%(dx)s)[0] / %(itemsize_dx)s,
PyGpuArray_STRIDES(%(dx)s)[1] PyGpuArray_STRIDES(%(dx)s)[1] / %(itemsize_dx)s
); );
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
if( cudaSuccess != err) if( cudaSuccess != err)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论