提交 1d2ac4e6,作者:Frederic

Fix compilation crash

上级 4e53fc30
...@@ -105,7 +105,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -105,7 +105,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
} }
CUdeviceptr (*cuda_get_ptr)(gpudata *g); CUdeviceptr (*cuda_get_ptr)(gpudata *g);
""" """ % locals()
def c_init_code(self): def c_init_code(self):
return ['cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr");'] return ['cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr");']
...@@ -198,14 +198,13 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -198,14 +198,13 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
} }
} }
{ {
int n_blocks = std::min(PyGpuArray_DIMS(%(x)s)[0], int n_blocks = PyGpuArray_DIMS(%(x)s)[0] < 256 ? PyGpuArray_DIMS(%(x)s)[0] : 256;
256);
//TODO: launch more threads per row and do parallel sum and max reductions //TODO: launch more threads per row and do parallel sum and max reductions
int n_threads = 1; int n_threads = 1;
int n_shared_bytes = 0; //n_threads * sizeof(%(dtype)s); int n_shared_bytes = 0; //n_threads * sizeof(dtype);
k_xent_sm_1hot_bias<<<n_blocks, n_threads, n_shared_bytes>>>( k_xent_sm_1hot_bias_%(nodename)s<<<n_blocks, n_threads, n_shared_bytes>>>(
PyGpuArray_DIMS(%(x)s)[0], PyGpuArray_DIMS(%(x)s)[0],
PyGpuArray_DIMS(%(x)s)[1], PyGpuArray_DIMS(%(x)s)[1],
(dtype_%(x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) + (dtype_%(x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) +
...@@ -336,9 +335,8 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -336,9 +335,8 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
} }
} }
{ {
int n_blocks = std::min(PyGpuArray_DIMS(%(dx)s)[0], int n_blocks = PyGpuArray_DIMS(%(dx)s)[0] < 256 ? PyGpuArray_DIMS(%(dx)s)[0] : 256;
256); int n_threads = PyGpuArray_DIMS(%(dx)s)[1] < 256 ? PyGpuArray_DIMS(%(dx)s)[1] : 256;
int n_threads = std::min(PyGpuArray_DIMS(%(dx)s)[1],256);
kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s
<<<n_blocks, n_threads>>>( <<<n_blocks, n_threads>>>(
...@@ -363,7 +361,6 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -363,7 +361,6 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
PyGpuArray_STRIDES(%(dx)s)[0], PyGpuArray_STRIDES(%(dx)s)[0],
PyGpuArray_STRIDES(%(dx)s)[1] PyGpuArray_STRIDES(%(dx)s)[1]
); );
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
if( cudaSuccess != err) if( cudaSuccess != err)
{ {
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论