提交 41a4a100 作者: lamblin

Merge pull request #548 from nouiz/crash_size_0

Crash size 0
...@@ -60,6 +60,8 @@ Crash Fix ...@@ -60,6 +60,8 @@ Crash Fix
element-wise fusion optimization when upcasting some inputs to element-wise fusion optimization when upcasting some inputs to
float32 (to compute them on the GPU). float32 (to compute them on the GPU).
(Frederic B., reported by Sander Dieleman) (Frederic B., reported by Sander Dieleman)
* GpuSoftmaxWithBias with shape (0, N) with N > 1.
(Frédéric B., reported by Razvan P.)
============= =============
Release Notes Release Notes
......
...@@ -419,7 +419,7 @@ class GpuSoftmaxWithBias (GpuOp): ...@@ -419,7 +419,7 @@ class GpuSoftmaxWithBias (GpuOp):
return [shape[0]] return [shape[0]]
def c_code_cache_version(self): def c_code_cache_version(self):
#return () #return ()
return (4,) + inline_softmax.code_version return (5,) + inline_softmax.code_version
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
x, b = inp x, b = inp
...@@ -461,14 +461,16 @@ class GpuSoftmaxWithBias (GpuOp): ...@@ -461,14 +461,16 @@ class GpuSoftmaxWithBias (GpuOp):
//TODO, detect the maximum number of thread per block. //TODO, detect the maximum number of thread per block.
int n_threads = std::min(CudaNdarray_HOST_DIMS(%(x)s)[1], 1024); int n_threads = std::min(CudaNdarray_HOST_DIMS(%(x)s)[1], 1024);
int n_shared_bytes = CudaNdarray_HOST_DIMS(%(x)s)[1] * 2 * sizeof(float); int n_shared_bytes = CudaNdarray_HOST_DIMS(%(x)s)[1] * 2 * sizeof(float);
if (CudaNdarray_HOST_DIMS(%(x)s)[0] > 0)
kSoftmaxWithBias_%(nodename)s {
<<< kSoftmaxWithBias_%(nodename)s
// todo: cap these at the card limits, implement loops in kernel <<<
n_blocks, // todo: cap these at the card limits,
n_threads, // implement loops in kernel
n_shared_bytes n_blocks,
>>>( n_threads,
n_shared_bytes
>>>(
CudaNdarray_HOST_DIMS(%(x)s)[0], CudaNdarray_HOST_DIMS(%(x)s)[0],
CudaNdarray_HOST_DIMS(%(x)s)[1], CudaNdarray_HOST_DIMS(%(x)s)[1],
...@@ -480,13 +482,17 @@ class GpuSoftmaxWithBias (GpuOp): ...@@ -480,13 +482,17 @@ class GpuSoftmaxWithBias (GpuOp):
CudaNdarray_HOST_STRIDES(%(b)s)[0], CudaNdarray_HOST_STRIDES(%(b)s)[0],
CudaNdarray_DEV_DATA(%(z)s) //guarantee c contig CudaNdarray_DEV_DATA(%(z)s) //guarantee c contig
); );
CNDA_THREAD_SYNC; CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
if( cudaSuccess != err) if( cudaSuccess != err)
{ {
PyErr_Format(PyExc_RuntimeError, "Cuda error: %%s: %%s.\\n", "kSoftmax_%(nodename)s", cudaGetErrorString(err)); PyErr_Format(PyExc_RuntimeError,
%(fail)s; "Cuda error: %%s: %%s.\\n",
"kSoftmaxWithBias_%(nodename)s",
cudaGetErrorString(err));
%(fail)s;
}
} }
} }
assert(%(z)s); assert(%(z)s);
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论