提交 acb1a0e1 authored 作者: Frederic's avatar Frederic

fix GpuSoftmaxWithBias when the input has size 0.

上级 f16aee3d
......@@ -60,6 +60,8 @@ Crash Fix
element-wise fusion optimization when upcasting some inputs to
float32 (to compute them on the GPU).
(Frederic B., reported by Sander Dieleman)
* GpuSoftmaxWithBias with shape (0, N) with N > 1.
(Frédéric B., reported by Razvan P.)
=============
Release Notes
......
......@@ -419,7 +419,7 @@ class GpuSoftmaxWithBias (GpuOp):
return [shape[0]]
def c_code_cache_version(self):
#return ()
return (4,) + inline_softmax.code_version
return (5,) + inline_softmax.code_version
def c_code(self, node, nodename, inp, out, sub):
x, b = inp
......@@ -461,14 +461,16 @@ class GpuSoftmaxWithBias (GpuOp):
//TODO, detect the maximum number of thread per block.
int n_threads = std::min(CudaNdarray_HOST_DIMS(%(x)s)[1], 1024);
int n_shared_bytes = CudaNdarray_HOST_DIMS(%(x)s)[1] * 2 * sizeof(float);
kSoftmaxWithBias_%(nodename)s
<<<
// todo: cap these at the card limits, implement loops in kernel
n_blocks,
n_threads,
n_shared_bytes
>>>(
if (CudaNdarray_HOST_DIMS(%(x)s)[0] > 0)
{
kSoftmaxWithBias_%(nodename)s
<<<
// todo: cap these at the card limits,
// implement loops in kernel
n_blocks,
n_threads,
n_shared_bytes
>>>(
CudaNdarray_HOST_DIMS(%(x)s)[0],
CudaNdarray_HOST_DIMS(%(x)s)[1],
......@@ -480,13 +482,17 @@ class GpuSoftmaxWithBias (GpuOp):
CudaNdarray_HOST_STRIDES(%(b)s)[0],
CudaNdarray_DEV_DATA(%(z)s) //guarantee c contig
);
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
PyErr_Format(PyExc_RuntimeError, "Cuda error: %%s: %%s.\\n", "kSoftmax_%(nodename)s", cudaGetErrorString(err));
%(fail)s;
);
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
PyErr_Format(PyExc_RuntimeError,
"Cuda error: %%s: %%s.\\n",
"kSoftmaxWithBias_%(nodename)s",
cudaGetErrorString(err));
%(fail)s;
}
}
}
assert(%(z)s);
......
......@@ -142,7 +142,10 @@ def test_softmax_with_bias():
TODO: check that we loop when there are too many threads. (THIS IS NOT IMPLEMENTED)
"""
x = T.fmatrix('x')
z = T.nnet.softmax_with_bias(x, T.zeros_like(x[0,:]))
    # We can't use zeros_like(x[0, :]) as it doesn't allow testing with a
    # 0 shape.
z = T.nnet.softmax_with_bias(x, T.alloc(numpy.asarray(0, dtype='float32'),
x.shape[1]))
f = theano.function([x],z, mode=mode_without_gpu)
f_gpu = theano.function([x],z, mode=mode_with_gpu)
......@@ -165,6 +168,7 @@ def test_softmax_with_bias():
#we need to test n>32*1024 to check that we make the block loop.
cmp(2<<15, 5)
cmp(4074, 400)
cmp(0, 10)
cmp(4, 1000, True)
cmp(4, 1024, True)
cmp(4, 2000, True)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论