提交 0b5cccef 作者: sebastien-j

ValueError when n_streams is too large (On GPU)

上级 7d286d04
...@@ -734,6 +734,13 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp): ...@@ -734,6 +734,13 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
unsigned int threads_per_block = std::min((unsigned int)n_streams_used_in_this_call, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK); unsigned int threads_per_block = std::min((unsigned int)n_streams_used_in_this_call, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
unsigned int n_blocks = std::min(ceil_intdiv((unsigned int)n_streams_used_in_this_call, threads_per_block), (unsigned int)NUM_VECTOR_OP_BLOCKS); unsigned int n_blocks = std::min(ceil_intdiv((unsigned int)n_streams_used_in_this_call, threads_per_block), (unsigned int)NUM_VECTOR_OP_BLOCKS);
if (n_streams > (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK * (unsigned int)NUM_VECTOR_OP_BLOCKS)
{
PyErr_Format(PyExc_ValueError, "On GPU, n_streams should be at most %%u",
(unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK * (unsigned int)NUM_VECTOR_OP_BLOCKS);
%(fail)s;
}
if (threads_per_block * n_blocks < n_streams) if (threads_per_block * n_blocks < n_streams)
{ {
if (! %(nodename)s_printed_warning) if (! %(nodename)s_printed_warning)
...@@ -761,7 +768,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp): ...@@ -761,7 +768,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (8,) return ()
class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论