提交 00a3e6b3 authored 作者: Frederic Bastien's avatar Frederic Bastien

Fix a bug in the GPU convolution that was introduced in commit 7122f9987ab4d on 31 January 2011.

上级 f0dedb92
......@@ -363,7 +363,7 @@ class GpuConv(Op):
return ['cuda_ndarray.cuh','<stdio.h>']
def c_code_cache_version(self):
return (0,9) # raise this whenever modifying any of the support_code_files
return (0,10) # raise this whenever modifying any of the support_code_files
def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of these files
......
......@@ -474,9 +474,9 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
//if we can't fit the kernel in shared memory, we must split it more.
nb_split++;
thread_z=ceil_intdiv(kern_len,nb_split);
shared_size=sizeof(float)*std::max(
img_size + kern_wid*thread_z,
out_size*thread_z);
shared_size = sizeof(float)*(full_kern
? std::max(img_size + kern_size, out_size*thread_z)
: std::max(img_size + thread_z*kern_wid, out_size*thread_z));
}
if (nb_split <= kern_len)
{
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论