提交 00a3e6b3 作者: Frederic Bastien

Fix bug in GPU convolution introduced in commit 7122f9987ab4d on 31 January 2011.

上级 f0dedb92
...@@ -363,7 +363,7 @@ class GpuConv(Op): ...@@ -363,7 +363,7 @@ class GpuConv(Op):
return ['cuda_ndarray.cuh','<stdio.h>'] return ['cuda_ndarray.cuh','<stdio.h>']
def c_code_cache_version(self): def c_code_cache_version(self):
return (0,9) # raise this whenever modifying any of the support_code_files return (0,10) # raise this whenever modifying any of the support_code_files
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of these files # REMEMBER TO RAISE c_code_cache_version when changing any of these files
......
...@@ -474,9 +474,9 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -474,9 +474,9 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
//if we can't fit the kernel in shared memory, we must split it more. //if we can't fit the kernel in shared memory, we must split it more.
nb_split++; nb_split++;
thread_z=ceil_intdiv(kern_len,nb_split); thread_z=ceil_intdiv(kern_len,nb_split);
shared_size=sizeof(float)*std::max( shared_size = sizeof(float)*(full_kern
img_size + kern_wid*thread_z, ? std::max(img_size + kern_size, out_size*thread_z)
out_size*thread_z); : std::max(img_size + thread_z*kern_wid, out_size*thread_z));
} }
if (nb_split <= kern_len) if (nb_split <= kern_len)
{ {
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论