提交 a0427809 作者: Frederic Bastien

If the kernel shape is given, unroll the GpuConv by the kernel width. Otherwise, don't unroll.

上级 500edda8
...@@ -77,8 +77,7 @@ if compile_cuda_ndarray: ...@@ -77,8 +77,7 @@ if compile_cuda_ndarray:
os.makedirs(cuda_ndarray_loc) os.makedirs(cuda_ndarray_loc)
nvcc_compiler.nvcc_module_compile_str('cuda_ndarray', code, location = cuda_ndarray_loc, nvcc_compiler.nvcc_module_compile_str('cuda_ndarray', code, location = cuda_ndarray_loc,
include_dirs=[cuda_path], libs=['cublas'], include_dirs=[cuda_path], libs=['cublas'])
preargs=['-DDONT_UNROLL', '-O3'])
from cuda_ndarray.cuda_ndarray import * from cuda_ndarray.cuda_ndarray import *
......
...@@ -132,7 +132,8 @@ class GpuConv(Op): ...@@ -132,7 +132,8 @@ class GpuConv(Op):
logical_kern_hw=None, logical_kern_hw=None,
logical_kern_align_top=True, logical_kern_align_top=True,
version=-1, version=-1,
verbose=0): verbose=0,
kshp=None):
self.border_mode = border_mode self.border_mode = border_mode
self.subsample = subsample self.subsample = subsample
if logical_img_hw is not None: if logical_img_hw is not None:
...@@ -152,6 +153,7 @@ class GpuConv(Op): ...@@ -152,6 +153,7 @@ class GpuConv(Op):
self.logical_kern_align_top = logical_kern_align_top self.logical_kern_align_top = logical_kern_align_top
self.version=version self.version=version
self.verbose=verbose self.verbose=verbose
self.kshp = kshp
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) \ return type(self) == type(other) \
...@@ -187,13 +189,16 @@ class GpuConv(Op): ...@@ -187,13 +189,16 @@ class GpuConv(Op):
return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()]) return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])
def c_compile_args(self): def c_compile_args(self):
return ['-DDONT_UNROLL'] nb = 0
if self.kshp is not None:
nb = self.kshp[1]
return ['-DTHEANO_KERN_WID='+str(nb)]
def c_headers(self): def c_headers(self):
return ['cuda_ndarray.cuh','<stdio.h>'] return ['cuda_ndarray.cuh','<stdio.h>']
def c_code_cache_version(self): def c_code_cache_version(self):
return (0,1) return (0,2)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
return open(os.path.join(os.path.split(__file__)[0],'conv_kernel.cu')).read()+\ return open(os.path.join(os.path.split(__file__)[0],'conv_kernel.cu')).read()+\
......
...@@ -347,6 +347,7 @@ def local_gpu_conv(node): ...@@ -347,6 +347,7 @@ def local_gpu_conv(node):
logical_img_hw=logical_img_hw, logical_img_hw=logical_img_hw,
logical_kern_hw=op.kshp_logical, logical_kern_hw=op.kshp_logical,
logical_kern_align_top=op.kshp_logical_top_aligned, logical_kern_align_top=op.kshp_logical_top_aligned,
kshp=op.kshp,
version=op.version, version=op.version,
verbose=op.verbose verbose=op.verbose
) )
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论