提交 41ab0389 authored 作者: Frederic's avatar Frederic

remove old code that isn't used.

上级 53630ed1
......@@ -502,32 +502,12 @@ class GpuConvMM(GpuOp):
Author: Arjun Jain
Implement the caffe convolution
"""
@staticmethod
def logical_output_shape_2d(imshp, kshp, mode):
if mode == 'valid':
return imshp[0] - kshp[0] + 1, imshp[1] - kshp[1] + 1
if mode == 'full':
return imshp[0] + kshp[0] - 1, imshp[1] + kshp[1] - 1
raise ValueError(mode)
def __init__(self, border_mode,
subsample=(1, 1),
logical_img_hw=None,
logical_kern_hw=None,
logical_kern_align_top=True,
version=-1,
verbose=0,
kshp=None,
imshp=None,
max_threads_dim0=None, pad=0):
pad=0):
"""
:param version: each version of c_code implements many kernel for the
convolution. By default we try to guess the best one.
You can force one version with this parameter. This
parameter is used by the tests.
:param verbose: for value of 1,2 and 3. Print more information during
the execution of the convolution. Mostly used for
optimization or debugging.
:param kshp: The size of the kernel. If provided, can generate
faster code. If the GpuConv op is automatically
inserted,
......@@ -535,54 +515,25 @@ class GpuConvMM(GpuOp):
:param imshp: The size of the image. Not used for code generation but
allows to select an experimental new version in another
repo.
:param max_threads_dim0: The maximum number of threads for the
block size dimensions 0 (blockDim.x) used by the
GPU function.
"""
self.border_mode = border_mode
self.subsample = subsample
if logical_img_hw is not None:
h, w = logical_img_hw
#TODO: reconsider this... since shapes are not given in
# constructor, maybe a multiplier + offset is a more
# appropriate way of passing this logical grid
logical_img_hw = tuple(logical_img_hw)
self.logical_img_hw = logical_img_hw
if logical_kern_hw is not None:
h, w = logical_kern_hw
#TODO: reconsider this... since shapes are not given in
# constructor, maybe a multiplier + offset is a more
# appropriate way of passing this logical grid
logical_kern_hw = tuple(logical_kern_hw)
self.logical_kern_hw = logical_kern_hw
self.logical_kern_align_top = logical_kern_align_top
self.version = version
self.verbose = verbose
self.kshp = kshp
self.imshp = imshp
self.max_threads_dim0 = max_threads_dim0
self.pad = pad
def __eq__(self, other):
return type(self) == type(other) \
and self.border_mode == other.border_mode \
and self.subsample == other.subsample \
and self.logical_img_hw == other.logical_img_hw \
and self.logical_kern_hw == other.logical_kern_hw \
and self.logical_kern_align_top == other.logical_kern_align_top \
and self.version == other.version \
and self.verbose == other.verbose \
and self.kshp == other.kshp\
and self.imshp == other.imshp\
and self.max_threads_dim0 == other.max_threads_dim0
and self.imshp == other.imshp
def __setstate__(self, d):
self.__dict__.update(d)
if not hasattr(self, "imshp"):
self.imshp = None
if not hasattr(self, "max_threads_dim0"):
self.max_threads_dim0 = None
def __hash__(self):
# don't use hash(self.version) as hash(-1)==-2 and
......@@ -590,23 +541,14 @@ class GpuConvMM(GpuOp):
return hash(type(self)) \
^ hash(self.border_mode) \
^ hash(self.subsample) \
^ hash(self.logical_img_hw) \
^ hash(self.logical_kern_hw) \
^ hash(self.logical_kern_align_top) \
^ self.version \
^ hash(self.verbose) \
^ hash(self.kshp)\
^ hash(self.imshp)\
^ hash(self.max_threads_dim0)
^ hash(self.imshp)
def __str__(self):
return '%s{%s, %s, %s, %s, %s, %s, %s}' % (
return '%s{%s, %s, %s, %s}' % (
self.__class__.__name__,
self.border_mode,
str(self.subsample),
str(self.logical_img_hw),
str(self.logical_kern_hw),
str(self.logical_kern_align_top),
str(self.imshp),
str(self.kshp))
......@@ -639,26 +581,6 @@ class GpuConvMM(GpuOp):
images[2] * images[3] * 2)
return flops
def make_thunk(self, node, storage_map, compute_map, no_recycling):
node_ = copy.copy(node)
assert node.op is node_.op
if node_.op.max_threads_dim0 is None:
cuda = theano.sandbox.cuda
device_id = cuda.use.device_number
if device_id is None:
cuda.use("gpu",
force=False,
default_to_move_computation_to_gpu=False,
move_shared_float32_to_gpu=False,
enable_cuda=False,
test_driver=True)
device_id = cuda.use.device_number
cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray
prop = cuda_ndarray.device_properties(device_id)
node_.op.max_threads_dim0 = prop['maxThreadsDim0']
return super(GpuConv, node_.op).make_thunk(node_, storage_map,
compute_map, no_recycling)
def c_compile_args(self):
nb = 0
if self.kshp is not None:
......@@ -686,26 +608,16 @@ class GpuConvMM(GpuOp):
dx = self.subsample
dy = self.subsample
border_mode = self.border_mode
version = self.version
verbose = self.verbose
sub = sub.copy()
max_threads_dim0 = self.max_threads_dim0
pad = self.pad
if max_threads_dim0 is None:
raise NotImplementedError("GpuConv.c_code should not be called "
"directly. It should be called by "
"make_thunk() that add some information "
"related to the selected GPU.")
sub.update(locals())
return """
//Mandatory args
const char *mode_str = "%(border_mode)s";
//Optional args
int version = %(version)s;
int verbose = %(verbose)s;
int dx = %(dx)s;
int dy = %(dy)s;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论