Commit 41ab0389, authored by Frederic

Remove old code that isn't used.

Parent 53630ed1
@@ -502,32 +502,12 @@ class GpuConvMM(GpuOp):
     Author: Arjun Jain
     Implement the caffe convolution
     """
-    @staticmethod
-    def logical_output_shape_2d(imshp, kshp, mode):
-        if mode == 'valid':
-            return imshp[0] - kshp[0] + 1, imshp[1] - kshp[1] + 1
-        if mode == 'full':
-            return imshp[0] + kshp[0] - 1, imshp[1] + kshp[1] - 1
-        raise ValueError(mode)
 
     def __init__(self, border_mode,
                  subsample=(1, 1),
-                 logical_img_hw=None,
-                 logical_kern_hw=None,
-                 logical_kern_align_top=True,
-                 version=-1,
-                 verbose=0,
                  kshp=None,
                  imshp=None,
-                 max_threads_dim0=None, pad=0):
+                 pad=0):
         """
-        :param version: each version of c_code implements many kernel for the
-                        convolution. By default we try to guess the best one.
-                        You can force one version with this parameter. This
-                        parameter is used by the tests.
-        :param verbose: for value of 1,2 and 3. Print more information during
-                        the execution of the convolution. Mostly used for
-                        optimization or debugging.
         :param kshp: The size of the kernel. If provided, can generate
                      faster code. If the GpuConv op is automatically
                      inserted,
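For context, the removed `logical_output_shape_2d` helper in the hunk above encoded the textbook 2-D convolution shape rule. A minimal standalone sketch of that rule (the function name here is illustrative, not part of the remaining op):

```python
def conv2d_output_shape(imshp, kshp, mode):
    # 'valid' keeps only positions where the kernel fully overlaps the
    # image; 'full' also keeps every partial overlap.
    if mode == 'valid':
        return imshp[0] - kshp[0] + 1, imshp[1] - kshp[1] + 1
    if mode == 'full':
        return imshp[0] + kshp[0] - 1, imshp[1] + kshp[1] - 1
    raise ValueError(mode)

assert conv2d_output_shape((32, 32), (5, 5), 'valid') == (28, 28)
assert conv2d_output_shape((32, 32), (5, 5), 'full') == (36, 36)
```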
@@ -535,54 +515,25 @@ class GpuConvMM(GpuOp):
         :param imshp: The size of the image. Not used for code generation but
                       allows to select an experimental new version in another
                       repo.
-        :param max_threads_dim0: The maximum number of threads for the
-                      block size dimensions 0 (blockDim.x) used by the
-                      GPU function.
         """
         self.border_mode = border_mode
         self.subsample = subsample
-        if logical_img_hw is not None:
-            h, w = logical_img_hw
-            #TODO: reconsider this... since shapes are not given in
-            #      constructor, maybe a multiplier + offset is a more
-            #      appropriate way of passing this logical grid
-            logical_img_hw = tuple(logical_img_hw)
-        self.logical_img_hw = logical_img_hw
-        if logical_kern_hw is not None:
-            h, w = logical_kern_hw
-            #TODO: reconsider this... since shapes are not given in
-            #      constructor, maybe a multiplier + offset is a more
-            #      appropriate way of passing this logical grid
-            logical_kern_hw = tuple(logical_kern_hw)
-        self.logical_kern_hw = logical_kern_hw
-        self.logical_kern_align_top = logical_kern_align_top
-        self.version = version
-        self.verbose = verbose
         self.kshp = kshp
         self.imshp = imshp
-        self.max_threads_dim0 = max_threads_dim0
         self.pad = pad
 
     def __eq__(self, other):
         return type(self) == type(other) \
             and self.border_mode == other.border_mode \
             and self.subsample == other.subsample \
-            and self.logical_img_hw == other.logical_img_hw \
-            and self.logical_kern_hw == other.logical_kern_hw \
-            and self.logical_kern_align_top == other.logical_kern_align_top \
-            and self.version == other.version \
-            and self.verbose == other.verbose \
             and self.kshp == other.kshp\
-            and self.imshp == other.imshp\
-            and self.max_threads_dim0 == other.max_threads_dim0
+            and self.imshp == other.imshp
 
     def __setstate__(self, d):
         self.__dict__.update(d)
         if not hasattr(self, "imshp"):
             self.imshp = None
-        if not hasattr(self, "max_threads_dim0"):
-            self.max_threads_dim0 = None
 
     def __hash__(self):
         # don't use hash(self.version) as hash(-1)==-2 and
@@ -590,23 +541,14 @@ class GpuConvMM(GpuOp):
         return hash(type(self)) \
             ^ hash(self.border_mode) \
             ^ hash(self.subsample) \
-            ^ hash(self.logical_img_hw) \
-            ^ hash(self.logical_kern_hw) \
-            ^ hash(self.logical_kern_align_top) \
-            ^ self.version \
-            ^ hash(self.verbose) \
             ^ hash(self.kshp)\
-            ^ hash(self.imshp)\
-            ^ hash(self.max_threads_dim0)
+            ^ hash(self.imshp)
 
     def __str__(self):
-        return '%s{%s, %s, %s, %s, %s, %s, %s}' % (
+        return '%s{%s, %s, %s, %s}' % (
             self.__class__.__name__,
             self.border_mode,
             str(self.subsample),
-            str(self.logical_img_hw),
-            str(self.logical_kern_hw),
-            str(self.logical_kern_align_top),
             str(self.imshp),
             str(self.kshp))
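Note how the commit keeps `__eq__`, `__hash__`, and `__str__` in lockstep: every property dropped from `__init__` is dropped from all three. Theano merges ops that compare equal, so `a == b` must imply `hash(a) == hash(b)`. A minimal sketch of that invariant on an illustrative op (not the real class):

```python
class SketchOp(object):
    def __init__(self, border_mode, subsample):
        self.border_mode = border_mode
        self.subsample = tuple(subsample)

    def __eq__(self, other):
        # Compare exactly the properties that affect the generated code.
        return (type(self) == type(other)
                and self.border_mode == other.border_mode
                and self.subsample == other.subsample)

    def __hash__(self):
        # Hash the same properties, so equal ops hash equally.
        # (The old code XORed self.version raw because, in CPython,
        # hash(-1) == hash(-2) == -2, which would make versions -1
        # and -2 collide.)
        return (hash(type(self))
                ^ hash(self.border_mode)
                ^ hash(self.subsample))

assert SketchOp('valid', (1, 1)) == SketchOp('valid', (1, 1))
assert hash(SketchOp('valid', (1, 1))) == hash(SketchOp('valid', (1, 1)))
```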
@@ -639,26 +581,6 @@ class GpuConvMM(GpuOp):
                      images[2] * images[3] * 2)
         return flops
 
-    def make_thunk(self, node, storage_map, compute_map, no_recycling):
-        node_ = copy.copy(node)
-        assert node.op is node_.op
-        if node_.op.max_threads_dim0 is None:
-            cuda = theano.sandbox.cuda
-            device_id = cuda.use.device_number
-            if device_id is None:
-                cuda.use("gpu",
-                         force=False,
-                         default_to_move_computation_to_gpu=False,
-                         move_shared_float32_to_gpu=False,
-                         enable_cuda=False,
-                         test_driver=True)
-                device_id = cuda.use.device_number
-            cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray
-            prop = cuda_ndarray.device_properties(device_id)
-            node_.op.max_threads_dim0 = prop['maxThreadsDim0']
-        return super(GpuConv, node_.op).make_thunk(node_, storage_map,
-                                                   compute_map, no_recycling)
-
     def c_compile_args(self):
         nb = 0
         if self.kshp is not None:
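The deleted `make_thunk` existed only to lazily fill `max_threads_dim0` from the CUDA device properties before code generation; with that attribute gone, the override has no purpose. For reference, the same hardware limit can be queried directly, for example with PyCUDA (an assumption here; this is not how Theano itself does it):

```python
import pycuda.driver as drv

drv.init()
dev = drv.Device(0)
# Upper bound on blockDim.x, the value the removed code stored as
# max_threads_dim0 (prop['maxThreadsDim0']).
max_threads_dim0 = dev.get_attribute(drv.device_attribute.MAX_BLOCK_DIM_X)
print(max_threads_dim0)
```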
@@ -686,26 +608,16 @@ class GpuConvMM(GpuOp):
         dx = self.subsample
         dy = self.subsample
         border_mode = self.border_mode
-        version = self.version
-        verbose = self.verbose
         sub = sub.copy()
-        max_threads_dim0 = self.max_threads_dim0
         pad = self.pad
-        if max_threads_dim0 is None:
-            raise NotImplementedError("GpuConv.c_code should not be called "
-                                      "directly. It should be called by "
-                                      "make_thunk() that add some information "
-                                      "related to the selected GPU.")
         sub.update(locals())
         return """
     //Mandatory args
     const char *mode_str = "%(border_mode)s";
     //Optional args
-    int version = %(version)s;
-    int verbose = %(verbose)s;
     int dx = %(dx)s;
     int dy = %(dy)s;
...
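The `sub.update(locals())` line feeds the op's Python-side attributes into the returned C source via %-dict formatting, which is how `%(border_mode)s`, `%(dx)s`, and `%(dy)s` get their values; removing `version` and `verbose` from both the template and the local variables keeps the two sides consistent. A minimal sketch of the mechanism:

```python
def render_c_snippet(border_mode, dx, dy):
    sub = dict(locals())  # capture the parameters by name, as c_code does
    return """
    //Mandatory args
    const char *mode_str = "%(border_mode)s";
    //Optional args
    int dx = %(dx)s;
    int dy = %(dy)s;
    """ % sub

print(render_c_snippet('valid', 1, 1))
```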