提交 e2134adb authored 作者: Frederic Bastien's avatar Frederic Bastien

more backport

上级 ba4178f7
......@@ -7,7 +7,8 @@ from theano.printing import Print
def getFilterOutShp(inshp, kshp, step=(1, 1), mode='valid'):
    """Return the spatial output shape of a 2D convolution as a numpy
    ndarray of length 2.

    :param inshp: input image shape, (channels, rows, cols); only the
        trailing two (spatial) entries are used.
    :param kshp: kernel spatial shape, (rows, cols).
    :param step: subsampling stride (dx, dy). Kept as a single tuple
        parameter because Python 3 removed tuple parameter unpacking
        (PEP 3113); positional callers are unaffected.
    :param mode: 'valid' shrinks the output by (kernel - 1) per axis,
        any other value ('full') grows it by (kernel - 1).
    """
    dx, dy = step
    # Sign of the kernel contribution: 'valid' subtracts, 'full' adds.
    if mode == 'valid':
        s = -1
    else:
        s = 1
    inshp, kshp = N.array(inshp), N.array(kshp)
    # Divide in float so ceil rounds partial windows up, then convert
    # to int64 (astype is the version-robust equivalent of N.int64(arr)).
    return N.ceil((inshp[1:] + s * kshp - s * 1) /
                  N.array([dx, dy], dtype='float')).astype('int64')
......@@ -83,11 +84,13 @@ class ConvOp(Op):
self.verbose=verbose
self.version=version
# a triple
self.imshp_logical = self.imshp if imshp_logical is None else tuple(imshp_logical)
self.imshp_logical = self.imshp
if imshp_logical is not None: self.imshp_logical = tuple(imshp_logical)
assert len(self.imshp) == len(self.imshp_logical)
# a pair
self.kshp_logical = self.kshp if kshp_logical is None else tuple(kshp_logical)
self.kshp_logical = self.kshp
if kshp_logical is not None: self.kshp_logical = tuple(kshp_logical)
self.kshp_logical_top_aligned = kshp_logical_top_aligned
self.unroll_batch=unroll_batch
......@@ -349,7 +352,8 @@ class ConvOp(Op):
dw = dw[:,:,::-1,::-1]
####### Determine gradient on inputs ########
mode = 'valid' if self.out_mode == 'full' else 'full'
mode = 'valid'
if not self.out_mode == 'full': mode = 'full'
filters = kerns.dimshuffle((1,0,2,3))
filters = filters[:,:,::-1,::-1]
nkern = self.imshp[0]
......@@ -419,11 +423,13 @@ using namespace std;
d["self_imshp_logical_c"] = self.imshp_logical[2]#N.B. 2 not 1
d["self_imshp_logical_stride_r"] = int(N.ceil(self.imshp_logical[1] / float(self.imshp[1])))
d["self_imshp_logical_stride_c"] = int(N.ceil(self.imshp_logical[2] / float(self.imshp[2])))
d["affectation"]="=" if self.imshp[0]==1 else "+="
d["affectation"]="="
if not self.imshp[0]==1: d["affectation"]="+="
if node.inputs[0].type.dtype=="float32": d["type"]="float"
elif node.inputs[0].type.dtype=="float64": d["type"]="double"
else: raise Exception("Type %s not implemented"%node.inputs[0].type.dtype)
d["gemm"]='dgemm_' if d["type"]=="double" else 'sgemm_'
d["gemm"]='dgemm_'
if not d["type"]=="double":d["gemm"]='sgemm_'
#print 'LOGICAL OFFSET', self.kshp_logical_top_aligned, d["self_kshp_logical_r"],
#print d["self_kshp0"], d["self_kshp_logical_offset_r"], d["self_kshp_logical_stride_r"],
......@@ -464,7 +470,9 @@ def convolve2(kerns, kshp, nkern, images, imshp, bsize, step=(1,1),
#TODO: remove the bias argument from this function because convolution has nothing to do with a bias
# if imshp, is a tuple, images contains one input dimension
nvis_dim = 1 if len(imshp)!=3 else imshp[0]
if len(imshp)!=3:
nvis_dim = 1
else: nvis_dim = imshp[0]
# all these reshapes should happen in place
imrshp = tensor.as_tensor([bsize] + list(imshp))
......
......@@ -32,7 +32,9 @@ def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[
:returns: dynamically-imported python module of the compiled code.
"""
preargs= [] if preargs is None else list(preargs)
if preargs is None:
preargs= []
else: preargs = list(preargs)
preargs.append('-fPIC')
no_opt = False
cuda_root = config.CUDA_ROOT
......
......@@ -180,9 +180,11 @@ class Kouh2008(object):
if rows is None and cols is None:
rows = int(numpy.sqrt(n_out))
if cols is None:
cols = n_out // rows + (1 if n_out % rows else 0)
cols = n_out // rows
if n_out % rows: cols+=1
if rows is None:
rows = n_out // cols + (1 if n_out % cols else 0)
rows = n_out // cols
if n_out % cols: rows+=1
filter_shape = self.filter_shape
height = rows * (row_gap + filter_shape[0]) - row_gap
......@@ -268,7 +270,10 @@ def test_bench_elemwise(n_iter=1000, **kwargs):
# get symbolic train set
s_lr = theano.tensor.fscalar()
x = theano.tensor.TensorType(dtype=conf.dtype, broadcastable=(0,0), shape=(None, 784 if not debug else 3))()
if not debug:
sshape = (None, 784)
else: sshape = (None, 3)
x = theano.tensor.TensorType(dtype=conf.dtype, broadcastable=(0,0), shape=sshape)()
y = theano.tensor.lvector()
rng = numpy.random.RandomState(conf.rng_seed)
......
......@@ -20,7 +20,10 @@ logging.getLogger('theano.sandbox.cuda.tests.test_nnet').setLevel(logging.INFO)
def get_mode():
    """Return the Theano compile mode the tests should use.

    Maps the globally-configured ``theano.compile.default_mode`` to a
    concrete mode object:

    - ``'CLINKER_MODE'`` -> a fast_run Mode with the C linker;
    - ``'PROFILE_MODE'`` -> a fresh ``ProfileMode`` instance;
    - anything else -> ``None`` (let Theano use its default).
    """
    if theano.compile.default_mode == 'CLINKER_MODE':
        return theano.compile.mode.Mode(optimizer='fast_run', linker='c')
    # The original diff left both the old ternary return and its if/else
    # expansion in place, making the expansion unreachable; keep one copy.
    if theano.compile.default_mode == "PROFILE_MODE":
        return theano.compile.ProfileMode()
    return None
def print_mode(mode):
if mode != None and isinstance(mode,(theano.compile.ProfileMode,)):
......
......@@ -119,11 +119,13 @@ class CudaNdarrayType(Type):
else:
b = self.broadcastable
#bcast = str(self.broadcastable)
s=len(b)
if numpy.any(b): s = str(b)
bcast = {(): 'scalar',
(False,): 'vector',
(False, True): 'col',
(True, False): 'row',
(False, False): 'matrix'}.get(b, "%iD" % len(b) if not any(b) else str(b))
(False, False): 'matrix'}.get(b, "%iD" % s)
return "CudaNdarrayType(%s, %s)" % (str(self.dtype), bcast)
def __repr__(self):
......
......@@ -38,15 +38,19 @@ class DownsampleFactorMaxGrad(Op):
gx = numpy.zeros_like(x)
ds0, ds1 = self.ds
shape2 = (x.shape[2] / ds0 * ds0) if self.ignore_border else x.shape[2]
shape3 = (x.shape[3] / ds1 * ds1) if self.ignore_border else x.shape[3]
shape2 = (x.shape[2] / ds0 * ds0)
if not self.ignore_border: shape2 = x.shape[2]
shape3 = (x.shape[3] / ds1 * ds1)
if not self.ignore_border: shape3 = x.shape[3]
for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]):
for i in xrange(shape2):
zi = i / ds0
for j in xrange(shape3):
zj = j / ds1
gx[n,k,i,j] = gz[n,k,zi,zj] if (maxout[n,k,zi,zj] == x[n,k,i,j]) else 0
if (maxout[n,k,zi,zj] == x[n,k,i,j]):
gx[n,k,i,j] = gz[n,k,zi,zj]
else: gx[n,k,i,j] = 0
gx_stg[0] = gx
def c_code(self, node, name, (x, z, gz), (gx,), sub):
......@@ -217,9 +221,12 @@ class DownsampleFactorMax(Op):
z[0] = numpy.asarray(z[0], dtype=x.dtype)
zz=z[0]
ds0, ds1 = self.ds
x_usable2 = (x.shape[2] / ds0 * ds0) if self.ignore_border else x.shape[2]
x_usable3 = (x.shape[3] / ds1 * ds1) if self.ignore_border else x.shape[3]
if self.ignore_border:
x_usable2 = (x.shape[2] / ds0 * ds0)
else: x_usable2 = x.shape[2]
if self.ignore_border:
x_usable3 = (x.shape[3] / ds1 * ds1)
else: x_usable3 = x.shape[3]
for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]):
for i in xrange(x_usable2):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论