提交 31904be5 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

new optimization for scan + new feature for Dimshuffle

上级 3903e59f
差异被折叠。
......@@ -43,6 +43,10 @@ class DimShuffle(Op):
dimension and a numerical index represents the dimension of the same
rank in the tensor passed to perform.
Note (2.04.2010, RP): Added 'f' — inserts a non-broadcastable
dimension; 'f' behaves exactly like 'x', except that the new dimension
is not broadcastable
Examples:
DimShuffle((False, False, False), ['x', 2, 'x', 0, 1])
......@@ -120,10 +124,10 @@ class DimShuffle(Op):
# transposition of non-broadcastable dimensions
# This is how the dimensions will be permuted, without accounting for the extra
# 'x' broadcastable dimensions to insert.
self.shuffle = [i2j[x] for x in new_order if x != 'x']
self.shuffle = [i2j[x] for x in new_order if x != 'x' and x != 'f']
# list of dimensions of the output that are broadcastable and were not in the original input
self.augment = [i for i, x in enumerate(new_order) if x == 'x']
self.augment = [i for i, x in enumerate(new_order) if x == 'x' or x == 'f']
if self.inplace:
self.view_map = {0: [0]}
......@@ -147,6 +151,8 @@ class DimShuffle(Op):
for value in self.new_order:
if value == 'x':
ob.append(True)
elif value == 'f':
ob.append(False)
else:
ob.append(ib[value])
......@@ -235,7 +241,7 @@ class DimShuffle(Op):
shape_statements = ['npy_intp dimensions[%i]'%nd_out]
for i, o in enumerate(self.new_order):
if o != 'x':
if o != 'x' and o != 'f':
shape_statements += [('dimensions['+str(i)+'] = %(basename)s->dimensions['+str(o)+']')]
else:
shape_statements += [('dimensions['+str(i)+'] = 1')]
......@@ -250,7 +256,7 @@ class DimShuffle(Op):
#set the strides of the non-broadcasted dimensions
for i, o in enumerate(self.new_order):
if o != 'x':
if o != 'x' and o != 'f':
strides_statements += [('strides['+str(i)+'] = %(basename)s->strides['+str(o)+']')]
else:
strides_statements += [('strides['+str(i)+'] = 0')]
......@@ -317,7 +323,7 @@ class DimShuffle(Op):
gz = as_tensor_variable(gz)
grad_order = ['x'] * len(x.type.broadcastable)
for i, v in enumerate(self.new_order):
if v != 'x':
if v != 'x' and v !='f':
grad_order[v] = i
return [DimShuffle(gz.type.broadcastable, grad_order, inplace=True)(Elemwise(scalar.identity)(gz))]
......
......@@ -125,7 +125,7 @@ class T_Scan(unittest.TestCase):
W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar()
output, updates = theano.scan(f_rnn, u,x0,[W_in,W], n_steps = 0, truncate_gradient =
output, updates = theano.scan(f_rnn, u,x0,[W_in,W], n_steps = None, truncate_gradient =
-1, go_backwards = False)
f2 = theano.function([u,x0,W_in,W], output, updates = updates)
......@@ -146,7 +146,6 @@ class T_Scan(unittest.TestCase):
assert numpy.allclose(theano_values, v_out)
# simple rnn, one input, one state, weights for each; input/state
# are vectors, weights are scalars; using shared variables
def test_one_sequence_one_output_weights_shared(self):
......@@ -159,7 +158,7 @@ class T_Scan(unittest.TestCase):
def f_rnn_shared(u_t,x_tm1, tmp_W_in, tmp_W):
return u_t*tmp_W_in+x_tm1*tmp_W
output, updates = theano.scan(f_rnn_shared, u,x0,[W_in, W], n_steps =0,
output, updates = theano.scan(f_rnn_shared, u,x0,[W_in, W], n_steps =None,
truncate_gradient= -1, go_backwards = False)
f3 = theano.function([u,x0], output, updates = updates)
# get random initial values
......@@ -176,7 +175,6 @@ class T_Scan(unittest.TestCase):
assert numpy.allclose(theano_values, v_out)
# some rnn with multiple outputs and multiple inputs; other
# dimension instead of scalars/vectors
def test_multiple_inputs_multiple_outputs(self):
......@@ -203,7 +201,7 @@ class T_Scan(unittest.TestCase):
return [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]
outputs, updates = theano.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1, n_steps = 0,
outputs, updates = theano.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1, n_steps = None,
truncate_gradient = -1, go_backwards = False)
f4 = theano.function([u1,u2,x0,y0,W_in1], outputs, updates = updates)
......@@ -222,7 +220,6 @@ class T_Scan(unittest.TestCase):
assert numpy.allclose(theano_y , v_y)
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs)
......@@ -242,7 +239,7 @@ class T_Scan(unittest.TestCase):
return u_tm2*W_in+x_tm1*W+x_tm2
outputs, updates = theano.scan(f_rnn_shared, dict(input=u, taps=-2),
dict(initial = x0, taps = [-1,-2]), [], n_steps = 0, truncate_gradient = -1,
dict(initial = x0, taps = [-1,-2]), [], n_steps = None, truncate_gradient = -1,
go_backwards = False)
f7 = theano.function([u,x0], outputs, updates = updates)
......@@ -282,7 +279,7 @@ class T_Scan(unittest.TestCase):
return (u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2
output,updates = theano.scan(f_rnn_shared, dict( input = u, taps=[-2,2]),\
dict(initial = x0, taps = [-1,-2]), [], n_steps =0, truncate_gradient =-1,
dict(initial = x0, taps = [-1,-2]), [], n_steps = None, truncate_gradient =-1,
go_backwards = False)
f8 = theano.function([u,x0], output, updates = updates)
......@@ -324,7 +321,7 @@ class T_Scan(unittest.TestCase):
outputs, updates = theano.scan(f_rnn_shared, [u0,u1,u2],
[dict( initial = x0, inplace =u2), dict(initial = x1, inplace = u1)],
[], n_steps = 0, truncate_gradient = -1, go_backwards = False, mode=mode )
[], n_steps = None, truncate_gradient = -1, go_backwards = False, mode=mode )
f9 = theano.function([mu0,mu1,mu2,x0,x1], outputs , updates = updates, mode = mode)
# compute output in numpy
......@@ -374,7 +371,7 @@ class T_Scan(unittest.TestCase):
outputs, updates = theano.scan(f_rnn_shared,
[u0,dict(input = u1, taps = [0,1]),dict( input = u2, taps= [-1,0,+1])],
[dict( initial = x0, inplace =u2), dict(initial = x1, inplace = u1)],
[], n_steps = 0, truncate_gradient = 01, go_backwards = False, mode=mode )
[], n_steps = None, truncate_gradient = 01, go_backwards = False, mode=mode )
f9 = theano.function([mu0,mu1,mu2,x0,x1], outputs , updates = updates, mode = mode)
# compute output in numpy
......@@ -429,7 +426,7 @@ class T_Scan(unittest.TestCase):
y0 = theano.tensor.matrix('y0')
outputs,updates = theano.scan(f, [u1,u2], [ dict(initial = y0, taps = [-3,-2,-1]),y1,
None], [], n_steps = 0, go_backwards = False, truncate_gradient = -1)
None], [], n_steps = None, go_backwards = False, truncate_gradient = -1)
f10 = theano.function([u2,y0], outputs, updates = updates)
theano_y0,theano_y1,theano_y2 = f10(vu2, vy0)
......@@ -545,7 +542,7 @@ class T_Scan(unittest.TestCase):
u = theano.tensor.dvector()
outputs, updates = theano.scan(f_rnn, u,[],[], n_steps =0 , truncate_gradient = -1,
outputs, updates = theano.scan(f_rnn, u,[],[], n_steps =None , truncate_gradient = -1,
go_backwards = False)
f2 = theano.function([u], outputs, updates = updates)
......@@ -578,7 +575,7 @@ class T_Scan(unittest.TestCase):
W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar()
output, updates = theano.scan(f_rnn, u,x0,[W_in,W], n_steps = 0, truncate_gradient =
output, updates = theano.scan(f_rnn, u,x0,[W_in,W], n_steps = None, truncate_gradient =
-1, go_backwards = True)
f2 = theano.function([u,x0,W_in,W], output, updates = updates)
......@@ -607,9 +604,7 @@ class T_Scan(unittest.TestCase):
rng = numpy.random.RandomState(utt.fetch_seed())
v_v = rng.uniform( size = (5,), low = -5., high = 5.)
print f(v_v,0.)
assert ( numpy.sum(v_v) == f(v_v, 0.) )
assert abs(numpy.sum(v_v) - f(v_v, 0.)) < 1e-3
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论