提交 a96f4908 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Scan finally works with shared variables ! .. there are a few changes to the…

Scan finally works with shared variables! A few changes to the scan documentation still need to be added; they will follow soon.
上级 fbc285a7
...@@ -30,8 +30,12 @@ import theano ...@@ -30,8 +30,12 @@ import theano
from theano.tensor import opt from theano.tensor import opt
from theano import gof from theano import gof
from theano.compile import optdb from theano.compile import optdb
import theano.tensor.shared_randomstreams as shared_random
import numpy import numpy
# Logging function for sending warning or info # Logging function for sending warning or info
import logging import logging
_logger = logging.getLogger('theano.scan') _logger = logging.getLogger('theano.scan')
...@@ -166,7 +170,6 @@ def scan(fn, sequences, initial_states, non_sequences, inplace_map={}, \ ...@@ -166,7 +170,6 @@ def scan(fn, sequences, initial_states, non_sequences, inplace_map={}, \
n_seqs = len(seqs) n_seqs = len(seqs)
n_outs = len(init_outs) n_outs = len(init_outs)
# update sequences_taps[idx] to contain 0 if it is not defined # update sequences_taps[idx] to contain 0 if it is not defined
for i in xrange(n_seqs): for i in xrange(n_seqs):
if not sequences_taps.has_key(i): if not sequences_taps.has_key(i):
...@@ -184,17 +187,6 @@ def scan(fn, sequences, initial_states, non_sequences, inplace_map={}, \ ...@@ -184,17 +187,6 @@ def scan(fn, sequences, initial_states, non_sequences, inplace_map={}, \
outputs_taps.__delitem__(i) outputs_taps.__delitem__(i)
elif not(type(outputs_taps[i]) in (list,tuple)): elif not(type(outputs_taps[i]) in (list,tuple)):
outputs_taps[i] = [outputs_taps[i]] outputs_taps[i] = [outputs_taps[i]]
'''
# update stored_steps_output list
for i in xrange(n_outs):
if not stored_steps_output.has_key(i):
stored_steps_output[i] = True
elif not stored_steps_output[i]:
if outputs_taps[i] != [-1]:
stored_steps_output[i] = True
warning('You need to keep past value of outputs if you use'\
'past taps of output different from -1')
'''
stored_steps_output = [ 0 for i in xrange(n_outs)] stored_steps_output = [ 0 for i in xrange(n_outs)]
...@@ -202,47 +194,138 @@ def scan(fn, sequences, initial_states, non_sequences, inplace_map={}, \ ...@@ -202,47 +194,138 @@ def scan(fn, sequences, initial_states, non_sequences, inplace_map={}, \
# create theano inputs for the recursive function # create theano inputs for the recursive function
args = [] args = []
_ins = 0
_outs = 0
for (i,seq) in enumerate(seqs): for (i,seq) in enumerate(seqs):
if sequences_taps.has_key(i): if sequences_taps.has_key(i):
for k in xrange(len(sequences_taps[i])): for k in xrange(len(sequences_taps[i])):
args += [seq[0].type() ] args += [seq[0].type() ]
_ins += 1
for (i,init_out) in enumerate(init_outs): for (i,init_out) in enumerate(init_outs):
if outputs_taps.has_key(i): if outputs_taps.has_key(i):
for k in xrange(len(outputs_taps[i])): for k in xrange(len(outputs_taps[i])):
if outputs_taps[i] == [-1]: if outputs_taps[i] == [-1]:
args += [init_out.type() ] args += [init_out.type() ]
_outs += 1
else: else:
args += [init_out[0].type() ] args += [init_out[0].type() ]
for non_seq in non_seqs : _outs += 1
if not isinstance(non_seq, theano.compile.sharedvalue.SharedVariable): noshared = []
args += [non_seq] for non_seq in non_seqs:
if not isinstance(non_seq, theano.compile.SharedVariable):
noshared += [non_seq]
dummy_args = args + noshared
args += non_seqs
outputs_updates = fn(*args)
otuputs = []
updates = {}
# we try now to separate the outputs from the updates
if not type(outputs_updates) in (list,tuple):
if type(outputs_updates) == dict :
# we have just an update dictionary
updates = outputs_updates
else: else:
tmp_var = theano.tensor.Tensor(dtype = non_seq.dtype, outputs = [outputs_updates]
broadcastable = non_seq.broadcastable)() else:
args += [ tmp_var ] elem0 = outputs_updates[0]
elem1 = outputs_updates[1]
if ( type(elem0) == dict ) or \
( type(elem0) in (list,tuple) and type(elem0[0]) in (list,tuple)):
# elem0 is the updates dictionary / list
updates = elem0
outputs = elem1
if not type(outputs) in (list,tuple):
outputs = [outputs]
elif ( type(elem1) == dict) or \
( type(elem1) in (list,tuple) and type(elem1[0]) in (list,tuple)):
# elem1 is the updates dictionary / list
updates = elem1
outputs = elem0
if not type(outputs) in (list,tuple):
outputs = [outputs]
else :
if type(outputs_updates) in (list,tuple) and \
( type(outputs_updates[0]) in (list,tuple)):
outputs = []
updates = outputs_updates
else:
outputs = outputs_updates
updates = {}
next_outs = fn(*args) # We compile a dummy function just to see which shared variables
# we have and what their update rules are
dummy_f = theano.function(dummy_args, outputs, updates = updates, mode = \
theano.compile.mode.Mode(linker = 'py', optimizer = None) )
if not (type(next_outs) in (list,tuple)):
next_outs = [next_outs]
ls_outputs = [ sout.variable for sout in dummy_f.maker.outputs]
update_map = {}
n_actual_outs = n_outs
shared_outs = []
shared_non_seqs = []
givens = {}
ls_inputs=[inp.variable for inp in \
dummy_f.maker.expanded_inputs[:_ins+_outs]]
fromIdx = _ins + _outs
# add shared variable that act as outputs
for inp in dummy_f.maker.expanded_inputs[fromIdx:] :
if isinstance(inp.variable, theano.compile.SharedVariable) and inp.update:
ls_inputs.append(inp.variable.type())
ls_outputs += [inp.update]
update_map[ inp.variable ] = n_outs
outputs_taps[ n_outs ] = [-1]
n_outs += 1
stored_steps_output += [1]
shared_outs += [inp.variable]
givens[inp.variable] = ls_inputs[-1]
# add the rest:
for inp in dummy_f.maker.expanded_inputs[fromIdx:] :
if isinstance(inp.variable, theano.compile.SharedVariable) and not inp.update:
shared_non_seqs += [inp.variable]
ls_inputs += [inp.variable.type() ]
givens[inp.variable] = ls_inputs[-1]
elif not isinstance(inp.variable, theano.compile.SharedVariable):
ls_inputs.append(inp.variable)
# Create the Scan op object # Create the Scan op object
local_op = Scan( (args,next_outs ), n_seqs,n_outs,inplace_map, local_op = Scan( (ls_inputs,ls_outputs, givens ), n_seqs, n_outs, inplace_map,
sequences_taps, outputs_taps, truncate_gradient, sequences_taps, outputs_taps, truncate_gradient,
go_backwards, stored_steps_output, mode) go_backwards, stored_steps_output, mode)
# Call the object on the input sequences, initial values for outs, # Call the object on the input sequences, initial values for outs,
# and non sequences # and non sequences
return local_op( *( [theano.tensor.as_tensor(n_steps)] \ values = local_op( *( [theano.tensor.as_tensor(n_steps)] \
+ seqs \ + seqs \
+ init_outs \ + init_outs \
+ non_seqs)) + shared_outs \
+ noshared
+ shared_non_seqs))
for k in update_map.keys():
update_map[k] = values [ update_map[k] ]
if n_actual_outs != n_outs :
if n_actual_outs == 1:
values = values[0]
else:
values = values[:n_actual_outs]
return (values, update_map)
class Scan(theano.Op): class Scan(theano.Op):
def __init__(self,(inputs, outputs),n_seqs, n_outs, def __init__(self,(inputs, outputs, givens),n_seqs, n_outs,
inplace_map={}, seqs_taps={}, outs_taps={}, inplace_map={}, seqs_taps={}, outs_taps={},
truncate_gradient = -1, truncate_gradient = -1,
go_backwards = False, stored_steps_output = {}, go_backwards = False, stored_steps_output = {},
...@@ -319,35 +402,14 @@ class Scan(theano.Op): ...@@ -319,35 +402,14 @@ class Scan(theano.Op):
self.stored_steps_output = stored_steps_output self.stored_steps_output = stored_steps_output
self.inplace = inplace self.inplace = inplace
self.inputs = inputs self.inputs = inputs
self.givens = givens
self.outputs = outputs self.outputs = outputs
self.truncate_gradient = truncate_gradient self.truncate_gradient = truncate_gradient
self.go_backwards = go_backwards self.go_backwards = go_backwards
self.fn = theano.function(inputs,outputs, mode = mode) self.fn = theano.function(inputs,outputs, mode = mode, givens = givens)
g_y = [outputs[0].type()]
def compute_gradient(y, g_y):
gmap = theano.gradient.grad_sources_inputs( \
[(y,g_y)], theano.gof.graph.inputs([y]), False)
def zero(p):
return theano.tensor.TensorConstant(theano.tensor.TensorType(\
dtype=p.type.dtype, broadcastable=[]),
theano._asarray(0,dtype = p.type.dtype))
return [gmap.get(p, zero(p)) for p in inputs]
g_args = compute_gradient( outputs[0], g_y[-1])
# for all outputs compute gradients and then sum them up
for y in outputs[1:]:
g_y += [y.type()]
g_args_y = compute_gradient( y,g_y[-1])
for i in xrange(len(g_args)):
g_args[i] += g_args_y[i]
self.g_ins = g_y+inputs
self.g_outs = g_args
def make_node(self,*inputs): def make_node(self,*inputs):
...@@ -356,27 +418,77 @@ class Scan(theano.Op): ...@@ -356,27 +418,77 @@ class Scan(theano.Op):
err = 'There should be at least '+str(self.n_args)+ 'arguments' err = 'There should be at least '+str(self.n_args)+ 'arguments'
raise ValueError(err) raise ValueError(err)
# Build a fresh variable whose type is identical to the one of `var`
def new_same_dim(var):
    """Return a new variable of the same type as ``var``.

    ``var`` is first converted with ``theano.tensor.as_tensor_variable``.
    When that conversion raises ``TypeError`` the only accepted fallback is
    a ``RandomStateSharedVariable``, whose own type is instantiated instead;
    anything else is reported with a ``TypeError``.
    """
    try:
        as_tensor = theano.tensor.as_tensor_variable(var)
        return as_tensor.type()
    except TypeError:
        # random-state shared variables are not tensors, yet they are legal
        # here: clone their type directly
        if not isinstance(var, shared_random.RandomStateSharedVariable):
            raise TypeError("Could not convert %s to suitable type"%var,
                            type(var))
        return var.type()
# Build a fresh tensor variable like `var`, with one extra leading dimension
def new_add_one_dim(var):
    """Return a new tensor variable with one more dimension than ``var``.

    The result keeps the dtype of ``var`` (after conversion to a tensor
    variable) and prepends a non-broadcastable entry to its broadcastable
    pattern.
    """
    tensor_var = theano.tensor.as_tensor_variable(var)
    pattern = (False,) + tensor_var.broadcastable
    out_type = theano.tensor.Tensor(dtype=tensor_var.dtype,
                                    broadcastable=pattern)
    return out_type()
def new_replace_one_dim(var):
    """Return a new tensor variable like ``var`` with a refreshed first dim.

    The dtype and the number of dimensions are those of ``var`` (after
    conversion to a tensor variable); only the first entry of the
    broadcastable pattern is replaced by ``False``.
    """
    tensor_var = theano.tensor.as_tensor_variable(var)
    # drop the first flag of the pattern and put a non-broadcastable one back
    pattern = (False,) + tensor_var.broadcastable[1:]
    out_type = theano.tensor.Tensor(dtype=tensor_var.dtype,
                                    broadcastable=pattern)
    return out_type()
def new_remove_one_dim(var):
    """Return a new tensor variable like ``var`` without its first dimension.

    The dtype is that of ``var`` (after conversion to a tensor variable) and
    the broadcastable pattern is the one of ``var`` minus its first entry.
    """
    tensor_var = theano.tensor.as_tensor_variable(var)
    pattern = tensor_var.broadcastable[1:]
    out_type = theano.tensor.Tensor(dtype=tensor_var.dtype,
                                    broadcastable=pattern)
    return out_type()
# Create list of output datatypes # Create list of output datatypes
out_types = [] out_types = []
for i in xrange(self.n_seqs+1, self.n_seqs+self.n_outs+1): for i in xrange(self.n_seqs+1, self.n_seqs+self.n_outs+1):
out_idx = i - 1 - self.n_seqs
if not (inputs[i] == []): if not (inputs[i] == []):
if self.outs_taps.has_key(i-1-self.n_seqs): ## CASES :
if (self.outs_taps[i-self.n_seqs-1] == [-1]) and \ # outs_taps[i] == [-1] or == [] => inputs[i] no extra dim
(self.stored_steps_output[i-1-self.n_seqs] != 1): # outs_taps anything else => inputs[i] remove one dim
out_types += [ theano.tensor.Tensor(dtype=inputs[i].dtype,
broadcastable = (False,)+inputs[i].broadcastable)()] #
elif not self.stored_steps_output[i-1-self.n_seqs] ==1 : # stored_steps_outputs = 1 ==> outs no extra dim
out_types += [inputs[i].type()] # anything else --> needs extra dim
sw_inputs = self.outs_taps.get(out_idx, [-1]) == [-1]
sw_outputs = self.stored_steps_output[out_idx] == 1
if sw_inputs:
if sw_outputs:
# You need to output something identical to the
# input.. which can even be a non tensor
out_types += [ new_same_dim(inputs[i]) ]
else: else:
out_types += [theano.tensor.Tensor(dtype = inputs[i].dtype, \ # You need to output a list of things identical to
broadcastable = (False,)+inputs[i].broadcastable[1:])()] # the input .. (here we force it to be a tensor )
out_types += [ new_add_one_dim(inputs[i]) ]
else: else:
if self.stored_steps_output[i-1-self.n_seqs] != 1 : if sw_outputs:
out_types += [ theano.tensor.Tensor(dtype = inputs[i].dtype, # your input has one dimension more, so you need
broadcastable = (False,)+inputs[i].broadcastable)()] # to strip it by its first dimension
else: out_types += [new_remove_one_dim(inputs[i])]
out_types += [ theano.tensor.Tensor(dtype = inputs[i].dtype, else:
broadcastable = inputs[i].broadcastable)()] # input and output have the same # of dimensions,
# just that you need to "refresh" the first one
# this is important only in the corner case that
# the first dimension of the input is 1, in which
# case the output broadcastable pattern does not
# match the input broadcastable pattern
#
# Note that this should in practice never happen !!
# I add it here just for safety
out_types += [new_replace_one_dim(inputs[i])]
else: else:
raise ValueError(('You need to provide initial state for outputs' raise ValueError(('You need to provide initial state for outputs'
' such that scan can infer what dataype they are')) ' such that scan can infer what dataype they are'))
...@@ -388,6 +500,7 @@ class Scan(theano.Op): ...@@ -388,6 +500,7 @@ class Scan(theano.Op):
if rval: if rval:
rval = (self.inputs == other.inputs) and \ rval = (self.inputs == other.inputs) and \
(self.outputs == other.outputs) and \ (self.outputs == other.outputs) and \
(self.givens == other.givens) and \
(self.stored_steps_output == other.stored_steps_output) and \ (self.stored_steps_output == other.stored_steps_output) and \
(self.seqs_taps == other.seqs_taps) and \ (self.seqs_taps == other.seqs_taps) and \
(self.outs_taps == other.outs_taps) and \ (self.outs_taps == other.outs_taps) and \
...@@ -411,8 +524,7 @@ class Scan(theano.Op): ...@@ -411,8 +524,7 @@ class Scan(theano.Op):
hash(self.n_args) ^ \ hash(self.n_args) ^ \
hash_listsDictsTuples(self.outputs) ^ \ hash_listsDictsTuples(self.outputs) ^ \
hash_listsDictsTuples(self.inputs) ^ \ hash_listsDictsTuples(self.inputs) ^ \
hash_listsDictsTuples(self.g_ins) ^ \ hash_listsDictsTuples(self.givens) ^ \
hash_listsDictsTuples(self.g_outs) ^ \
hash_listsDictsTuples(self.seqs_taps) ^\ hash_listsDictsTuples(self.seqs_taps) ^\
hash_listsDictsTuples(self.outs_taps) ^\ hash_listsDictsTuples(self.outs_taps) ^\
hash_listsDictsTuples(self.stored_steps_output) hash_listsDictsTuples(self.stored_steps_output)
...@@ -458,14 +570,12 @@ class Scan(theano.Op): ...@@ -458,14 +570,12 @@ class Scan(theano.Op):
for i in xrange(self.n_seqs+1, \ for i in xrange(self.n_seqs+1, \
self.n_seqs+self.n_outs+1): self.n_seqs+self.n_outs+1):
if self.outs_taps.has_key(i-self.n_seqs-1): if self.outs_taps.has_key(i-self.n_seqs-1):
if self.outs_taps[i-self.n_seqs-1] == [-1]: if self.outs_taps[i-self.n_seqs-1] != [-1]:
args[i] = numpy.array([args[i]]) req_size = abs(min(self.outs_taps[i-self.n_seqs-1]))-1
if args[i].shape[0] < req_size:
req_size = abs(min(self.outs_taps[i-self.n_seqs-1]))-1 warning(('Initial state for output %d has fewer values then '
if args[i].shape[0] < req_size: 'required by the maximal past value %d. Scan will use 0s'
warning(('Initial state for output %d has fewer values then ' ' for missing values')%(i-self.n_iterable-1,req_size))
'required by the maximal past value %d. Scan will use 0s'
' for missing values')%(i-self.n_iterable-1,req_size))
self.n_steps = n_steps self.n_steps = n_steps
y = self.scan(self.fn, args[1:],self.n_seqs, self.n_outs, y = self.scan(self.fn, args[1:],self.n_seqs, self.n_outs,
...@@ -487,19 +597,18 @@ class Scan(theano.Op): ...@@ -487,19 +597,18 @@ class Scan(theano.Op):
if inplace_map.has_key(i) and (inplace_map[i] >= 0): if inplace_map.has_key(i) and (inplace_map[i] >= 0):
y += [args[inplace_map[i]]] y += [args[inplace_map[i]]]
else: else:
arg_shape = args[i+n_seqs].shape[1:] if self.stored_steps_output[i] == 1 :
if not self.outs_taps.has_key(i): y+= [ None ]
arg_shape = args[i+n_seqs].shape
if self.stored_steps_output[i] < 1 :
y_shape = (n_steps,)+arg_shape
elif self.stored_steps_output[i] == 1:
y_shape = arg_shape
else: else:
y_shape = (self.stored_steps_output[i],)+arg_shape arg_shape = args[i+n_seqs].shape[1:]
if (not self.outs_taps.has_key(i)) or \
self.outs_taps[i] == [-1]:
y += [numpy.empty(y_shape, arg_shape = args[i+n_seqs].shape
dtype=args[i+n_seqs].dtype)] if self.stored_steps_output[i] < 1 :
y_shape = (n_steps,)+arg_shape
else:
y_shape = (self.stored_steps_output[i],)+arg_shape
y += [numpy.empty(y_shape, dtype=args[i+n_seqs].dtype)]
seqs_mins = {} seqs_mins = {}
for j in xrange(n_seqs): for j in xrange(n_seqs):
if seqs_taps.has_key(j): if seqs_taps.has_key(j):
...@@ -510,7 +619,10 @@ class Scan(theano.Op): ...@@ -510,7 +619,10 @@ class Scan(theano.Op):
for j in xrange(n_outs): for j in xrange(n_outs):
if outs_taps.has_key(j): if outs_taps.has_key(j):
outs_mins.update({j: min(outs_taps[j])}) outs_mins.update({j: min(outs_taps[j])})
initOuts_size.update({j: args[n_seqs+j].shape[0]}) if self.outs_taps[j] != [-1]:
initOuts_size.update({j: args[n_seqs+j].shape[0]})
else:
initOuts_size.update({j: 0})
for i in xrange(n_steps): for i in xrange(n_steps):
...@@ -538,14 +650,17 @@ class Scan(theano.Op): ...@@ -538,14 +650,17 @@ class Scan(theano.Op):
sz = initOuts_size[j] sz = initOuts_size[j]
for tap_value in ls_taps: for tap_value in ls_taps:
if i + tap_value < 0: if i + tap_value < 0:
k = i + sz + tap_value if sz < 1:
if k < 0: fn_args += [args[j+n_seqs] ]
# past value not provided.. issue a warning and use 0s
fn_args += [numpy.zeros(args[j+n_seqs][0].shape)]
warning(('Past value %d for output %d not given in inital '
'out') % (j,tap_value))
else: else:
fn_args += [args[j+n_seqs][k]] k = i + sz + tap_value
if k < 0:
# past value not provided.. issue a warning and use 0s
fn_args += [numpy.zeros(args[j+n_seqs][0].shape)]
warning(('Past value %d for output %d not given in '
'inital out') % (j,tap_value))
else:
fn_args += [args[j+n_seqs][k]]
else: else:
if self.stored_steps_output[j] < 1: if self.stored_steps_output[j] < 1:
fn_args += [y[j][i + tap_value]] fn_args += [y[j][i + tap_value]]
...@@ -587,6 +702,33 @@ class Scan(theano.Op): ...@@ -587,6 +702,33 @@ class Scan(theano.Op):
if not( type(y) in (list,tuple)): if not( type(y) in (list,tuple)):
y = [y] y = [y]
g_y = [outputs[0].type()]
def compute_gradient(y, g_y):
gmap = theano.gradient.grad_sources_inputs( \
[(y,g_y)], theano.gof.graph.inputs([y]), False)
def zero(p):
return theano.tensor.TensorConstant(theano.tensor.TensorType(\
dtype=p.type.dtype, broadcastable=[]),
theano._asarray(0,dtype = p.type.dtype))
return [gmap.get(p, zero(p)) for p in inputs]
i = 0
while
g_args = compute_gradient( outputs[0], g_y[-1])
# for all outputs compute gradients and then sum them up
for y in outputs[1:]:
g_y += [y.type()]
g_args_y = compute_gradient( y,g_y[-1])
for i in xrange(len(g_args)):
g_args[i] += g_args_y[i]
self.g_ins = g_y+inputs
self.g_outs = g_args
# backwards pass # backwards pass
for i in xrange(len(y)): for i in xrange(len(y)):
...@@ -617,7 +759,7 @@ def scan_make_inplace(node): ...@@ -617,7 +759,7 @@ def scan_make_inplace(node):
op = node.op op = node.op
if isinstance(op, Scan) and (not op.inplace) \ if isinstance(op, Scan) and (not op.inplace) \
and (op.inplace_map.keys() != []): and (op.inplace_map.keys() != []):
return Scan((op.inputs, op.outputs ) , op.n_seqs, return Scan((op.inputs, op.outputs, op.givens ) , op.n_seqs,
op.n_outs, op.inplace_map, op.seqs_taps, op.outs_taps, op.n_outs, op.inplace_map, op.seqs_taps, op.outs_taps,
op.truncate_gradient, op.go_backwards, op.stored_steps_output, op.truncate_gradient, op.go_backwards, op.stored_steps_output,
inplace=True inplace=True
...@@ -625,7 +767,7 @@ def scan_make_inplace(node): ...@@ -625,7 +767,7 @@ def scan_make_inplace(node):
return False return False
optdb.register('scan_make_inplace', opt.in2out(scan_make_inplace, optdb.register('scanOp_make_inplace', opt.in2out(scan_make_inplace,
ignore_newtrees=True), 75, 'fast_run', 'inplace') ignore_newtrees=True), 75, 'fast_run', 'inplace')
......
...@@ -88,25 +88,26 @@ def compareArrays(a,b): ...@@ -88,25 +88,26 @@ def compareArrays(a,b):
class T_Scan(unittest.TestCase): class T_Scan(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
# generator network, only one output , type scalar ; no sequence or # generator network, only one output , type scalar ; no sequence or
# non sequence arguments # non sequence arguments
def test_1(self): def test_1(self):
def f_pow2(x_tm1): def f_pow2(x_tm1):
return 2*x_tm1 return 2*x_tm1
s = theano.tensor.dscalar() s = theano.tensor.dscalar()
n_steps = theano.tensor.dscalar() n_steps = theano.tensor.dscalar()
Y = theano.scan(f_pow2, [],s, [],n_steps = n_steps) Y, updts = theano.scan(f_pow2, [],s, [],n_steps = n_steps)
f1 = theano.function([s,n_steps], Y, updates = updts)
f1 = theano.function([s,n_steps], Y)
assert(compareArrays(f1(1,3), [2,4,8])) assert(compareArrays(f1(1,3), [2,4,8]))
# simple rnn, one input, one state, weights for each; input/state are # simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars # vectors, weights are scalars
def test_2(self): def test_2(self):
def f_rnn(u_t,x_tm1,W_in, W): def f_rnn(u_t,x_tm1,W_in, W):
return u_t*W_in+x_tm1*W return u_t*W_in+x_tm1*W
...@@ -115,9 +116,9 @@ class T_Scan(unittest.TestCase): ...@@ -115,9 +116,9 @@ class T_Scan(unittest.TestCase):
W_in = theano.tensor.dscalar() W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar() W = theano.tensor.dscalar()
Y = theano.scan(f_rnn, u,x0,[W_in,W]) Y, updts = theano.scan(f_rnn, u,x0,[W_in,W])
f2 = theano.function([u,x0,W_in,W], Y) f2 = theano.function([u,x0,W_in,W], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.]) v_u = numpy.array([1.,2.,3.,4.])
v_x0 = numpy.array(1) v_x0 = numpy.array(1)
v_out = numpy.array([1.1,1.3,1.6,2.]) v_out = numpy.array([1.1,1.3,1.6,2.])
...@@ -132,18 +133,17 @@ class T_Scan(unittest.TestCase): ...@@ -132,18 +133,17 @@ class T_Scan(unittest.TestCase):
W_in = theano.shared(.1, name = 'w_in') W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w') W = theano.shared(1., name ='w')
def f_rnn_shared(u_t,x_tm1): def f_rnn_shared(u_t,x_tm1, l_W_in, l_W):
return u_t*W_in+x_tm1*W return u_t*l_W_in+x_tm1*l_W
Y = theano.scan(f_rnn_shared, u,x0,[]) Y, updts = theano.scan(f_rnn_shared, u,x0,[W_in, W] )
f3 = theano.function([u,x0], Y) f3 = theano.function([u,x0], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.]) v_u = numpy.array([1.,2.,3.,4.])
v_x0 = numpy.array(1.) v_x0 = numpy.array(1.)
v_out = numpy.array([1.1,1.3,1.6,2.]) v_out = numpy.array([1.1,1.3,1.6,2.])
assert(compareArrays(f3(v_u,v_x0),v_out)) assert(compareArrays(f3(v_u,v_x0),v_out))
# some rnn with multiple outputs and multiple inputs; other dimension # some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors # instead of scalars/vectors
def test_4(self): def test_4(self):
...@@ -161,9 +161,9 @@ class T_Scan(unittest.TestCase): ...@@ -161,9 +161,9 @@ class T_Scan(unittest.TestCase):
return [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \ return [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)] theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]
Y = theano.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1) Y, updts = theano.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1)
f4 = theano.function([u1,u2,x0,y0,W_in1], Y) f4 = theano.function([u1,u2,x0,y0,W_in1], Y, updates = updts)
v_u1 = numpy.array([[1.,2.],[1.,2.],[1.,2.]]) v_u1 = numpy.array([[1.,2.],[1.,2.],[1.,2.]])
v_u2 = numpy.array([1.,2.,3.]) v_u2 = numpy.array([1.,2.,3.])
v_x0 = numpy.array([0.,0.]) v_x0 = numpy.array([0.,0.])
...@@ -175,7 +175,7 @@ class T_Scan(unittest.TestCase): ...@@ -175,7 +175,7 @@ class T_Scan(unittest.TestCase):
assert( compareArrays(x,v_x)) assert( compareArrays(x,v_x))
assert( compareArrays(y,v_y)) assert( compareArrays(y,v_y))
# simple rnn, one input, one state, weights for each; input/state are # simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past # vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs) # taps (sequences and outputs)
...@@ -189,15 +189,15 @@ class T_Scan(unittest.TestCase): ...@@ -189,15 +189,15 @@ class T_Scan(unittest.TestCase):
def f_rnn_shared(u_tm2, x_tm1, x_tm2): def f_rnn_shared(u_tm2, x_tm1, x_tm2):
return u_tm2*W_in+x_tm1*W+x_tm2 return u_tm2*W_in+x_tm1*W+x_tm2
Y = theano.scan(f_rnn_shared, u,x0, [], \ Y, updates = theano.scan(f_rnn_shared, u,x0, [], \
sequences_taps = {0:[-2]}, outputs_taps = {0:[-1,-2]}) sequences_taps = {0:[-2]}, outputs_taps = {0:[-1,-2]})
f7 = theano.function([u,x0], Y) f7 = theano.function([u,x0], Y, updates = updates)
v_u = numpy.asarray([1.,2.,3.,4.]) v_u = numpy.asarray([1.,2.,3.,4.])
v_x0 = numpy.asarray([1.,2.]) v_x0 = numpy.asarray([1.,2.])
out = numpy.asarray([3.1,5.3]) out = numpy.asarray([3.1,5.3])
assert (compareArrays( out, f7(v_u, v_x0))) assert (compareArrays( out, f7(v_u, v_x0)))
# simple rnn, one input, one state, weights for each; input/state are # simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past # vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs) and future taps for sequences # taps (sequences and outputs) and future taps for sequences
...@@ -211,16 +211,16 @@ class T_Scan(unittest.TestCase): ...@@ -211,16 +211,16 @@ class T_Scan(unittest.TestCase):
def f_rnn_shared(u_tm2,u_tp2, x_tm1, x_tm2): def f_rnn_shared(u_tm2,u_tp2, x_tm1, x_tm2):
return (u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2 return (u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2
Y = theano.scan(f_rnn_shared, u,x0, [], \ Y,updts = theano.scan(f_rnn_shared, u,x0, [], \
sequences_taps = {0:[-2,2]}, outputs_taps = {0:[-1,-2]}) sequences_taps = {0:[-2,2]}, outputs_taps = {0:[-1,-2]})
f8 = theano.function([u,x0], Y) f8 = theano.function([u,x0], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.,5.,6.]) v_u = numpy.array([1.,2.,3.,4.,5.,6.])
v_x0 = numpy.array([1.,2.]) v_x0 = numpy.array([1.,2.])
out = numpy.array([3.6, 6.4]) out = numpy.array([3.6, 6.4])
assert (compareArrays( out, f8(v_u, v_x0) ) ) assert (compareArrays( out, f8(v_u, v_x0) ) )
# simple rnn ; compute inplace # simple rnn ; compute inplace
def test_7(self): def test_7(self):
...@@ -232,10 +232,9 @@ class T_Scan(unittest.TestCase): ...@@ -232,10 +232,9 @@ class T_Scan(unittest.TestCase):
def f_rnn_shared(u_t, x_tm1): def f_rnn_shared(u_t, x_tm1):
return u_t*W_in + x_tm1*W return u_t*W_in + x_tm1*W
Y = theano.scan(f_rnn_shared, u, x0,[], \ Y, updts = theano.scan(f_rnn_shared, u, x0,[], \
inplace_map={0:0} ) inplace_map={0:0} )
f9 = theano.function([mu,x0], Y , #mode = 'FAST_RUN') f9 = theano.function([mu,x0], Y , updates = updts)
mode = 'DEBUG_MODE')
v_u = numpy.array([1.,2.,3.]) v_u = numpy.array([1.,2.,3.])
v_x0 = numpy.array(1.) v_x0 = numpy.array(1.)
...@@ -244,6 +243,106 @@ class T_Scan(unittest.TestCase): ...@@ -244,6 +243,106 @@ class T_Scan(unittest.TestCase):
assert (compareArrays(out, v_out)) assert (compareArrays(out, v_out))
assert (compareArrays(v_u, out)) assert (compareArrays(v_u, out))
# Shared variables with updates
def test_8(self):
    """Scan over a step function that returns both outputs and updates.

    The step function returns ``([y0_t, y1_t, y2_t], [(W1, nwW1), (W2, nwW2)])``.
    The same recurrence, including the per-step changes of ``W1`` and ``W2``,
    is replayed in numpy and the three scan outputs are compared against it.
    """
    W1_vals = numpy.random.rand(20,30)
    W2_vals = numpy.random.rand(30,20)
    u1_vals = numpy.random.rand(3,20)
    u2_vals = numpy.random.rand(3,30)
    y0_vals = numpy.random.rand(3,20)
    y1_vals = numpy.random.rand(20)
    y2_vals = numpy.random.rand(30)

    W1 = theano.shared(W1_vals)
    W2 = theano.shared(W2_vals)
    u1 = theano.shared(u1_vals)
    y1 = theano.shared(y1_vals)

    def step(u1_t, u2_t, y0_tm3, y0_tm2, y0_tm1, y1_tm1):
        y0_t = theano.dot(theano.dot(u1_t,W1),W2) + 0.1*y0_tm1 + \
                0.33*y0_tm2 + 0.17*y0_tm3
        y1_t = theano.dot(u2_t, W2) + y1_tm1
        y2_t = theano.dot(u1_t, W1)
        nwW1 = W1 + .1
        nwW2 = W2 + .05
        return ([y0_t, y1_t, y2_t], [(W1,nwW1), (W2, nwW2)])

    u2 = theano.tensor.matrix()
    y0 = theano.tensor.matrix()
    y2 = theano.tensor.vector()

    Y, upds = theano.scan(step, [u1,u2], [y0,y1,y2], [],
                          outputs_taps = {0:[-3,-2,-1], 2:[]})
    # a separate name for the compiled function, so that it does not
    # rebind the step function defined above
    f10 = theano.function([u2,y0,y2], Y, updates = upds)
    vls = f10(u2_vals, y0_vals, y2_vals)

    # replay the recurrence in numpy
    v_y0 = numpy.zeros((6,20))
    v_y1 = numpy.zeros((4,20))
    v_y2 = numpy.zeros((3,30))
    v_y0[:3] = y0_vals
    v_y1[0] = y1_vals
    vW1 = W1_vals.copy()
    vW2 = W2_vals.copy()
    for idx in xrange(3):
        v_y0[idx+3] = numpy.dot( numpy.dot(u1_vals[idx,:], vW1), vW2) + \
                0.1*v_y0[idx+2] + 0.33*v_y0[idx+1] + 0.17*v_y0[idx]
        v_y1[idx+1] = numpy.dot( u2_vals[idx,:], vW2) + v_y1[idx]
        v_y2[idx] = numpy.dot( u1_vals[idx,:], vW1)
        vW1 = vW1 + .1
        vW2 = vW2 + .05

    # the leading rows of v_y0 / v_y1 hold the initial states, which are
    # not part of what scan returns
    assert (compareArrays(vls[0], v_y0[3:]))
    assert (compareArrays(vls[1], v_y1[1:]))
    assert (compareArrays(vls[2], v_y2))
    # NOTE(review): the final contents of W1 / W2 could additionally be
    # compared with vW1 / vW2 -- confirm the shared-variable accessor first


# Random streams inside scan. This method used to be a second `test_8`,
# which silently replaced the test above in the class namespace; it is
# renamed so that both tests are collected.
def test_9(self):
    """Scan over a step function that draws from a shared random stream.

    Ten steps are run starting from ``vis`` and the last sample is compared
    with a numpy replay that seeds its generators from ``RandomState(123)``.
    """
    W_vals = numpy.random.rand(20,30) -.5
    vis_val = numpy.random.binomial(1,0.5, size=(3,20))
    bvis = numpy.random.rand(20) -.5
    bhid = numpy.random.rand(30) -.5

    tW = theano.shared(W_vals)
    tbh = theano.shared(bhid)
    tbv = theano.shared(bvis)
    vis = theano.tensor.matrix()
    trng = theano.tensor.shared_randomstreams.RandomStreams(123)

    def f(vsample):
        hmean = theano.tensor.nnet.sigmoid(theano.dot(vsample,tW)+ tbh)
        hsample = trng.binomial(hmean.shape,1,hmean)
        vmean = theano.tensor.nnet.sigmoid(theano.dot(hsample,tW.T)+ tbv)
        # NOTE(review): the sample is drawn from `vsample`, not from the
        # `vmean` computed just above; the numpy replay below does the same.
        # Presumably `vmean` was intended in both places -- confirm before
        # changing, since the two implementations must stay in step.
        return trng.binomial(vsample.shape,1,vsample)

    v_vals, updts = theano.scan(f, [], [vis],[], n_steps = 10,
                                sequences_taps = {}, outputs_taps = {})
    my_f = theano.function([vis], v_vals[-1], updates = updts)

    def numpy_implementation(vsample):
        rng = numpy.random.RandomState(123)
        b1 = numpy.random.RandomState(rng.randint(2**30))
        b2 = numpy.random.RandomState(rng.randint(2**30))
        for idx in range(10):
            hmean = 1./(1. + numpy.exp(-(numpy.dot(vsample,W_vals) + bhid)))
            hsample = b1.binomial(1,hmean, size = hmean.shape)
            vmean = 1./(1. + numpy.exp(-(numpy.dot(hsample,W_vals.T) + bvis)))
            vsample = b2.binomial(1,vsample, size = vsample.shape)
        return vsample

    t_res = my_f(vis_val)
    n_res = numpy_implementation(vis_val)
    assert (compareArrays(t_res, n_res))
''' '''
# test gradient simple network # test gradient simple network
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论