Commit 31904be5 authored by Razvan Pascanu

new optimization for scan + new feature for Dimshuffle

Parent 3903e59f
......@@ -25,14 +25,18 @@ The Scan Op should typically be used by calling the ``scan()`` function.
"""
__docformat__ = 'restructuredtext en'
import theano
from theano.tensor import opt, TensorType
from theano import gof, Apply
from theano.gof import Optimizer, toolbox
from theano.compile import optdb
import theano.tensor.shared_randomstreams as shared_random
from theano.gof.python25 import all
import tensor
import misc.safe_asarray as safe_asarray
from tensor import opt, TensorType
import gof
from gof import Optimizer, toolbox, Op, Apply
from compile import optdb, SharedVariable, function, Param
import compile
import tensor.shared_randomstreams as shared_random
import gradient
from gof.python25 import all
import copy
import tensor.elemwise as elemwise
import numpy
......@@ -205,7 +209,7 @@ def foldr(fn, sequences, outputs_info, non_sequences = [], mode = 'FAST_RUN'):
def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
n_steps = 0, truncate_gradient = -1, go_backwards = False,
n_steps = None, truncate_gradient = -1, go_backwards = False,
mode = None):
'''Function that constructs and applies a Scan op
......@@ -318,10 +322,13 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
:param n_steps:
Number of steps to iterate. If this value is provided scan will run only for
this amount of steps (given that the input sequences are sufficiently long).
If there is no input sequence (for example in case of a generator network) scan
will iterate for this number of steps. It can be a theano scalar or a number.
Number of steps to iterate. If the input sequences are not long enough, scan
will produce a warning and run only for the maximal number of steps allowed by
the input sequences. If the value is 0, the outputs will have 0 rows. If the
value is negative, scan will run backwards (or, if the flag go_backwards is
already set to true, it will run forward in time). If n_steps is not provided,
or evaluates to None, scan will figure out the maximal number of steps it can
take and do that.
:param truncate_gradient:
Number of steps to use in truncated BPTT. If you compute gradients
......@@ -423,14 +430,21 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
# go through sequences picking up time slices as needed
for i,seq in enumerate(seqs):
if seq.get('taps', None):
slices = [ seq['input'][0].type() for k in seq['taps'] ]
slice_to_seqs += [ i for k in seq['taps']]
args += slices
for k in seq['taps']:
nw_slice = seq['input'][0].type()
# Add names to slices for debugging and pretty printing ..
if seq['input'].name:
nw_slice.name = seq['input'].name + '[%d]'%k
args.append(nw_slice)
slice_to_seqs.append(i)
dummy_notshared_ins += len(seq['taps'])
# go through outputs picking up time slices as needed
for i,init_out in enumerate(outs_info):
if init_out.get('taps', None) == [-1]:
args += [init_out['initial'].type()]
# Added name to slices for debugging and pretty printing
if init_out['initial'].name:
args[-1].name = init_out['initial'].name+'[-1]'
if slice_to_seqs:
val = slice_to_seqs[-1]
else:
......@@ -440,19 +454,23 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
elif init_out.get('taps',None):
if numpy.any(numpy.array(init_out.get('taps',[])) > 0):
raise ValueError('Can not use future taps of outputs', init_out)
slices = [ init_out['initial'][0].type() for k in init_out['taps'] ]
if slice_to_seqs:
val = slice_to_seqs[-1]
else:
val = -1
slice_to_seqs += [ val+1 for k in init_out['taps'] ]
args += slices
for k in init_out['taps']:
nw_slice = init_out['initial'][0].type()
if init_out['initial'].name:
nw_slice.name = init_out['initial'].name + '[%d]'%k
args.append(nw_slice)
slice_to_seqs.append(val+1)
dummy_notshared_init_outs += len(init_out['taps'])
# remove shared variables from the non sequences list
notshared_other_args = []
for non_seq in non_seqs:
if not isinstance(non_seq, theano.compile.SharedVariable):
if not isinstance(non_seq, SharedVariable):
notshared_other_args += [non_seq]
# add only the not shared variables to the arguments of the dummy
......@@ -498,12 +516,16 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
else:
outputs = outputs_updates
updates = {}
# in case you return a tuple .. convert it to a list (there are certain
# operations that are not permitted on tuples, like item assignment)
outputs = list(outputs)
for i,out in enumerate(outputs):
outputs[i] = tensor.as_tensor(out)
# We compile a dummy function just to see what shared variables
# we have and what their update rules are
dummy_f = theano.function(dummy_args, outputs, updates = updates, mode = \
theano.compile.mode.Mode(linker = 'py', optimizer = None) )
dummy_f = function(dummy_args, outputs, updates = updates, mode = \
compile.mode.Mode(linker = 'py', optimizer = None) )
inner_fn_out_states = [ out.variable for out in dummy_f.maker.outputs]
update_map = {}
......@@ -542,7 +564,7 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
#
n_extended_outs = n_outs
for input in dummy_f.maker.expanded_inputs[fromIdx:] :
if isinstance(input.variable, theano.compile.SharedVariable) and input.update:
if isinstance(input.variable, SharedVariable) and input.update:
new_var = input.variable.type()
inner_fn_inputs.append(new_var)
if slice_to_seqs:
......@@ -559,7 +581,7 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
# add the rest:
for input in dummy_f.maker.expanded_inputs[fromIdx:] :
if isinstance(input.variable, theano.compile.SharedVariable) and not input.update:
if isinstance(input.variable, SharedVariable) and not input.update:
shared_non_seqs += [input.variable]
inner_fn_inputs += [input.variable.type() ]
if slice_to_seqs:
......@@ -567,22 +589,41 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
else: val = -1
slice_to_seqs += [val +1]
givens[input.variable] = inner_fn_inputs[-1]
elif not isinstance(input.variable, theano.compile.SharedVariable):
elif not isinstance(input.variable, SharedVariable):
inner_fn_inputs.append(input.variable)
n_fixed_steps = int(n_steps) if type(n_steps) in (float,int) else None
# check if it is actually a Theano constant
try :
n_fixed_steps = opt.get_constant_value(n_steps)
except:
n_fixed_steps = None
if (n_steps is None or n_steps == numpy.inf or n_steps == numpy.nan) and n_seqs == 0 :
raise ValueError('Scan does not know for how many steps to iterate. '
'You need to provide the number of steps through the '
'``n_steps`` argument if you do not iterate over any sequence')
# Create the Scan op object
local_op = Scan( (inner_fn_inputs,inner_fn_out_states, givens, slice_to_seqs ), n_seqs,
n_extended_outs, inplace_map, sequences_taps, outputs_taps, truncate_gradient,
go_backwards, store_steps, mode)
go_backwards, store_steps, mode, n_fixed_steps = n_fixed_steps)
# Call the object on the input sequences, initial values for outs,
# and non sequences
for seq in seqs :
if not seq.get('input', None):
raise ValueError('All input sequences should provide an input')
unwrapped_seqs = [ seq.get('input',theano.tensor.as_tensor(0.)) for seq in seqs ]
unwrapped_outs = [ out.get('initial',theano.tensor.as_tensor(0.)) for out in outs_info ]
values = local_op( *( [theano.tensor.as_tensor(n_steps)]
unwrapped_seqs = [ seq.get('input',tensor.as_tensor(0.)) for seq in seqs ]
unwrapped_outs = [ out.get('initial',tensor.as_tensor(0.)) for out in outs_info ]
if n_steps != None:
n_steps = tensor.as_tensor(n_steps)
else:
#n_steps = tensor.constant(numpy.inf,'?_steps')
n_steps = gof.Constant(gof.generic, 'unknown', '?_steps')
values = local_op( *( [n_steps]
+ unwrapped_seqs
+ unwrapped_outs
+ shared_outs
......@@ -602,9 +643,7 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
return (values, update_map)
class Scan(theano.Op):
class Scan(Op):
#
# OLD DOCUMENTATION CAN BE FOUND NEAR REVISION 2581
#
......@@ -613,7 +652,7 @@ class Scan(theano.Op):
inplace_map={}, seqs_taps={}, outs_taps={},
truncate_gradient = -1,
go_backwards = False, store_steps = {},
mode = 'FAST_RUN', inplace=False):
mode = 'FAST_RUN', n_fixed_steps = None, inplace=False):
'''
:param (inputs,outputs, givens,slice_to_seqs):
inputs and outputs Theano variables that describe the function that is
......@@ -635,6 +674,10 @@ class Scan(theano.Op):
steps (from the end towards the beginning) of the outputs you really need and should
return; given this information, scan can know (if possible) to allocate only
the amount of memory needed to compute that many entries
:param n_fixed_steps: a number if the ``n_steps`` argument of the scan
function received a constant number, None otherwise. The value is used to
optimize the graph, since a scan whose n_steps is fixed to 1 or 0 is not
really needed in the graph. (? could we use tag hints ?)
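For instance (a hedged example): a call like scan(fn, u, x0, [], n_steps = 1)
records n_fixed_steps == 1, which the ScanRemoveFromGraph optimizer defined
later in this file uses to splice the inner function directly into the
outer graph.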
'''
#check sequences past taps
for k,v in seqs_taps.iteritems():
......@@ -671,10 +714,10 @@ class Scan(theano.Op):
# get seq number
n_seq = slice_to_seqs[idx]
if n_seq in inplace_map.keys():
if type(inputs[n_seq]) is theano.Param:
if type(inputs[n_seq]) is Param:
inputs[n_seq].mutable = True
else:
inputs[n_seq] = theano.Param( inputs[n_seq], mutable = True)
inputs[n_seq] = Param( inputs[n_seq], mutable = True)
self.seqs_taps = seqs_taps
self.outs_taps = outs_taps
......@@ -687,19 +730,24 @@ class Scan(theano.Op):
self.inputs = inputs
self.givens = givens
self.outputs = outputs
# This is here just so that an optimization can pick up whether
# scan is really needed in the graph; if the number of steps
# scan does is a constant 1, -1 or 0, then we can remove scan
# from the graph
self.n_fixed_steps = n_fixed_steps
self.mode = mode
self.truncate_gradient = truncate_gradient
self.go_backwards = go_backwards
self.slice_to_seqs = slice_to_seqs
self.fn = theano.function(inputs,outputs, mode = mode, givens = givens)
assert not numpy.any( [isinstance(x.variable,theano.compile.SharedVariable) for x in \
self.fn = function(inputs,outputs, mode = mode, givens = givens)
assert not numpy.any([isinstance(x.variable,SharedVariable) for x in
self.fn.maker.inputs])
def make_node(self,*inputs):
assert all(isinstance(i, theano.Variable) for i in inputs)
assert all(isinstance(i, gof.Variable) for i in inputs)
return Apply(self, inputs, [t() for t in self.apply_output_types])
......@@ -721,6 +769,7 @@ class Scan(theano.Op):
(self.go_backwards == other.go_backwards) and\
(self.truncate_gradient == other.truncate_gradient) and\
(self.n_outs == other.n_outs) and\
(self.n_fixed_steps == other.n_fixed_steps) and\
(self.n_args == other.n_args)
return rval
......@@ -736,6 +785,7 @@ class Scan(theano.Op):
hash(self.truncate_gradient) ^\
hash(self.n_args) ^ \
hash(self.mode) ^\
hash(self.n_fixed_steps) ^\
hash_listsDictsTuples(self.outputs) ^ \
hash_listsDictsTuples(self.inputs) ^ \
hash_listsDictsTuples(self.givens) ^ \
......@@ -765,14 +815,23 @@ class Scan(theano.Op):
Y sequence outputs y_1, y_2, ... y_<self.n_outs>
"""
n_steps = 0
if (self.n_seqs ==0 ) and (args[0] == 0):
raise ValueError('Scan does not know over how many steps it '
'should iterate! No input sequence or number of steps to '
'iterate given !')
n_steps = args[0]
if n_steps != 'unknown':
n_steps = int(n_steps)
if n_steps < 0:
n_steps = abs(n_steps)
go_backwards = not self.go_backwards
else:
go_backwards = self.go_backwards
else:
n_steps = None
go_backwards = self.go_backwards
if (args[0] != 0):
n_steps = args[0]
if (self.n_seqs == 0 ) and (not numpy.isfinite(n_steps) ):
raise ValueError('Scan does not know how many steps it '
'should iterate! Either provide some input sequences from '
'which scan could find out the number of steps, or directly '
'the number of steps you want through the n_steps argument.')
for i in xrange(self.n_seqs):
if self.seqs_taps.has_key(i):
......@@ -782,12 +841,17 @@ class Scan(theano.Op):
if max( self.seqs_taps[i]) > 0:
# using future values, so need to end the sequence earlier
seq_len -= max(self.seqs_taps[i])
if n_steps == 0 :
if n_steps == None :
# length of the sequences, leaving room for the largest future tap
n_steps = seq_len
if seq_len != n_steps :
warning(('Input sequence %d has a shorter length then the '
'expected number of steps %d')%(i,n_steps))
if seq_len > n_steps:
warning('Input sequence is longer than required. '
'Extra values will be ignored')
else:
warning('Input sequence is shorter than the number '
'of steps scan was supposed to do. Readjusting '
'the number of steps scan will iterate ... ')
n_steps = min(seq_len,n_steps)
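# Hedged illustration of the readjustment above (names assumed): with
# seqs_taps[i] == [-2, 2] the future tap +2 already shortened seq_len
# by 2, so an input of raw length 7 supports at most 5 steps, and a
# requested n_steps of 10 is clipped to min(5, 10) == 5.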
......@@ -810,18 +874,9 @@ class Scan(theano.Op):
self.n_steps = n_steps
y = self.scan(self.fn, args[1:],self.n_seqs, self.n_outs,
self.seqs_taps, self.outs_taps, n_steps, self.go_backwards,
self.seqs_taps, self.outs_taps, n_steps, go_backwards,
inplace_map)
'''
# write to storage, converting if needed ( why do we have the wrong dtype !???)
# -- solved --
for i in xrange(self.n_outs):
if hasattr(node.outputs[i], 'dtype'):
outs[i][0] = theano._asarray(y[i], dtype=node.outputs[i].dtype)
else:
outs[i][0] = y[i]
'''
for i in xrange(self.n_outs):
if self.store_steps[i] > 1 :
# we need to reorder the steps so that they are in the correct order
......@@ -1010,12 +1065,12 @@ class Scan(theano.Op):
g_y = [outputs[0].type()]
def compute_gradient(y, g_y):
gmap = theano.gradient.grad_sources_inputs( \
[(y,g_y)], theano.gof.graph.inputs([y]), False)
gmap = gradient.grad_sources_inputs( \
[(y,g_y)], gof.graph.inputs([y]), False)
def zero(p):
return theano.tensor.TensorConstant(theano.tensor.TensorType(\
return tensor.TensorConstant(tensor.TensorType(\
dtype=p.type.dtype, broadcastable=[]),
theano._asarray(0,dtype = p.type.dtype))
safe_asarray._asarray(0,dtype = p.type.dtype))
return [gmap.get(p, zero(p)) for p in inputs]
......@@ -1038,7 +1093,7 @@ class Scan(theano.Op):
# backwards pass
for i in xrange(len(y)):
if g_outs[i] == None:
g_outs[i] = theano.tensor.zeros_like(y[i])
g_outs[i] = tensor.zeros_like(y[i])
g_args = [self.n_steps]+g_outs + y
# check if go_backwards is true
......@@ -1059,111 +1114,12 @@ class Scan(theano.Op):
'''
class ScanSpaceOptimizer(Optimizer):
""" Graph Optimizer that reduces scan memory consumption """
def __init__(self):
Optimizer.__init__(self)
def add_requirements(self,env):
env.extend(toolbox.ReplaceValidate())
def apply(self, env):
nodelist = list(env.toposort())
for node in nodelist:
op = node.op
# If it is a scan Op
if isinstance(op, Scan):
outputs = node.outputs
store_steps = [0 for x in outputs]
# check the outputs
for i,out in enumerate(node.outputs):
if op.store_steps[i] == 0 :
# if we do not have a range for this output
req_steps = numpy.max(numpy.abs(op.outs_taps.get(i,1)))
# look at all its clients
for cl,_dx in out.clients:
if type(cl) == str:
# if the node is actually an output, then
# we need to store the entire thing
req_steps = 0
break
else:
if not isinstance(cl.op,
theano.tensor.basic.Subtensor):
# if any of the clients is not a subtensor
# we also need to store the entire thing
req_steps = 0
break
else:
# if it is a tensor, and the first
# dimension is just -1
if cl.op.idx_list[0] == -1 :
req_steps = numpy.max([1, req_steps])
else:
# or a constant that evaluates to
# -1
try:
idx = opt.get_constant_value(cl.op.idx_list[0])
if idx== -1:
req_steps = numpy.max([1, req_steps])
else:
req_steps = 0
break
except:
req_steps = 0
break
store_steps[i] = req_steps
else:
store_steps[i] = op.store_steps[i]
if numpy.any(store_steps!= op.store_steps):
new_scan = Scan((op.inputs, op.outputs, op.givens,
op.slice_to_seqs),op.n_seqs, op.n_outs,
op.inplace_map, op.seqs_taps, op.outs_taps,
op.truncate_gradient, op.go_backwards,
store_steps, op.mode,op.inplace).make_node(*node.inputs)
# we now need to replace the outputs of scan
for i,out in enumerate(node.outputs):
# if we are dealing with an output for which
# we changed the number of stored steps we
# also need to get rid of the subtensor
if op.store_steps[i] == 0 and store_steps[i] == 1:
# get the output of the subtensor variables
outSubTens = [ x[0].outputs[0] for x in out.clients ]
new_old = [(x,new_scan.outputs[i]) for x in outSubTens]
env.replace_all_validate(new_old,reason =
'scan_space_optimizer')
else:
env.replace_all_validate([(out,
new_scan.outputs[i])], reason =
'scan_space_optimizer')
optdb.register('scanOp_space_optimization', ScanSpaceOptimizer(), 74, 'fast_run')
@gof.local_optimizer([None])
def scan_make_inplace(node):
op = node.op
if isinstance(op, Scan) and (not op.inplace) and (op.inplace_map.keys() != []):
return Scan((op.inputs, op.outputs, op.givens, op.slice_to_seqs ) , op.n_seqs,
op.n_outs, op.inplace_map, op.seqs_taps, op.outs_taps,
op.truncate_gradient, op.go_backwards, op.store_steps, op.mode,
inplace=True ).make_node(*node.inputs).outputs
return False
optdb.register('scanOp_make_inplace', opt.in2out(scan_make_inplace,
ignore_newtrees=True), 75, 'fast_run', 'inplace')
'''
class ScanGrad(theano.Op):
class ScanGrad(Op):
"""Gradient Op for Scan"""
def __init__(self,(g_ins, g_outs) , n_seqs, n_outs,
seqs_taps = {}, outs_taps= {}, truncate_gradient = -1):
self.grad_fn = theano.function(g_ins, g_outs)
self.grad_fn = function(g_ins, g_outs)
self.inputs = g_ins
self.outputs = g_outs
self.n_seqs = n_seqs
......@@ -1203,7 +1159,7 @@ class ScanGrad(theano.Op):
# return
# | grad of seqs | grad of outs | grad of non_seqs |
# | n_seqs | n_outs | unknown |
return theano.Apply(self, list(args),
return Apply(self, list(args),
[i.type() for i in args[1+2*self.n_outs:] ])
def perform(self, node, args, storage):
......@@ -1318,3 +1274,237 @@ class ScanGrad(theano.Op):
class ScanSpaceOptimizer(Optimizer):
""" Graph Optimizer that reduces scan memory consumption """
def __init__(self):
Optimizer.__init__(self)
def add_requirements(self,env):
env.extend(toolbox.ReplaceValidate())
def apply(self, env):
nodelist = list(env.toposort())
for node in nodelist:
op = node.op
# If it is a scan Op
if isinstance(op, Scan):
outputs = node.outputs
store_steps = [0 for x in outputs]
# check the outputs
for i,out in enumerate(node.outputs):
if op.store_steps[i] == 0 :
# if we do not have a range for this output
req_steps = numpy.max(numpy.abs(op.outs_taps.get(i,1)))
# look at all its clients
for cl,_dx in out.clients:
if type(cl) == str:
# if the node is actually an output, then
# we need to store the entire thing
req_steps = 0
break
else:
if not isinstance(cl.op,
tensor.basic.Subtensor):
# if any of the clients is not a subtensor
# we also need to store the entire thing
req_steps = 0
break
else:
# if it is a tensor, and the first
# dimension is just -1
if cl.op.idx_list[0] == -1 :
req_steps = numpy.max([1, req_steps])
else:
# or a constant that evaluates to
# -1
try:
idx = opt.get_constant_value(cl.op.idx_list[0])
if idx== -1:
req_steps = numpy.max([1, req_steps])
else:
req_steps = 0
break
except:
req_steps = 0
break
store_steps[i] = req_steps
else:
store_steps[i] = op.store_steps[i]
if numpy.any(store_steps!= op.store_steps):
new_scan = Scan((op.inputs, op.outputs, op.givens,
op.slice_to_seqs),op.n_seqs, op.n_outs,
op.inplace_map, op.seqs_taps, op.outs_taps,
op.truncate_gradient, op.go_backwards,
store_steps, op.mode,op.n_fixed_steps,
op.inplace).make_node(*node.inputs)
# we now need to replace the outputs of scan
for i,out in enumerate(node.outputs):
# if we are dealing with an output for which
# we changed the number of stored steps we
# also need to get rid of the subtensor
if op.store_steps[i] == 0 and store_steps[i] == 1:
# get the output of the subtensor variables
outSubTens = [ x[0].outputs[0] for x in out.clients ]
new_old = [(x,new_scan.outputs[i]) for x in outSubTens]
env.replace_all_validate(new_old,reason =
'scan_space_optimizer')
else:
env.replace_all_validate([(out,
new_scan.outputs[i])], reason =
'scan_space_optimizer')
optdb.register('scanOp_space_optimization', ScanSpaceOptimizer(), 74, 'fast_run')
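# A hedged sketch of the pattern this optimizer targets (the variable
# names are assumed, not from this commit):
#
#   values, updates = scan(fn, u, x0, [])
#   f = function([u, x0], values[-1])
#
# Every client of the scan output is a Subtensor taking index -1, so
# store_steps for that output can drop from 0 (store the whole history)
# to 1 (store only the last computed step).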
@gof.local_optimizer([None])
def scan_make_inplace(node):
op = node.op
if isinstance(op, Scan) and (not op.inplace) and (op.inplace_map.keys() != []):
return Scan((op.inputs, op.outputs, op.givens, op.slice_to_seqs ) , op.n_seqs,
op.n_outs, op.inplace_map, op.seqs_taps, op.outs_taps,
op.truncate_gradient, op.go_backwards, op.store_steps, op.mode,
op.n_fixed_steps, inplace=True ).make_node(*node.inputs).outputs
return False
optdb.register('scanOp_make_inplace', opt.in2out(scan_make_inplace,
ignore_newtrees=True), 75, 'fast_run', 'inplace')
class ScanRemoveFromGraph(Optimizer):
''' Graph Optimizer that removes scan if you just do a loop of 1 '''
def __init__(self):
Optimizer.__init__(self)
def add_requirements(self, env):
env.extend(toolbox.ReplaceValidate())
def apply(self,env):
nodelist = list(env.toposort())
for node in nodelist:
op = node.op
# If it is a scan Op
if isinstance(op, Scan) and op.n_fixed_steps != None:
if abs(op.n_fixed_steps) < 2:
# Step 1 replace the inputs of the inner function
# with the inputs of scan
# Start replacing
# idx_curr_inp -> index that goes through the extended
# inputs of the op (includes shared variables) that are
# not provided to the node as inputs !!
idx_curr_inp = -1
# keeps track of what slice of the current input we are
# currently dealing with
slice = -1
# keeps track of the index that goes through the actual
# inputs of the node
idx_node_inp = 0
# pairs of variables that we need to replace in the end
replace_pairs = {}
# go through the inputs of the inner function
for i,inp in enumerate(op.inputs):
# figure out what slice of what node input this represents
if i < len(op.slice_to_seqs):
# slice_to_seqs is an array of the form [1 1 2 3 3 3 ],
# meaning that the 1st input of the inner function is a
# slice of the 1st input of scan, 2nd input of the inner
# function is a slice of the 1st input of scan and so on..
arg = op.slice_to_seqs[i]
# check if this is a slice of the current input
if arg == idx_curr_inp:
# if so increase the number of the current slice
slice+= 1
else:
# if not reset slice, make this the new current
# input
slice = 0
idx_curr_inp = arg
# and check whether it is a shared variable
# scan deals with shared variables by replacing them
# with copies using the givens argument of theano.function
# so if we have a shared variable it should appear in
# op.givens !!
if inp not in op.givens:
# if it is not a shared variable increase the index
# of the current input
# note that we will jump to 1; this is fine since
# node.inputs[0] is the number of steps, which we
# should not consider here .. we care about what follows,
# namely the sequences, initial states, non sequences...
idx_node_inp += 1
if inp not in op.givens:
# This is not a shared variable so we can replace it
# ( we should not replace the shared variables, theano.function
# will take care of shared variables here ..)
if idx_curr_inp >= op.n_seqs:
# we are dealing with an initial state of some output
# check if we are dealing with an output whose only tap is -1
one_step = False
if not op.outs_taps.has_key(idx_curr_inp-op.n_seqs):
one_step = True
else:
if op.outs_taps[idx_curr_inp - op.n_seqs] == [-1]:
one_step = True
if one_step:
node_input = node.inputs[idx_node_inp]
else:
tap = op.outs_taps[idx_curr_inp-op.n_seqs][slice]
min_tap = min(op.outs_taps[idx_curr_inp-op.n_seqs])
node_input = node.inputs[idx_node_inp][tap-min_tap]
else:
# we are dealing with a slice of a sequence
tap = op.seqs_taps[idx_curr_inp][slice]
min_tap = min(op.seqs_taps[idx_curr_inp])
node_input = node.inputs[idx_node_inp][tap-min_tap]
# add to our replace_pairs list
replace_pairs[inp] = node_input
else:
# if we got here this means we are dealing with non_sequences,
# which do not have slices !
# check to see if we are dealing with a shared variable
if inp not in op.givens:
idx_node_inp += 1
replace_pairs[inp] = node.inputs[idx_node_inp]
def my_replace( node, replace_pairs):
# Turns out that using env replace (while safe) is
# a real pain because of the many conditions that have to
# be met, which I cannot meet while doing the
# replacement, so I did my own little hack that does
# something like a replacement
# ASSUMPTIONS:
# we do not do anything crazy like replacing x
# with something in terms of x !
#
# we do not have envs or anything, just a simple
# computational graph that has not been compiled
# yet
if node:
for i,inp in enumerate(node.inputs):
if inp in replace_pairs:
node.inputs[i] = replace_pairs[inp]
else:
inp.owner = my_replace(inp.owner, replace_pairs)
return node
else:
return node
my_outs = op.outputs
for i, out in enumerate(my_outs):
my_outs[i].owner = my_replace(out.owner, replace_pairs)
for idx in xrange(len(my_outs)):
t = my_outs[idx]
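# prepend a size-1 time dimension that is not broadcastable (this is
# what the new 'f' DimShuffle flag provides), so the single computed
# step keeps the usual (n_steps x result) layout of a scan output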
p = ['f'] + [i for i in range(t.type.ndim)]
nwout = elemwise.DimShuffle(t.broadcastable,p)(t)
env.replace(node.outputs[idx],nwout)
# we are done ...
# is 30 soon enough !? I want to do it as early as possible .. such that
# the new graph gets optimized
optdb.register('scanOp_remove_from_graph', ScanRemoveFromGraph() , 30, 'fast_run')
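# A hedged sketch of the case the optimizer above handles (names
# assumed, not from this commit):
#
#   values, updates = scan(fn, u, x0, [], n_steps = 1)
#
# With n_fixed_steps == 1 the loop body runs exactly once, so the graph
# can apply fn to u[0] and x0 directly and rely on the DimShuffle above
# to put back the leading time dimension of length 1.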
......@@ -43,6 +43,10 @@ class DimShuffle(Op):
dimension and a numerical index represents the dimension of the same
rank in the tensor passed to perform.
Note 2.04.2010 RP: added 'f', which means that we insert a non-broadcastable
dimension; 'f' behaves exactly like 'x', except that the new dimension is
not broadcastable
Examples:
DimShuffle((False, False, False), ['x', 2, 'x', 0, 1])
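A hedged example of the new flag: DimShuffle((False, False), [0, 'f', 1])
inserts a middle dimension of size 1 that, unlike one inserted with 'x',
is marked non-broadcastable.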
......@@ -120,10 +124,10 @@ class DimShuffle(Op):
# transposition of non-broadcastable dimensions
# This is how the dimensions will be permuted, without accounting for the extra
# 'x' broadcastable dimensions to insert.
self.shuffle = [i2j[x] for x in new_order if x != 'x']
self.shuffle = [i2j[x] for x in new_order if x != 'x' and x != 'f']
# list of dimensions of the output that are broadcastable and were not in the original input
self.augment = [i for i, x in enumerate(new_order) if x == 'x']
self.augment = [i for i, x in enumerate(new_order) if x == 'x' or x == 'f']
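# Hedged example: for a 3d non-broadcastable input,
# new_order = ['x', 2, 'f', 0, 1] gives self.shuffle == [2, 0, 1] (the
# kept input dimensions, in output order) and self.augment == [0, 2]
# (the output positions of the inserted 'x'/'f' dimensions).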
if self.inplace:
self.view_map = {0: [0]}
......@@ -147,6 +151,8 @@ class DimShuffle(Op):
for value in self.new_order:
if value == 'x':
ob.append(True)
elif value == 'f':
ob.append(False)
else:
ob.append(ib[value])
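# Hedged example: an input with broadcastable pattern (False,) and
# new_order = [0, 'x', 'f'] yields ob == [False, True, False], since
# 'x' contributes a broadcastable dimension and 'f' a non-broadcastable
# one.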
......@@ -235,7 +241,7 @@ class DimShuffle(Op):
shape_statements = ['npy_intp dimensions[%i]'%nd_out]
for i, o in enumerate(self.new_order):
if o != 'x':
if o != 'x' and o != 'f':
shape_statements += [('dimensions['+str(i)+'] = %(basename)s->dimensions['+str(o)+']')]
else:
shape_statements += [('dimensions['+str(i)+'] = 1')]
......@@ -250,7 +256,7 @@ class DimShuffle(Op):
#set the strides of the non-broadcasted dimensions
for i, o in enumerate(self.new_order):
if o != 'x':
if o != 'x' and o != 'f':
strides_statements += [('strides['+str(i)+'] = %(basename)s->strides['+str(o)+']')]
else:
strides_statements += [('strides['+str(i)+'] = 0')]
......@@ -317,7 +323,7 @@ class DimShuffle(Op):
gz = as_tensor_variable(gz)
grad_order = ['x'] * len(x.type.broadcastable)
for i, v in enumerate(self.new_order):
if v != 'x':
if v != 'x' and v != 'f':
grad_order[v] = i
return [DimShuffle(gz.type.broadcastable, grad_order, inplace=True)(Elemwise(scalar.identity)(gz))]
......
......@@ -125,7 +125,7 @@ class T_Scan(unittest.TestCase):
W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar()
output, updates = theano.scan(f_rnn, u,x0,[W_in,W], n_steps = 0, truncate_gradient =
output, updates = theano.scan(f_rnn, u,x0,[W_in,W], n_steps = None, truncate_gradient =
-1, go_backwards = False)
f2 = theano.function([u,x0,W_in,W], output, updates = updates)
......@@ -146,7 +146,6 @@ class T_Scan(unittest.TestCase):
assert numpy.allclose(theano_values, v_out)
# simple rnn, one input, one state, weights for each; input/state
# are vectors, weights are scalars; using shared variables
def test_one_sequence_one_output_weights_shared(self):
......@@ -159,7 +158,7 @@ class T_Scan(unittest.TestCase):
def f_rnn_shared(u_t,x_tm1, tmp_W_in, tmp_W):
return u_t*tmp_W_in+x_tm1*tmp_W
output, updates = theano.scan(f_rnn_shared, u,x0,[W_in, W], n_steps =0,
output, updates = theano.scan(f_rnn_shared, u,x0,[W_in, W], n_steps =None,
truncate_gradient= -1, go_backwards = False)
f3 = theano.function([u,x0], output, updates = updates)
# get random initial values
......@@ -176,7 +175,6 @@ class T_Scan(unittest.TestCase):
assert numpy.allclose(theano_values, v_out)
# some rnn with multiple outputs and multiple inputs; other
# dimension instead of scalars/vectors
def test_multiple_inputs_multiple_outputs(self):
......@@ -203,7 +201,7 @@ class T_Scan(unittest.TestCase):
return [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]
outputs, updates = theano.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1, n_steps = 0,
outputs, updates = theano.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1, n_steps = None,
truncate_gradient = -1, go_backwards = False)
f4 = theano.function([u1,u2,x0,y0,W_in1], outputs, updates = updates)
......@@ -222,7 +220,6 @@ class T_Scan(unittest.TestCase):
assert numpy.allclose(theano_y , v_y)
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs)
......@@ -242,7 +239,7 @@ class T_Scan(unittest.TestCase):
return u_tm2*W_in+x_tm1*W+x_tm2
outputs, updates = theano.scan(f_rnn_shared, dict(input=u, taps=-2),
dict(initial = x0, taps = [-1,-2]), [], n_steps = 0, truncate_gradient = -1,
dict(initial = x0, taps = [-1,-2]), [], n_steps = None, truncate_gradient = -1,
go_backwards = False)
f7 = theano.function([u,x0], outputs, updates = updates)
......@@ -282,7 +279,7 @@ class T_Scan(unittest.TestCase):
return (u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2
output,updates = theano.scan(f_rnn_shared, dict( input = u, taps=[-2,2]),\
dict(initial = x0, taps = [-1,-2]), [], n_steps =0, truncate_gradient =-1,
dict(initial = x0, taps = [-1,-2]), [], n_steps = None, truncate_gradient =-1,
go_backwards = False)
f8 = theano.function([u,x0], output, updates = updates)
......@@ -324,7 +321,7 @@ class T_Scan(unittest.TestCase):
outputs, updates = theano.scan(f_rnn_shared, [u0,u1,u2],
[dict( initial = x0, inplace =u2), dict(initial = x1, inplace = u1)],
[], n_steps = 0, truncate_gradient = -1, go_backwards = False, mode=mode )
[], n_steps = None, truncate_gradient = -1, go_backwards = False, mode=mode )
f9 = theano.function([mu0,mu1,mu2,x0,x1], outputs , updates = updates, mode = mode)
# compute output in numpy
......@@ -374,7 +371,7 @@ class T_Scan(unittest.TestCase):
outputs, updates = theano.scan(f_rnn_shared,
[u0,dict(input = u1, taps = [0,1]),dict( input = u2, taps= [-1,0,+1])],
[dict( initial = x0, inplace =u2), dict(initial = x1, inplace = u1)],
[], n_steps = 0, truncate_gradient = 01, go_backwards = False, mode=mode )
[], n_steps = None, truncate_gradient = 01, go_backwards = False, mode=mode )
f9 = theano.function([mu0,mu1,mu2,x0,x1], outputs , updates = updates, mode = mode)
# compute output in numpy
......@@ -429,7 +426,7 @@ class T_Scan(unittest.TestCase):
y0 = theano.tensor.matrix('y0')
outputs,updates = theano.scan(f, [u1,u2], [ dict(initial = y0, taps = [-3,-2,-1]),y1,
None], [], n_steps = 0, go_backwards = False, truncate_gradient = -1)
None], [], n_steps = None, go_backwards = False, truncate_gradient = -1)
f10 = theano.function([u2,y0], outputs, updates = updates)
theano_y0,theano_y1,theano_y2 = f10(vu2, vy0)
......@@ -545,7 +542,7 @@ class T_Scan(unittest.TestCase):
u = theano.tensor.dvector()
outputs, updates = theano.scan(f_rnn, u,[],[], n_steps =0 , truncate_gradient = -1,
outputs, updates = theano.scan(f_rnn, u,[],[], n_steps =None , truncate_gradient = -1,
go_backwards = False)
f2 = theano.function([u], outputs, updates = updates)
......@@ -578,7 +575,7 @@ class T_Scan(unittest.TestCase):
W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar()
output, updates = theano.scan(f_rnn, u,x0,[W_in,W], n_steps = 0, truncate_gradient =
output, updates = theano.scan(f_rnn, u,x0,[W_in,W], n_steps = None, truncate_gradient =
-1, go_backwards = True)
f2 = theano.function([u,x0,W_in,W], output, updates = updates)
......@@ -607,9 +604,7 @@ class T_Scan(unittest.TestCase):
rng = numpy.random.RandomState(utt.fetch_seed())
v_v = rng.uniform( size = (5,), low = -5., high = 5.)
print f(v_v,0.)
assert ( numpy.sum(v_v) == f(v_v, 0.) )
assert abs(numpy.sum(v_v) - f(v_v, 0.)) < 1e-3
......