提交 7cb2384c authored 作者: nouiz's avatar nouiz

Merge pull request #371 from pascanur/fixed_grad_of_grad_of_scan

Fixed grad of grad of scan
......@@ -30,11 +30,11 @@ functions: ``scan()``, ``map()``, ``reduce()``, ``foldl()``,
__docformat__ = 'restructedtext en'
__authors__ = ( "Razvan Pascanu "
"Frederic Bastien "
"James Bergstra "
"Pascal Lamblin "
"Arnaud Bergeron ")
__authors__ = ("Razvan Pascanu "
"Frederic Bastien "
"James Bergstra "
"Pascal Lamblin "
"Arnaud Bergeron ")
__copyright__ = "(c) 2010, Universite de Montreal"
__contact__ = "Razvan Pascanu <r.pascanu@gmail>"
......
......@@ -417,7 +417,8 @@ class ScanSaveMem(gof.Optimizer):
# change the number of steps in that case. To do this we set
# global_nsteps to None which is seen as a flag that nothing needs
# to be done
if len(node.outputs) <= c_outs:
assert len(node.outputs) >= c_outs
if len(node.outputs) == c_outs:
global_nsteps = {'real': -1, 'sym': []}
else:
global_nsteps = None
......@@ -474,7 +475,7 @@ class ScanSaveMem(gof.Optimizer):
break
# 2.3.2 extract the begin/end of the first dimension
if i > op.n_mit_mot:
if i >= op.n_mit_mot:
try:
length = shape_of[out][0]
except KeyError:
......@@ -650,7 +651,8 @@ class ScanSaveMem(gof.Optimizer):
tmp = tensor.as_tensor_variable(val)
initl = tensor.as_tensor_variable(init_l[i])
tmp = tensor.maximum(tmp, initl)
tmp = pre_greedy_local_optimizer(list_opt_slice, tmp)
tmp = pre_greedy_local_optimizer(list_opt_slice,
tmp)
tmp = pre_constant_merge([tmp])[0]
nw_input = nw_inputs[offset + idx][:tmp]
......
......@@ -5,10 +5,10 @@ See scan.py for details on scan
"""
__docformat__ = 'restructedtext en'
__authors__ = ( "Razvan Pascanu "
"Frederic Bastien "
"James Bergstra "
"Pascal Lamblin " )
__authors__ = ("Razvan Pascanu "
"Frederic Bastien "
"James Bergstra "
"Pascal Lamblin ")
__copyright__ = "(c) 2010, Universite de Montreal"
__contact__ = "Razvan Pascanu <r.pascanu@gmail>"
......@@ -27,13 +27,13 @@ _logger = logging.getLogger('theano.scan_module.scan_views')
# The ``map`` view of Scan Op.
def map( fn
, sequences
, non_sequences = None
, truncate_gradient = -1
, go_backwards = False
, mode = None
, name = None ):
def map(fn,
sequences,
non_sequences=None,
truncate_gradient=-1,
go_backwards=False,
mode=None,
name=None):
"""
Similar behaviour as python's map.
......@@ -58,24 +58,24 @@ def map( fn
:param name: See ``scan``.
"""
return scan.scan( fn = fn
, sequences = sequences
, outputs_info = []
, non_sequences = non_sequences
, truncate_gradient = truncate_gradient
, go_backwards = go_backwards
, mode = mode
, name = name )
return scan.scan(fn=fn,
sequences=sequences,
outputs_info=[],
non_sequences=non_sequences,
truncate_gradient=truncate_gradient,
go_backwards=go_backwards,
mode=mode,
name=name)
# The ``reduce`` view of Scan Op.
def reduce( fn
, sequences
, outputs_info
, non_sequences = None
, go_backwards = False
, mode = None
, name = None ):
def reduce(fn,
sequences,
outputs_info,
non_sequences=None,
go_backwards=False,
mode=None,
name=None):
"""
Similar behaviour as python's reduce
......@@ -101,27 +101,27 @@ def reduce( fn
:param name: See ``scan``.
"""
rval = scan.scan(fn = fn
, sequences = sequences
, outputs_info = outputs_info
, non_sequences = non_sequences
, go_backwards = go_backwards
, truncate_gradient = -1
, mode = mode
, name = name )
if isinstance(rval[0], (list,tuple)):
return [ x[-1] for x in rval[0]], rval[1]
rval = scan.scan(fn=fn,
sequences=sequences,
outputs_info=outputs_info,
non_sequences=non_sequences,
go_backwards=go_backwards,
truncate_gradient=-1,
mode=mode,
name=name)
if isinstance(rval[0], (list, tuple)):
return [x[-1] for x in rval[0]], rval[1]
else:
return rval[0][-1], rval[1]
# The ``foldl`` view of Scan Op.
def foldl( fn
, sequences
, outputs_info
, non_sequences = None
, mode = None
, name = None ):
def foldl(fn,
sequences,
outputs_info,
non_sequences=None,
mode=None,
name=None):
"""
Similar behaviour as haskell's foldl
......@@ -143,22 +143,22 @@ def foldl( fn
:param name: See ``scan``.
"""
return reduce( fn = fn
, sequences = sequences
, outputs_info = outputs_info
, non_sequences = non_sequences
, go_backwards = False
, mode = mode
, name = name )
return reduce(fn=fn,
sequences=sequences,
outputs_info=outputs_info,
non_sequences=non_sequences,
go_backwards=False,
mode=mode,
name=name)
# The ``foldr`` view of Scan Op.
def foldr( fn
, sequences
, outputs_info
, non_sequences = None
, mode = None
, name = None ):
def foldr(fn,
sequences,
outputs_info,
non_sequences=None,
mode=None,
name=None):
"""
Similar behaviour as haskell's foldr
......@@ -180,10 +180,10 @@ def foldr( fn
:param name: See ``scan``.
"""
return reduce( fn = fn
, sequences = sequences
, outputs_info = outputs_info
, non_sequences = non_sequences
, go_backwards = True
, mode = mode
, name = name )
return reduce(fn=fn,
sequences=sequences,
outputs_info=outputs_info,
non_sequences=non_sequences,
go_backwards=True,
mode=mode,
name=name)
......@@ -2585,6 +2585,26 @@ class T_Scan(unittest.TestCase):
tf = theano.function([c, x], dP)
assert tf([1.0, 2.0, -3.0, 4.0], 2.0) == 38
def test_grad_of_grad_of_state(self):
# Regression test for taking the gradient of a gradient of a scan output.
# Example provided by Michael Forbes.
# This test ensures that we can compute gradients through costs
# defined in terms of gradients of scan.
c = theano.tensor.vector('c')
x = theano.tensor.scalar('x')
# NOTE(review): full_range has 1000 entries while the call below passes
# only 4 coefficients; presumably scan stops at the shortest sequence --
# confirm against the scan documentation.
_max_coefficients_supported = 1000
full_range = theano.tensor.arange(_max_coefficients_supported)
# Each step yields coeff * free_var ** power, so the summed components
# form the polynomial P(x) = sum_k c[k] * x ** k.
components, updates = theano.scan(
fn=lambda coeff, power, free_var: coeff * (free_var ** power),
outputs_info=None,
sequences=[c, full_range],
non_sequences=x)
P = components.sum()
# First derivative dP/dx, then differentiate again to get d2P/dx2.
dP = theano.tensor.grad(P, x).sum()
ddP = theano.tensor.grad(dP, x)
tf = theano.function([c, x], ddP)
# For c = [1, 2, -3, 4]: P(x) = 1 + 2x - 3x^2 + 4x^3, hence
# P''(x) = -6 + 24x, which equals 42 at x = 2.
assert tf([1.0, 2.0, -3.0, 4.0], 2.0) == 42
def test_return_steps(self):
rng = numpy.random.RandomState(utt.fetch_seed())
vW_in2 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
......@@ -2705,9 +2725,9 @@ class T_Scan(unittest.TestCase):
grad_fn = theano.function([xinit, w], [gx,gw],
allow_input_downcast = True)
rng = numpy.random.RandomState(utt.fetch_seed())
v_x = numpy.array(rng.uniform(size=(5,2,3), low=-2., high=2.),
v_x = numpy.array(rng.uniform(size=(5,2,3), low=-3., high=3.),
dtype=theano.config.floatX)
v_w = numpy.array(rng.uniform(size=(2,2)), dtype= theano.config.floatX)
v_w = numpy.array(rng.uniform(size=(2,2), low=-3., high=3.), dtype= theano.config.floatX)
analytic_grad = grad_fn(v_x, v_w)
num_grad = multiple_outputs_numeric_grad(cost_fn,
[v_x, v_w])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论