提交 7cb2384c authored 作者: nouiz's avatar nouiz

Merge pull request #371 from pascanur/fixed_grad_of_grad_of_scan

Fixed grad of grad of scan
...@@ -30,7 +30,7 @@ functions: ``scan()``, ``map()``, ``reduce()``, ``foldl()``, ...@@ -30,7 +30,7 @@ functions: ``scan()``, ``map()``, ``reduce()``, ``foldl()``,
__docformat__ = 'restructedtext en' __docformat__ = 'restructedtext en'
__authors__ = ( "Razvan Pascanu " __authors__ = ("Razvan Pascanu "
"Frederic Bastien " "Frederic Bastien "
"James Bergstra " "James Bergstra "
"Pascal Lamblin " "Pascal Lamblin "
......
...@@ -417,7 +417,8 @@ class ScanSaveMem(gof.Optimizer): ...@@ -417,7 +417,8 @@ class ScanSaveMem(gof.Optimizer):
# change the number of steps in that case. To do this we set # change the number of steps in that case. To do this we set
# global_nsteps to None which is seen as a flag that nothing needs # global_nsteps to None which is seen as a flag that nothing needs
# to be done # to be done
if len(node.outputs) <= c_outs: assert len(node.outputs) >= c_outs
if len(node.outputs) == c_outs:
global_nsteps = {'real': -1, 'sym': []} global_nsteps = {'real': -1, 'sym': []}
else: else:
global_nsteps = None global_nsteps = None
...@@ -474,7 +475,7 @@ class ScanSaveMem(gof.Optimizer): ...@@ -474,7 +475,7 @@ class ScanSaveMem(gof.Optimizer):
break break
# 2.3.2 extract the begin/end of the first dimension # 2.3.2 extract the begin/end of the first dimension
if i > op.n_mit_mot: if i >= op.n_mit_mot:
try: try:
length = shape_of[out][0] length = shape_of[out][0]
except KeyError: except KeyError:
...@@ -650,7 +651,8 @@ class ScanSaveMem(gof.Optimizer): ...@@ -650,7 +651,8 @@ class ScanSaveMem(gof.Optimizer):
tmp = tensor.as_tensor_variable(val) tmp = tensor.as_tensor_variable(val)
initl = tensor.as_tensor_variable(init_l[i]) initl = tensor.as_tensor_variable(init_l[i])
tmp = tensor.maximum(tmp, initl) tmp = tensor.maximum(tmp, initl)
tmp = pre_greedy_local_optimizer(list_opt_slice, tmp) tmp = pre_greedy_local_optimizer(list_opt_slice,
tmp)
tmp = pre_constant_merge([tmp])[0] tmp = pre_constant_merge([tmp])[0]
nw_input = nw_inputs[offset + idx][:tmp] nw_input = nw_inputs[offset + idx][:tmp]
......
...@@ -5,10 +5,10 @@ See scan.py for details on scan ...@@ -5,10 +5,10 @@ See scan.py for details on scan
""" """
__docformat__ = 'restructedtext en' __docformat__ = 'restructedtext en'
__authors__ = ( "Razvan Pascanu " __authors__ = ("Razvan Pascanu "
"Frederic Bastien " "Frederic Bastien "
"James Bergstra " "James Bergstra "
"Pascal Lamblin " ) "Pascal Lamblin ")
__copyright__ = "(c) 2010, Universite de Montreal" __copyright__ = "(c) 2010, Universite de Montreal"
__contact__ = "Razvan Pascanu <r.pascanu@gmail>" __contact__ = "Razvan Pascanu <r.pascanu@gmail>"
...@@ -27,13 +27,13 @@ _logger = logging.getLogger('theano.scan_module.scan_views') ...@@ -27,13 +27,13 @@ _logger = logging.getLogger('theano.scan_module.scan_views')
# The ``map`` view of Scan Op. # The ``map`` view of Scan Op.
def map( fn def map(fn,
, sequences sequences,
, non_sequences = None non_sequences=None,
, truncate_gradient = -1 truncate_gradient=-1,
, go_backwards = False go_backwards=False,
, mode = None mode=None,
, name = None ): name=None):
""" """
Similar behaviour as python's map. Similar behaviour as python's map.
...@@ -58,24 +58,24 @@ def map( fn ...@@ -58,24 +58,24 @@ def map( fn
:param name: See ``scan``. :param name: See ``scan``.
""" """
return scan.scan( fn = fn return scan.scan(fn=fn,
, sequences = sequences sequences=sequences,
, outputs_info = [] outputs_info=[],
, non_sequences = non_sequences non_sequences=non_sequences,
, truncate_gradient = truncate_gradient truncate_gradient=truncate_gradient,
, go_backwards = go_backwards go_backwards=go_backwards,
, mode = mode mode=mode,
, name = name ) name=name)
# The ``reduce`` view of Scan Op. # The ``reduce`` view of Scan Op.
def reduce( fn def reduce(fn,
, sequences sequences,
, outputs_info outputs_info,
, non_sequences = None non_sequences=None,
, go_backwards = False go_backwards=False,
, mode = None mode=None,
, name = None ): name=None):
""" """
Similar behaviour as python's reduce Similar behaviour as python's reduce
...@@ -101,27 +101,27 @@ def reduce( fn ...@@ -101,27 +101,27 @@ def reduce( fn
:param name: See ``scan``. :param name: See ``scan``.
""" """
rval = scan.scan(fn = fn rval = scan.scan(fn=fn,
, sequences = sequences sequences=sequences,
, outputs_info = outputs_info outputs_info=outputs_info,
, non_sequences = non_sequences non_sequences=non_sequences,
, go_backwards = go_backwards go_backwards=go_backwards,
, truncate_gradient = -1 truncate_gradient=-1,
, mode = mode mode=mode,
, name = name ) name=name)
if isinstance(rval[0], (list,tuple)): if isinstance(rval[0], (list, tuple)):
return [ x[-1] for x in rval[0]], rval[1] return [x[-1] for x in rval[0]], rval[1]
else: else:
return rval[0][-1], rval[1] return rval[0][-1], rval[1]
# The ``foldl`` view of Scan Op. # The ``foldl`` view of Scan Op.
def foldl( fn def foldl(fn,
, sequences sequences,
, outputs_info outputs_info,
, non_sequences = None non_sequences=None,
, mode = None mode=None,
, name = None ): name=None):
""" """
Similar behaviour as haskell's foldl Similar behaviour as haskell's foldl
...@@ -143,22 +143,22 @@ def foldl( fn ...@@ -143,22 +143,22 @@ def foldl( fn
:param name: See ``scan``. :param name: See ``scan``.
""" """
return reduce( fn = fn return reduce(fn=fn,
, sequences = sequences sequences=sequences,
, outputs_info = outputs_info outputs_info=outputs_info,
, non_sequences = non_sequences non_sequences=non_sequences,
, go_backwards = False go_backwards=False,
, mode = mode mode=mode,
, name = name ) name=name)
# The ``foldl`` view of Scan Op. # The ``foldl`` view of Scan Op.
def foldr( fn def foldr(fn,
, sequences sequences,
, outputs_info outputs_info,
, non_sequences = None non_sequences=None,
, mode = None mode=None,
, name = None ): name=None):
""" """
Similar behaviour as haskell's foldr Similar behaviour as haskell's foldr
...@@ -180,10 +180,10 @@ def foldr( fn ...@@ -180,10 +180,10 @@ def foldr( fn
:param name: See ``scan``. :param name: See ``scan``.
""" """
return reduce( fn = fn return reduce(fn=fn,
, sequences = sequences sequences=sequences,
, outputs_info = outputs_info outputs_info=outputs_info,
, non_sequences = non_sequences non_sequences=non_sequences,
, go_backwards = True go_backwards=True,
, mode = mode mode=mode,
, name = name ) name=name)
...@@ -2585,6 +2585,26 @@ class T_Scan(unittest.TestCase): ...@@ -2585,6 +2585,26 @@ class T_Scan(unittest.TestCase):
tf = theano.function([c, x], dP) tf = theano.function([c, x], dP)
assert tf([1.0, 2.0, -3.0, 4.0], 2.0) == 38 assert tf([1.0, 2.0, -3.0, 4.0], 2.0) == 38
def test_grad_of_grad_of_state(self):
    """Regression test: second-order gradient through a scan op.

    Builds a polynomial P(x) = sum_i c_i * x**i via ``theano.scan``,
    then differentiates twice with respect to ``x``. Ensures that
    ``grad`` can be applied to a cost that is itself defined in terms
    of the gradient of a scan (grad-of-grad-of-scan).
    """
    # Example provided by Michael Forbes.
    # This test ensures that we can compute gradients through a cost
    # defined in terms of gradients of scan.
    c = theano.tensor.vector('c')
    x = theano.tensor.scalar('x')
    # Upper bound on the number of polynomial coefficients; scan iterates
    # over at most this many powers (zipped with `c`, so effectively len(c)).
    _max_coefficients_supported = 1000
    full_range = theano.tensor.arange(_max_coefficients_supported)
    # components[i] = c[i] * x**i
    components, updates = theano.scan(
        fn=lambda coeff, power, free_var: coeff * (free_var ** power),
        outputs_info=None,
        sequences=[c, full_range],
        non_sequences=x)
    P = components.sum()
    dP = theano.tensor.grad(P, x).sum()
    ddP = theano.tensor.grad(dP, x)
    tf = theano.function([c, x], ddP)
    # P(x)  = 1 + 2x - 3x^2 + 4x^3
    # P''(x) = -6 + 24x  ->  P''(2) = 42
    assert tf([1.0, 2.0, -3.0, 4.0], 2.0) == 42
def test_return_steps(self): def test_return_steps(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
vW_in2 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.)) vW_in2 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
...@@ -2705,9 +2725,9 @@ class T_Scan(unittest.TestCase): ...@@ -2705,9 +2725,9 @@ class T_Scan(unittest.TestCase):
grad_fn = theano.function([xinit, w], [gx,gw], grad_fn = theano.function([xinit, w], [gx,gw],
allow_input_downcast = True) allow_input_downcast = True)
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
v_x = numpy.array(rng.uniform(size=(5,2,3), low=-2., high=2.), v_x = numpy.array(rng.uniform(size=(5,2,3), low=-3., high=3.),
dtype=theano.config.floatX) dtype=theano.config.floatX)
v_w = numpy.array(rng.uniform(size=(2,2)), dtype= theano.config.floatX) v_w = numpy.array(rng.uniform(size=(2,2), low=-3., high=3.), dtype= theano.config.floatX)
analytic_grad = grad_fn(v_x, v_w) analytic_grad = grad_fn(v_x, v_w)
num_grad = multiple_outputs_numeric_grad(cost_fn, num_grad = multiple_outputs_numeric_grad(cost_fn,
[v_x, v_w]) [v_x, v_w])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论