提交 787133a7 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #2364 from lamblin/scan_nonseq_grad

[WIP] Detect Null gradient wrt non-sequences in scan
......@@ -1883,13 +1883,27 @@ class Scan(PureOp):
type_outs.append('disconnected')
else:
type_outs.append('connected')
inner_inp_sitsot = dC_dXtm1s[ins_pos - self.n_seqs:]
outer_inp_sitsot = [
tensor.zeros([grad_steps + 1] +
[x.shape[i] for i in xrange(x.ndim)],
dtype=y.dtype)
for y, x in zip(inner_inp_sitsot,
self.outer_non_seqs(inputs))]
outer_inp_sitsot = []
for _idx, y in enumerate(inner_inp_sitsot):
x = self.outer_non_seqs(inputs)[_idx]
if isinstance(y.type, NullType):
# Cannot use dC_dXtm1s.dtype, so we use floatX instead.
outer_inp_sitsot.append(
tensor.zeros([grad_steps + 1] +
[x.shape[i] for i in xrange(x.ndim)],
dtype=theano.config.floatX))
# replace y by a zero tensor of the right shape
inner_inp_sitsot[_idx] = tensor.zeros(
diff_inputs[ins_pos + _idx].shape,
dtype=theano.config.floatX)
else:
outer_inp_sitsot.append(
tensor.zeros([grad_steps + 1] +
[x.shape[i] for i in xrange(x.ndim)],
dtype=y.dtype))
n_sitsot_outs = len(outer_inp_sitsot)
new_tap_array = mitmot_inp_taps + [[-1] for k in
......
......@@ -10,6 +10,7 @@ import cPickle
import numpy
from nose.plugins.skip import SkipTest
from nose.plugins.attrib import attr
from nose.tools import assert_raises
from numpy.testing import dec
import theano
......@@ -3122,6 +3123,38 @@ class T_Scan(unittest.TestCase):
assert out[4] == 19
# 19.0
def test_crash_nonseq_grad(self):
    # Regression test for a case originally reported by Bitton Tenessi:
    # taking the gradient used to crash. It must now raise
    # NullTypeGradError instead, because the gradient depends on the
    # intermediate states of the random number generators involved.
    # Simplified from the original report.
    rng = tensor.shared_randomstreams.RandomStreams()
    inp = tensor.matrix()
    normalized = inp / tensor.sum(inp, axis=0)

    def outer_step(out_idx):
        def inner_step(in_idx):
            # Draw one multinomial sample per column, weighted by the
            # normalized column values, and add it to the input.
            probs = normalized.T
            drawn = rng.multinomial(n=1, pvals=probs)
            return inp + drawn

        pooled, inner_updates = theano.scan(
            fn=inner_step,
            sequences=tensor.arange(inp.shape[0]))
        # Perturb the pooled result with random binomial noise.
        noise = rng.binomial(size=pooled.shape)
        return pooled + noise, inner_updates

    out, outer_updates = theano.scan(
        outer_step,
        sequences=[tensor.arange(inp.shape[0])])

    # The gradient wrt `inp` goes through the RNG states, so it must
    # be reported as a null gradient rather than crashing.
    assert_raises(theano.gradient.NullTypeGradError,
                  tensor.grad, out.sum(), inp)
def test_bugFunctioProvidesIntermediateNodesAsInputs(self):
# This is a bug recently reported by Ilya
# made it CPU friendly
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论