提交 d57e5031 authored 作者: James Bergstra's avatar James Bergstra

fixed a bug with random number generation by submodules. See Oliviers email to…

fixed a bug with random number generation by submodules. See Oliviers email to theano-dev for details.
上级 9a21106f
......@@ -616,6 +616,17 @@ def function(inputs, outputs, mode='FAST_RUN', accept_inplace = False):
Out instance if necessary:
* a Result instance r will be upgraded like Out(r)
Random Numbers
--------------
If your computation involves random numbers, then you have to pass the `RandomKit` as an
input argument. That RandomKit must have a name to be able to seed the generator. To seed
the generator, use the __getitem__ method:
f[<kitname>] = seed #re-seed the elements of a RandomKit
"""
def wrap_in(input):
......
......@@ -240,14 +240,14 @@ class Member(_RComponent):
if memo and r in memo:
return memo[r]
rval = gof.Container(r, storage = [None])
memo[r] = rval
return rval
memo[r] = io.In(result = r, value = rval, mutable = False)
return memo[r]
def build(self, mode, memo):
"""
Returns the Container associated to this Member's Result.
"""
return memo[self.r]
return memo[self.r].value
......@@ -344,17 +344,17 @@ class Method(Component):
' Verify that it is indeed a Member of the'
' enclosing module or of one of its submodules.' % (r, self))
else:
return gof.Container(r, storage = [None])
return io.In(result = r, value = gof.Container(r, storage = [None]), mutable = False)
# Wrap the inputs in In instances.
inputs = self.inputs
inputs = [io.In(result = input,
value = get_storage(input),
value = get_storage(input).value,
mutable = False)
for input in inputs]
# Add the members to update to the inputs.
inputs += [io.In(result = k,
update = v,
value = get_storage(k, not allocate_all),
value = get_storage(k, not allocate_all).value,
mutable = True,
strict = True)
for k, v in self.updates.iteritems()]
......@@ -367,14 +367,14 @@ class Method(Component):
if input not in _inputs and not isinstance(input, gof.Value):
# Add this input to the inputs; we require that storage already exists for them,
# but otherwise they are immutable.
inputs += [io.In(result = input,
value = get_storage(input, not allocate_all),
mutable = False)]
storage = get_storage(input, not allocate_all)
inputs.append(storage)
# Add the kits to the input. The kit should be associated in
# memo to a list of Containers. theano.function handles that
# case by picking only the needed Containers from the list, so
# here we can just delegate to theano.function.
inputs += [(kit, get_storage(kit, not allocate_all)) for kit in self.kits]
#inputs += [(kit, get_storage(kit, not allocate_all)) for kit in self.kits]
return F.function(inputs, outputs, mode)
def pretty(self, **kwargs):
......@@ -392,19 +392,6 @@ class Method(Component):
einputs, eoutputs = f.maker.env.inputs, f.maker.env.outputs
updates = dict(((k, v) for k, v in zip(einputs[len(inputs):], eoutputs[len(outputs):])))
inputs, outputs = einputs[:len(inputs)], eoutputs[:len(outputs)]
# nin = len(inputs)
# nout = len(outputs)
# k, v = zip(*updates.items()) if updates else ((), ())
# nup = len(k)
# eff_in = tuple(inputs) + tuple(k)
# eff_out = tuple(outputs) + tuple(v)
# supp_in = tuple(gof.graph.inputs(eff_out))
# env = gof.Env(*gof.graph.clone(eff_in + supp_in,
# eff_out))
# sup = F.Supervisor(set(env.inputs).difference(env.inputs[len(inputs):len(eff_in)]))
# env.extend(sup)
# mode.optimizer.optimize(env)
# inputs, outputs, updates = env.inputs[:nin], env.outputs[:nout], dict(zip(env.inputs[nin:], env.outputs[nout:]))
rval += pprint(inputs, outputs, updates, False)
return rval
......@@ -898,21 +885,15 @@ class KitComponent(Component):
the memo that maps the SymbolicInputKit to the list of
Containers.
"""
kit = self.kit
if kit in memo:
return memo[kit]
containers = []
for input in kit.sinputs:
for input in self.kit.sinputs:
r = input.result
if r not in memo:
memo[r] = gof.Container(r, storage = [None])
containers.append(memo[r])
#containers.append(gof.Container(r, storage = [None]))
memo[kit] = containers
return containers
input = copy(input)
input.value = gof.Container(r, storage = [None])
memo[r] = input
def build(self, mode, memo):
return memo[self.kit]
return [memo[i.result].value for i in self.kit.sinputs]
......
......@@ -9,6 +9,7 @@ from .. import compile
from ..compile import SymbolicInputKit, SymbolicInput
from copy import copy
import sys
RS = numpy.random.RandomState
......@@ -50,6 +51,7 @@ class RandomFunction(gof.Op):
def perform(self, node, inputs, (rout, out)):
r, shape, args = inputs[0], inputs[1], inputs[2:]
r_orig = r
assert self.outtype.ndim == len(shape)
if not self.inplace:
r = copy(r)
......@@ -252,12 +254,23 @@ class RModule(compile.Module):
return x
def _instance_seed(self, inst, seed, recursive = True):
seedgen = numpy.random.RandomState(seed)
if recursive:
#Here, we recurse through all the components (inst2) contained in (inst)
#and seeds each subcomponent that is an RModule
for path, c in self.flat_components_map(True):
if isinstance(c, RModule):
inst2 = inst
for name in path:
inst2 = inst2[name]
c._rkit.kit.distribute(seed, xrange(len(inst._rkit)), inst2._rkit)
# A Kit (c._rkit.kit) contains a list of io.SymbolicIn instances
# and the distribute method takes a value (seed), a list of indices
# and a list of corresponding gof.Container instances. In this
# situation it will reseed all the rngs using the containers
# associated to them.
c._rkit.kit.distribute(seedgen.random_integers(sys.maxint-1),
xrange(len(inst2._rkit)), inst2._rkit)
else:
self._rkit.kit.distribute(seed, xrange(len(inst._rkit)), inst._rkit)
self._rkit.kit.distribute(seedgen.random_integers(sys.maxint-1), xrange(len(inst._rkit)), inst._rkit)
......@@ -227,84 +227,85 @@ class T_real_matrix(TestCase):
self.failUnless(_is_real_matrix(T.DimShuffle([False,False], [1, 0])(T.dmatrix())))
self.failUnless(not _is_real_matrix(T.DimShuffle([False], ['x', 0])(T.dvector())))
class T_gemm_opt(TestCase):
"""This test suite ensures that Gemm is inserted where it belongs, and that the resulting
functions compute the same things as the originals."""
def XYZab(self):
return T.dmatrix(), T.dmatrix(), T.dmatrix(), T.dscalar(), T.dscalar()
def just_gemm(self, i, o, ishapes = [(4,3), (3,5), (4,5), (), ()]):
def on_fail():
for node in f.maker.env.toposort():
print 'GRAPH', node
self.fail()
f = function([In(ii, mutable=True) for ii in i],o, mode='FAST_RUN')
for node in f.maker.env.nodes:
if node.op == T.dot: on_fail()
if node.op == _dot22: on_fail()
g = function(i, o, mode='FAST_COMPILE')
for node in g.maker.env.nodes:
if node.op == gemm: on_fail()
rng = numpy.random.RandomState(234)
r0 = f(*[rng.randn(*sh) for sh in ishapes])
rng = numpy.random.RandomState(234)
r1 = g(*[rng.randn(*sh) for sh in ishapes])
if numpy.max(numpy.abs(r0[0] - r1[0])) > 1.0e-8:
self.fail()
def test0(self):
"""Many subgraphs whose dots can be eliminated"""
X,Y,Z,a,b = self.XYZab()
self.just_gemm([X,Y,Z,a,b], [T.dot(X,Y) * a + Z * b])
self.just_gemm([X,Y,Z,a,b], [a * T.dot(X,Y) + b * Z])
self.just_gemm([X,Y,Z,a,b], [b * Z + a * T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [T.dot(X,Y) * a - Z * b])
self.just_gemm([X,Y,Z,a,b], [a * T.dot(X,Y) - b * Z])
self.just_gemm([X,Y,Z,a,b], [b * Z - a * T.dot(X,Y)])
#with transposes (transposes should be pushed through dot in canonicalize)
self.just_gemm([X,Y,Z,a,b], [b * Z.T - a * T.dot(Y.T,X.T)])
self.just_gemm([X,Y,Z,a,b], [b * Z.T + a * b * T.dot(X,Y).T])
#with N multiplications instead of just one
self.just_gemm([X,Y,Z,a,b], [(b * b) * Z * a + (a * a) * T.dot(X,Y) * b])
self.just_gemm([X,Y,Z,a,b], [Z + T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [Z*b + T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [Z + a*b*a*T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [(b * b) * Z * a - (a * a) * T.dot(X,Y) * b])
self.just_gemm([X,Y,Z,a,b], [Z - T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [Z*b - T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [Z - a*b*a*T.dot(X,Y)])
# with > 2 terms in the overall addition
self.just_gemm([X,Y,Z,a,b], [Z + Z + T.dot(X,Y) + Z])
def test_double_gemm(self):
"""This is the pattern that shows up in the autoencoder"""
X,Y,Z,a,b = T.dmatrix(), T.dmatrix(), T.dmatrix(), T.dscalar(), T.dscalar()
R, S, c = T.dmatrix(), T.dmatrix(), T.dscalar()
self.just_gemm([X,Y,Z,a,b, R, S, c], [Z *c + a * T.dot(X,Y) + b * T.dot(R,S).T],
ishapes=[(4,3), (3,5), (4,5), (), (), (5,9), (9,4), ()])
def wishlist(self):
X,Y,Z,a,b = T.dmatrix(), T.dmatrix(), T.dmatrix(), T.dscalar(), T.dscalar()
#with >2 additions of the same T.dot(X,Y term
self.just_gemm([X,Y,Z,a,b], [Z + T.dot(X,Y) + T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [(b * b) * Z * a + (a * a) * T.dot(X,Y) + b * T.dot(X,Y)])
def test_vector_stuff(self):
X,Y,Z,a,b = T.dmatrix(), T.dmatrix(), T.dmatrix(), T.dscalar(), T.dscalar()
u,v = T.dvector(), T.dvector()
f = function([a, u, v], a + T.dot(u,v), mode='FAST_RUN')
self.failIf(gemm in [n.op for n in f.maker.env.nodes])
f = function([a, u, X,Y], a * u + T.dot(X,Y), mode='FAST_RUN')
self.failIf(gemm in [n.op for n in f.maker.env.nodes])
if JOSEPHS_BUG_SOLVED:
class T_gemm_opt(TestCase):
"""This test suite ensures that Gemm is inserted where it belongs, and that the resulting
functions compute the same things as the originals."""
def XYZab(self):
return T.dmatrix(), T.dmatrix(), T.dmatrix(), T.dscalar(), T.dscalar()
def just_gemm(self, i, o, ishapes = [(4,3), (3,5), (4,5), (), ()]):
def on_fail():
for node in f.maker.env.toposort():
print 'GRAPH', node
self.fail()
f = function([In(ii, mutable=True) for ii in i],o, mode='FAST_RUN')
for node in f.maker.env.nodes:
if node.op == T.dot: on_fail()
if node.op == _dot22: on_fail()
g = function(i, o, mode='FAST_COMPILE')
for node in g.maker.env.nodes:
if node.op == gemm: on_fail()
rng = numpy.random.RandomState(234)
r0 = f(*[rng.randn(*sh) for sh in ishapes])
rng = numpy.random.RandomState(234)
r1 = g(*[rng.randn(*sh) for sh in ishapes])
if numpy.max(numpy.abs(r0[0] - r1[0])) > 1.0e-8:
self.fail()
def test0(self):
"""Many subgraphs whose dots can be eliminated"""
X,Y,Z,a,b = self.XYZab()
self.just_gemm([X,Y,Z,a,b], [T.dot(X,Y) * a + Z * b])
self.just_gemm([X,Y,Z,a,b], [a * T.dot(X,Y) + b * Z])
self.just_gemm([X,Y,Z,a,b], [b * Z + a * T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [T.dot(X,Y) * a - Z * b])
self.just_gemm([X,Y,Z,a,b], [a * T.dot(X,Y) - b * Z])
self.just_gemm([X,Y,Z,a,b], [b * Z - a * T.dot(X,Y)])
#with transposes (transposes should be pushed through dot in canonicalize)
self.just_gemm([X,Y,Z,a,b], [b * Z.T - a * T.dot(Y.T,X.T)])
self.just_gemm([X,Y,Z,a,b], [b * Z.T + a * b * T.dot(X,Y).T])
#with N multiplications instead of just one
self.just_gemm([X,Y,Z,a,b], [(b * b) * Z * a + (a * a) * T.dot(X,Y) * b])
self.just_gemm([X,Y,Z,a,b], [Z + T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [Z*b + T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [Z + a*b*a*T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [(b * b) * Z * a - (a * a) * T.dot(X,Y) * b])
self.just_gemm([X,Y,Z,a,b], [Z - T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [Z*b - T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [Z - a*b*a*T.dot(X,Y)])
# with > 2 terms in the overall addition
self.just_gemm([X,Y,Z,a,b], [Z + Z + T.dot(X,Y) + Z])
def test_double_gemm(self):
"""This is the pattern that shows up in the autoencoder"""
X,Y,Z,a,b = T.dmatrix(), T.dmatrix(), T.dmatrix(), T.dscalar(), T.dscalar()
R, S, c = T.dmatrix(), T.dmatrix(), T.dscalar()
self.just_gemm([X,Y,Z,a,b, R, S, c], [Z *c + a * T.dot(X,Y) + b * T.dot(R,S).T],
ishapes=[(4,3), (3,5), (4,5), (), (), (5,9), (9,4), ()])
def wishlist(self):
X,Y,Z,a,b = T.dmatrix(), T.dmatrix(), T.dmatrix(), T.dscalar(), T.dscalar()
#with >2 additions of the same T.dot(X,Y term
self.just_gemm([X,Y,Z,a,b], [Z + T.dot(X,Y) + T.dot(X,Y)])
self.just_gemm([X,Y,Z,a,b], [(b * b) * Z * a + (a * a) * T.dot(X,Y) + b * T.dot(X,Y)])
def test_vector_stuff(self):
X,Y,Z,a,b = T.dmatrix(), T.dmatrix(), T.dmatrix(), T.dscalar(), T.dscalar()
u,v = T.dvector(), T.dvector()
f = function([a, u, v], a + T.dot(u,v), mode='FAST_RUN')
self.failIf(gemm in [n.op for n in f.maker.env.nodes])
f = function([a, u, X,Y], a * u + T.dot(X,Y), mode='FAST_RUN')
self.failIf(gemm in [n.op for n in f.maker.env.nodes])
......@@ -188,7 +188,7 @@ class QuadraticDenoisingAA(T.RModule):
obj.qfilters = [R.uniform(size = sz, low = -inf, high = inf) * qfilter_relscale \
for qf in self.qfilters]
if seed is not None:
obj.seed(seed)
obj.seed(seed, recursive=True)
obj.lr = lr
......@@ -438,7 +438,7 @@ def create(window_size=3,
concatenated_representation_size=7,
lr=0.01,
seed=123,
noise_level=0.01,
noise_level=0.2,
qfilter_relscale=0.1,
compile_mode=None):
""" Create a convolutional model. """
......@@ -457,40 +457,36 @@ def create(window_size=3,
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
return model
from theano import gof
JTEST = theano.compile.mode.optdb.query(*sys.argv[2:])
print 'JTEST', JTEST
theano.compile.register_optimizer('JTEST', JTEST)
from unittest import TestCase
class T_bla(TestCase):
def test0(self):
optimizer = 'fast_compile'
m = create(compile_mode = theano.Mode(linker='c|py', optimizer=optimizer))
if __name__ == '__main__':
optimizer = eval(sys.argv[1])
m = create(compile_mode = theano.Mode(linker='c|py', optimizer=optimizer))
prog_str = []
for i, node in enumerate(m.finetuning_update.maker.env.toposort()):
#print ' ', i, node
prog_str.append(str(node))
print "PROGRAM LEN %i HASH %i"% (len(m.finetuning_update.maker.env.nodes), reduce(lambda a, b: hash(a) ^ hash(b),prog_str))
N.random.seed = 8324
rng = N.random.RandomState(23904)
inputs = [N.random.rand(1,9) for i in 1,2,3]
targets = N.asarray([0])
inputs = [rng.rand(10,9) for i in 1,2,3]
targets = N.asarray([0,3,4,2,3,4,4,2,1,0])
#print inputs
print 'unsupervised phase'
print 'UNSUPERVISED PHASE'
for i in xrange(10):
for i in xrange(10):
m.pretraining_update(*inputs)
print m.pretraining_update(*inputs)
print m.pretraining_update(*inputs)
print m.pretraining_update(*inputs)
print m.pretraining_update(*inputs)
print m.pretraining_update(*inputs)
print 'FINETUNING GRAPH'
for i, node in enumerate(m.finetuning_update.maker.env.toposort()):
print ' ', i, node
print """SUPERVISED PHASE COSTS (fast_compile):
2.07944154168
2.07818967136
2.07693824526
2.07568725592
2.07443669587"""
print 'SUPERVISED PHASE COSTS (%s)'%optimizer
print m.finetuning_update(*(inputs + [targets])) #the 0 is the target
print 'FINETUNING GRAPH'
print 'SUPERVISED PHASE COSTS (%s)'%optimizer
for i in xrange(10):
for i in xrange(10):
m.finetuning_update(*(inputs + [targets])) #the 0 is the target
print m.finetuning_update(*(inputs + [targets])) #the 0 is the target
print m.finetuning_update(*(inputs + [targets])) #the 0 is the target
print m.finetuning_update(*(inputs + [targets])) #the 0 is the target
print m.finetuning_update(*(inputs + [targets])) #the 0 is the target
self.fail('just failing to get stdout and stderr')
## TODO: REDO THESE TESTS
import unittest
import numpy as N
from theano.tensor.raw_random import *
from theano import compile
from theano import tensor
from theano import compile, gof
def test_state_propagation():
x = tensor.vector()
rk = RandomKit('rk', 1000)
f = compile.function([x, (rk, [gof.Container(r = gof.generic, storage = [123], name='bla')])], rk.binomial(tensor.shape(x)), mode='FAST_COMPILE')
f['rk'] = 9873456
rvals = [f([1,2,3,4,6, 7, 8]) for i in xrange(5)]
print rvals
for i in xrange(5-1):
for j in xrange(i+1, 5):
assert not N.all(rvals[i] == rvals[j])
def test_B():
"""Test that random numbers change from call to call!
Also, make sure that the seeding strategy doesn't change without failing a test.
Random numbers can't be too random or experiments aren't repeatable. Email theano-dev
before updating the `rvals` in this test.
"""
class B(RModule):
def __init__(self):
super(B, self).__init__(self)
self.x = compile.Member(tensor.dvector())
self.r = self.random.uniform(tensor.shape(self.x))
self.f = compile.Method([self.x], self.r)
class E(RModule):
def __init__(self):
super(E, self).__init__(self)
self.b = B()
self.f = compile.Method([self.b.x], self.b.r)
b = E()
m = b.make(mode='FAST_COMPILE')
m.seed(1000)
#print m.f(N.ones(5))
#print m.f(N.ones(5))
#print m.f(N.ones(5))
rvals = ["0.0655889727823 0.566937256035 0.486897708861 0.939594224804 0.731948448071",
"0.407174827663 0.450046718267 0.454825370073 0.874814293401 0.828759935744",
"0.573194634066 0.746015418896 0.864696705461 0.8405810785 0.540268740918",
"0.924477905238 0.96687901023 0.306490321744 0.654349923901 0.789402591813",
"0.513182053208 0.0426565286449 0.0723651478047 0.454308519009 0.86151064181"]
for i in xrange(5):
s = " ".join([str(n) for n in m.f(N.ones(5))])
assert s == rvals[i]
def Uniform(s, n):
return NumpyGenerator(s, n, numpy.random.RandomState.uniform)
class T_Random:#(unittest.TestCase):
def test0(self):
rng = Uniform(12345, 2)
r0 = rng((2,3))
r1 = rng((2,3))
f0 = compile.function([], [r0])
f1 = compile.function([], [r1])
v0 = f0()
self.failUnless(v0.shape == (2,3))
self.failUnless(str(v0[0,0]).startswith('0.929616'))
self.failUnless(str(v0[1,2]).startswith('0.595544'))
v1 = f1()
self.failUnless(numpy.all(v0 == v1))
v1 = f1()
self.failUnless(numpy.all(v0 != v1))
def test1(self):
rng = RandomState(12345)
f0 = compile.function([], [rng.gen('uniform', (3,))])
f1 = compile.function([], [rng.gen('uniform', (3,))])
v0, v1 = f0(), f1()
self.failUnless(v0.shape == (3,))
self.failUnless(numpy.all(v0 != v1))
def test2(self):
x = tensor.ivector()
f0 = compile.function([x], [Uniform(123, 1)(x)])
f1 = compile.function([x], [Uniform(123, 1)(x)])
v0, v1 = f0([3]), f1([7])
self.failUnless(v0.shape == (3,))
self.failUnless(numpy.all(v0 == v1[:3]))
def test3(self):
rng = RandomState(12345)
template = tensor.fmatrix()
f0 = compile.function([template], [rng.gen_like('uniform', template)])
v0 = f0(numpy.zeros((2,3)))
self.failUnless(str(v0[1,2]).startswith('0.595544'))
def test4(self):
rng = RandomState(123455)
template = tensor.fmatrix()
f0 = compile.function([template],
[rng.gen_like(('beta',{'a':0.5,'b':0.65}), template)])
v0 = f0(numpy.zeros((2,3)))
self.failUnless(v0.shape == (2,3))
self.failUnless(str(v0[0,0]).startswith('0.013259'))
self.failUnless(str(v0[1,2]).startswith('0.753368'))
def test5(self):
"""Test that two NumpyGenerators with the same dist compare equal"""
rng0 = RandomState(123456)
rng1 = RandomState(123456)
d0 = rng0.gen(('beta',{'a':0.5,'b':0.65}), (2,3,4))
d1 = rng1.gen(('beta',{'a':0.5,'b':0.65}), (2,3,4))
self.failUnless(d0.owner.op == d1.owner.op)
self.failUnless(hash(d0.owner.op) == hash(d1.owner.op))
def test6(self):
x = tensor.vector()
u = RandomState(9999).uniform_like(x,0.,10.)
fu = compile.function([x],[u])
res1 = fu(numpy.zeros((3)))
res2 = fu(numpy.zeros((3)))
self.failUnless(str(res1[0]).startswith('8.23389'))
self.failUnless(str(res2[0]).startswith('5.45926'))
b = RandomState(121212).binomial_like(x,1,0.8)
fb = compile.function([x],[b])
res1 = fb(numpy.zeros((10)))
res2 = fb(numpy.zeros((10)))
self.failUnless(list(res1) == [1,0,1,1,1,1,1,1,1,1])
self.failUnless(list(res2) == [1,1,0,1,1,1,0,0,1,1])
if __name__ == '__main__':
unittest.main()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论