提交 42809e8b authored 作者: lamblin's avatar lamblin

Merge pull request #630 from nouiz/pycuda_init

Pycuda init
...@@ -22,6 +22,8 @@ Bug fixes ...@@ -22,6 +22,8 @@ Bug fixes
* Fixed many subtle bugs involving mutable default arguments which may have * Fixed many subtle bugs involving mutable default arguments which may have
led to unexpected behaviour, such as objects sharing instance variables led to unexpected behaviour, such as objects sharing instance variables
they were not supposed to share. (David W-F) they were not supposed to share. (David W-F)
* Correctly record the GPU device number used when we let the driver select it.
(Frederic B.)
Documentation Documentation
* Added in the tutorial documentation on how to extend Theano. * Added in the tutorial documentation on how to extend Theano.
......
...@@ -22,9 +22,9 @@ class T_OpFromGraph(unittest.TestCase): ...@@ -22,9 +22,9 @@ class T_OpFromGraph(unittest.TestCase):
xv = numpy.ones((2, 2), dtype=config.floatX) xv = numpy.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX)*3 yv = numpy.ones((2, 2), dtype=config.floatX)*3
zv = numpy.ones((2, 2), dtype=config.floatX)*5 zv = numpy.ones((2, 2), dtype=config.floatX)*5
print function, function.__module__ #print function, function.__module__
print fn.maker.env.toposort() #print fn.maker.env.toposort()
print fn(xv, yv, zv) fn(xv, yv, zv)
assert numpy.all(8.0 == fn(xv, yv, zv)) assert numpy.all(8.0 == fn(xv, yv, zv))
assert numpy.all(8.0 == fn(xv, yv, zv)) assert numpy.all(8.0 == fn(xv, yv, zv))
......
...@@ -13,7 +13,7 @@ import unittest ...@@ -13,7 +13,7 @@ import unittest
def test0(): def test0():
x = theano.tensor.dvector() x = theano.tensor.dvector()
f = theano.function([x], ((2. * x) + 7) / 2., mode=debugmode.DebugMode()) f = theano.function([x], ((2. * x) + 7) / 2., mode=debugmode.DebugMode())
print f([1, 2]) f([1, 2])
class BROKEN_ON_PURPOSE_Add(gof.Op): class BROKEN_ON_PURPOSE_Add(gof.Op):
...@@ -211,7 +211,7 @@ def test_badclinkeroutput(): ...@@ -211,7 +211,7 @@ def test_badclinkeroutput():
try: try:
f_inconsistent([1.0, 2.0, 3.0], [2, 3, 4]) f_inconsistent([1.0, 2.0, 3.0], [2, 3, 4])
except debugmode.BadCLinkerOutput, e: except debugmode.BadCLinkerOutput, e:
print repr(e) #print repr(e)
assert e.r.owner.op is inconsistent assert e.r.owner.op is inconsistent
return # TEST PASS return # TEST PASS
...@@ -490,7 +490,7 @@ class Test_ViewMap(unittest.TestCase): ...@@ -490,7 +490,7 @@ class Test_ViewMap(unittest.TestCase):
f([1, 2, 3, 4], [5, 6, 7, 8]) f([1, 2, 3, 4], [5, 6, 7, 8])
assert False # DebugMode should have caught the error assert False # DebugMode should have caught the error
except debugmode.BadViewMap, e: except debugmode.BadViewMap, e:
print e #print e
pass pass
# the situation can be rescued by picking one of the inputs and # the situation can be rescued by picking one of the inputs and
...@@ -554,7 +554,7 @@ class Test_check_isfinite(unittest.TestCase): ...@@ -554,7 +554,7 @@ class Test_check_isfinite(unittest.TestCase):
#inf should go through #inf should go through
infs = numpy.asarray([1.0, 1., 1.]) / 0 infs = numpy.asarray([1.0, 1., 1.]) / 0
print infs #print infs
f(infs) f(infs)
return return
...@@ -576,11 +576,11 @@ class BrokenCImplementationAdd(gof.Op): ...@@ -576,11 +576,11 @@ class BrokenCImplementationAdd(gof.Op):
return r return r
def perform(self, node, inp, out_): def perform(self, node, inp, out_):
print 'executing python perform' #print 'executing python perform'
a, b = inp a, b = inp
out, = out_ out, = out_
z = a + b z = a + b
print 'out[0] was:', out[0] #print 'out[0] was:', out[0]
out[0] = z out[0] = z
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -671,8 +671,8 @@ class Test_preallocated_output(unittest.TestCase): ...@@ -671,8 +671,8 @@ class Test_preallocated_output(unittest.TestCase):
f = theano.function([a, b], out, mode='DEBUG_MODE') f = theano.function([a, b], out, mode='DEBUG_MODE')
out_val = f(a_val, b_val) out_val = f(a_val, b_val)
print 'out_val =', out_val #print 'out_val =', out_val
print out_val.strides #print out_val.strides
# Should work for now (0.4.0), because the C thunk does not care # Should work for now (0.4.0), because the C thunk does not care
# at all of what is in storage_map initially. # at all of what is in storage_map initially.
...@@ -682,8 +682,8 @@ class Test_preallocated_output(unittest.TestCase): ...@@ -682,8 +682,8 @@ class Test_preallocated_output(unittest.TestCase):
f = theano.function([a, b], out, mode='DEBUG_MODE') f = theano.function([a, b], out, mode='DEBUG_MODE')
out_val = f(a_val, b_val) out_val = f(a_val, b_val)
print 'out_val =', out_val #print 'out_val =', out_val
print out_val.strides #print out_val.strides
finally: finally:
config.DebugMode.check_preallocated_output = init_conf_val config.DebugMode.check_preallocated_output = init_conf_val
...@@ -307,7 +307,7 @@ class T_function(unittest.TestCase): ...@@ -307,7 +307,7 @@ class T_function(unittest.TestCase):
def test_constant_output(self): def test_constant_output(self):
# Test that if the output is a constant, we respect the theano memory interface # Test that if the output is a constant, we respect the theano memory interface
f = theano.function([],theano.tensor.constant([4])) f = theano.function([],theano.tensor.constant([4]))
print f.maker.env.toposort() #print f.maker.env.toposort()
out = f() out = f()
assert (out==4).all() assert (out==4).all()
out[0]=3 out[0]=3
...@@ -318,7 +318,7 @@ class T_function(unittest.TestCase): ...@@ -318,7 +318,7 @@ class T_function(unittest.TestCase):
# Test that if the output is a constant and borrow, we respect the theano memory interface # Test that if the output is a constant and borrow, we respect the theano memory interface
f = theano.function([],Out(theano.tensor.constant([4]), borrow=True)) f = theano.function([],Out(theano.tensor.constant([4]), borrow=True))
print f.maker.env.toposort() #print f.maker.env.toposort()
out = f() out = f()
assert (out==4).all() assert (out==4).all()
out[0]=3 out[0]=3
...@@ -412,8 +412,8 @@ class T_picklefunction(unittest.TestCase): ...@@ -412,8 +412,8 @@ class T_picklefunction(unittest.TestCase):
self.assertFalse(x in g.container) self.assertFalse(x in g.container)
self.assertFalse(x in g.value) self.assertFalse(x in g.value)
self.assertTrue(len(f.defaults) == len(g.defaults)) self.assertTrue(len(f.defaults) == len(g.defaults))
print 'f.defaults = %s' % (f.defaults, ) #print 'f.defaults = %s' % (f.defaults, )
print 'g.defaults = %s' % (g.defaults, ) #print 'g.defaults = %s' % (g.defaults, )
self.assertTrue(all([f_req == g_req and f_feed == g_feed and self.assertTrue(all([f_req == g_req and f_feed == g_feed and
f_val == g_val f_val == g_val
for ((f_req, f_feed, f_val), (g_req, g_feed, g_val)) in zip( for ((f_req, f_feed, f_val), (g_req, g_feed, g_val)) in zip(
......
...@@ -187,7 +187,7 @@ class ExampleRNN(Module): ...@@ -187,7 +187,7 @@ class ExampleRNN(Module):
self.minimizer = minimizer([x, y], self.cost, self.params) self.minimizer = minimizer([x, y], self.cost, self.params)
def _instance_initialize(self, obj): def _instance_initialize(self, obj):
print 'INITIALIZE EXAMPLE RNN' #print 'INITIALIZE EXAMPLE RNN'
n_vis = self.n_vis n_vis = self.n_vis
rng = N.random.RandomState(unittest_tools.fetch_seed(2342)) rng = N.random.RandomState(unittest_tools.fetch_seed(2342))
...@@ -214,7 +214,7 @@ def test_example_rnn(): ...@@ -214,7 +214,7 @@ def test_example_rnn():
LAG = 4 LAG = 4
y[LAG:] = x[:-LAG, 0:n_out] y[LAG:] = x[:-LAG, 0:n_out]
if 1: if 0:
for i, node in enumerate(rnn.minimizer.step_cost.maker.env.toposort()): for i, node in enumerate(rnn.minimizer.step_cost.maker.env.toposort()):
print i, node print i, node
...@@ -223,9 +223,6 @@ def test_example_rnn(): ...@@ -223,9 +223,6 @@ def test_example_rnn():
niter=30 niter=30
for i in xrange(niter): for i in xrange(niter):
if i % 100 == 0:
print i, rnn.minimizer.step_cost(x, y), rnn.minimizer.stepsize
else:
rnn.minimizer.step_cost(x, y) rnn.minimizer.step_cost(x, y)
if theano.config.mode=='DEBUG_MODE': if theano.config.mode=='DEBUG_MODE':
assert rnn.minimizer.step_cost(x,y) < -.9 #it starts around -.28 assert rnn.minimizer.step_cost(x,y) < -.9 #it starts around -.28
...@@ -258,7 +255,7 @@ def test_WEIRD_STUFF(): ...@@ -258,7 +255,7 @@ def test_WEIRD_STUFF():
# rnn2 = rnn_module1.make(mode=Mode('c|py', 'fast_run').excluding("inplace_opt"))#work # rnn2 = rnn_module1.make(mode=Mode('c|py', 'fast_run').excluding("inplace_opt"))#work
# rnn2 = rnn_module1.make(mode=Mode('py', 'fast_run'))#fail # rnn2 = rnn_module1.make(mode=Mode('py', 'fast_run'))#fail
m = Mode('py', 'fast_run') m = Mode('py', 'fast_run')
for n in m.optimizer: print n.name # for n in m.optimizer: print n.name
if 0: if 0:
topo1=rnn1.minimizer.step_cost.maker.env.toposort() topo1=rnn1.minimizer.step_cost.maker.env.toposort()
...@@ -266,7 +263,7 @@ def test_WEIRD_STUFF(): ...@@ -266,7 +263,7 @@ def test_WEIRD_STUFF():
for i in range(len(topo1)): for i in range(len(topo1)):
print '1',i, topo1[i] print '1',i, topo1[i]
print '2',i, topo2[i] print '2',i, topo2[i]
if 1: if 0:
topo1=rnn1.minimizer.step.maker.env.toposort() topo1=rnn1.minimizer.step.maker.env.toposort()
topo2=rnn2.minimizer.step.maker.env.toposort() topo2=rnn2.minimizer.step.maker.env.toposort()
for i in range(len(topo1)): for i in range(len(topo1)):
...@@ -274,10 +271,10 @@ def test_WEIRD_STUFF(): ...@@ -274,10 +271,10 @@ def test_WEIRD_STUFF():
print '2',i, topo2[i] print '2',i, topo2[i]
import theano.printing import theano.printing
print len(rnn1.minimizer.step.maker.inputs) #print len(rnn1.minimizer.step.maker.inputs)
print len(rnn2.minimizer.step.maker.inputs) #print len(rnn2.minimizer.step.maker.inputs)
print rnn1.minimizer.step.maker.inputs #print rnn1.minimizer.step.maker.inputs
print rnn2.minimizer.step.maker.inputs #print rnn2.minimizer.step.maker.inputs
...@@ -293,15 +290,15 @@ def test_WEIRD_STUFF(): ...@@ -293,15 +290,15 @@ def test_WEIRD_STUFF():
niter=3 niter=3
for i in xrange(niter): for i in xrange(niter):
print rnn1.minimizer.step_cost(x, y) #print rnn1.minimizer.step_cost(x, y)
print rnn2.minimizer.step_cost(x, y) #print rnn2.minimizer.step_cost(x, y)
# assert rnn1.n_vis != rnn2.n_vis or slef.n_hid != rnn2.n_hid or rnn1.n_out != rnn2.n_out # assert rnn1.n_vis != rnn2.n_vis or slef.n_hid != rnn2.n_hid or rnn1.n_out != rnn2.n_out
assert (N.abs(rnn1.z0-rnn2.z0)<1e-8).all() assert (N.abs(rnn1.z0-rnn2.z0)<1e-8).all()
print (N.abs(rnn1.w-rnn2.w)<1e-8).all() #print (N.abs(rnn1.w-rnn2.w)<1e-8).all()
print (N.abs(rnn1.w-rnn2.w)) #print (N.abs(rnn1.w-rnn2.w))
print rnn1.w #print rnn1.w
print rnn2.w #print rnn2.w
assert (N.abs(rnn1.w-rnn2.w)<1e-8).all() assert (N.abs(rnn1.w-rnn2.w)<1e-8).all()
# assert b # assert b
......
...@@ -18,7 +18,7 @@ class NNet(object): ...@@ -18,7 +18,7 @@ class NNet(object):
self.lr = shared(lr, 'learning_rate') self.lr = shared(lr, 'learning_rate')
self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1') self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2') self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
print self.lr.type #print self.lr.type
self.hidden = sigmoid(tensor.dot(self.w1, self.input)) self.hidden = sigmoid(tensor.dot(self.w1, self.input))
self.output = tensor.dot(self.w2, self.hidden) self.output = tensor.dot(self.w2, self.hidden)
...@@ -51,7 +51,7 @@ class TestNnet(unittest.TestCase): ...@@ -51,7 +51,7 @@ class TestNnet(unittest.TestCase):
output, cost = nnet.sgd_step(input, target) output, cost = nnet.sgd_step(input, target)
mean_cost += cost mean_cost += cost
mean_cost /= float(len(data)) mean_cost /= float(len(data))
print 'Mean cost at epoch %s: %s' % (epoch, mean_cost) #print 'Mean cost at epoch %s: %s' % (epoch, mean_cost)
self.assertTrue(abs(mean_cost - 0.20588975452) < 1e-6) self.assertTrue(abs(mean_cost - 0.20588975452) < 1e-6)
# Just call functions to make sure they do not crash. # Just call functions to make sure they do not crash.
out = nnet.compute_output(input) out = nnet.compute_output(input)
......
...@@ -32,7 +32,7 @@ class T_bunch_of_modes(unittest.TestCase): ...@@ -32,7 +32,7 @@ class T_bunch_of_modes(unittest.TestCase):
# test that it runs something # test that it runs something
f([[1, 2], [3, 4]], [5, 6]) f([[1, 2], [3, 4]], [5, 6])
linker_classes_involved.append(f.maker.mode.linker.__class__) linker_classes_involved.append(f.maker.mode.linker.__class__)
print 'MODE:', mode, f.maker.mode.linker, 'stop' # print 'MODE:', mode, f.maker.mode.linker, 'stop'
# regression check: # regression check:
# there should be # there should be
# - VM_Linker # - VM_Linker
......
...@@ -146,7 +146,7 @@ class T_module(unittest.TestCase): ...@@ -146,7 +146,7 @@ class T_module(unittest.TestCase):
#assign 4 and 5 to the two variables' containers in m #assign 4 and 5 to the two variables' containers in m
m.l = [4, 5] m.l = [4, 5]
print 'm.f', m.f() m.f()
assert numpy.all(5 == m.f()) assert numpy.all(5 == m.f())
assert numpy.all(4 == m.g()) assert numpy.all(4 == m.g())
...@@ -189,9 +189,9 @@ class T_module(unittest.TestCase): ...@@ -189,9 +189,9 @@ class T_module(unittest.TestCase):
assert 5 == m.f() assert 5 == m.f()
assert 4 == m.g() assert 4 == m.g()
print 'dscalar test' #print 'dscalar test'
local_test(lambda:T.dscalar(),lambda:T.dscalar()) local_test(lambda:T.dscalar(),lambda:T.dscalar())
print 'value test' #print 'value test'
local_test(lambda:T.value(1),lambda:T.value(2)) local_test(lambda:T.value(1),lambda:T.value(2))
...@@ -494,9 +494,9 @@ class T_module(unittest.TestCase): ...@@ -494,9 +494,9 @@ class T_module(unittest.TestCase):
M.a = [1,2,3] M.a = [1,2,3]
M.make() M.make()
m = M.make() m = M.make()
print m.a #print m.a
print m.a[0], type(m.a[0]), m.a[0] == 1 #print m.a[0], type(m.a[0]), m.a[0] == 1
print list(m.a) #print list(m.a)
assert list(m.a) == [1,2,3] assert list(m.a) == [1,2,3]
assert m.a is not M.a assert m.a is not M.a
try: try:
...@@ -545,7 +545,8 @@ def test_multiple_references(): ...@@ -545,7 +545,8 @@ def test_multiple_references():
self.sub_module = sub_module self.sub_module = sub_module
def _instance_initialize(self, obj): def _instance_initialize(self, obj):
print 'Initializing A' pass
#print 'Initializing A'
class B(theano.Module): class B(theano.Module):
...@@ -555,7 +556,8 @@ def test_multiple_references(): ...@@ -555,7 +556,8 @@ def test_multiple_references():
self.sub_module = sub_module self.sub_module = sub_module
def _instance_initialize(self, obj): def _instance_initialize(self, obj):
print 'Initializing B' pass
#print 'Initializing B'
class C(theano.Module): class C(theano.Module):
...@@ -565,11 +567,11 @@ def test_multiple_references(): ...@@ -565,11 +567,11 @@ def test_multiple_references():
self.value = theano.tensor.scalar() self.value = theano.tensor.scalar()
def _instance_initialize(self, obj): def _instance_initialize(self, obj):
print 'Initializing C' #print 'Initializing C'
obj.value = 0 obj.value = 0
def _instance_set(self, obj, value): def _instance_set(self, obj, value):
print 'Setting C' #print 'Setting C'
obj.value = value obj.value = value
...@@ -584,7 +586,7 @@ def test_multiple_references(): ...@@ -584,7 +586,7 @@ def test_multiple_references():
self.bug = theano.tensor.scalar() self.bug = theano.tensor.scalar()
def _instance_initialize(self, obj): def _instance_initialize(self, obj):
print 'Initializing D' #print 'Initializing D'
obj.c.set(1) obj.c.set(1)
......
...@@ -369,7 +369,6 @@ class Test_pfunc(unittest.TestCase): ...@@ -369,7 +369,6 @@ class Test_pfunc(unittest.TestCase):
z: (((x * 5) + y) ** z)}) z: (((x * 5) + y) ** z)})
up() up()
print x.get_value(borrow=True)
assert numpy.all(x.get_value() == 20) assert numpy.all(x.get_value() == 20)
assert numpy.all(y.get_value() == 24) assert numpy.all(y.get_value() == 24)
assert numpy.all(z.get_value() == (24 ** 2)) assert numpy.all(z.get_value() == (24 ** 2))
...@@ -380,7 +379,6 @@ class Test_pfunc(unittest.TestCase): ...@@ -380,7 +379,6 @@ class Test_pfunc(unittest.TestCase):
f = pfunc([], [x]) f = pfunc([], [x])
f() f()
print x.get_value()
assert x.get_value() == 1 assert x.get_value() == 1
del x.default_update del x.default_update
...@@ -399,32 +397,26 @@ class Test_pfunc(unittest.TestCase): ...@@ -399,32 +397,26 @@ class Test_pfunc(unittest.TestCase):
# Test that the default update is taken into account in the right cases # Test that the default update is taken into account in the right cases
f1 = pfunc([], [x], no_default_updates=True) f1 = pfunc([], [x], no_default_updates=True)
f1() f1()
print x.get_value()
assert x.get_value() == 0 assert x.get_value() == 0
f2 = pfunc([], [x], no_default_updates=[x]) f2 = pfunc([], [x], no_default_updates=[x])
f2() f2()
print x.get_value()
assert x.get_value() == 0 assert x.get_value() == 0
f3 = pfunc([], [x], no_default_updates=[x, y]) f3 = pfunc([], [x], no_default_updates=[x, y])
f3() f3()
print x.get_value()
assert x.get_value() == 0 assert x.get_value() == 0
f4 = pfunc([], [x], no_default_updates=[y]) f4 = pfunc([], [x], no_default_updates=[y])
f4() f4()
print x.get_value()
assert x.get_value() == 2 assert x.get_value() == 2
f5 = pfunc([], [x], no_default_updates=[]) f5 = pfunc([], [x], no_default_updates=[])
f5() f5()
print x.get_value()
assert x.get_value() == 4 assert x.get_value() == 4
f5 = pfunc([], [x], no_default_updates=False) f5 = pfunc([], [x], no_default_updates=False)
f5() f5()
print x.get_value()
assert x.get_value() == 6 assert x.get_value() == 6
self.assertRaises(TypeError, pfunc, [], [x], no_default_updates=(x)) self.assertRaises(TypeError, pfunc, [], [x], no_default_updates=(x))
...@@ -435,32 +427,26 @@ class Test_pfunc(unittest.TestCase): ...@@ -435,32 +427,26 @@ class Test_pfunc(unittest.TestCase):
# Mix explicit updates and no_default_updates # Mix explicit updates and no_default_updates
g1 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=True) g1 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=True)
g1() g1()
print x.get_value()
assert x.get_value() == 5 assert x.get_value() == 5
g2 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[x]) g2 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[x])
g2() g2()
print x.get_value()
assert x.get_value() == 4 assert x.get_value() == 4
g3 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[x, y]) g3 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[x, y])
g3() g3()
print x.get_value()
assert x.get_value() == 3 assert x.get_value() == 3
g4 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[y]) g4 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[y])
g4() g4()
print x.get_value()
assert x.get_value() == 2 assert x.get_value() == 2
g5 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[]) g5 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[])
g5() g5()
print x.get_value()
assert x.get_value() == 1 assert x.get_value() == 1
g5 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=False) g5 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=False)
g5() g5()
print x.get_value()
assert x.get_value() == 0 assert x.get_value() == 0
def test_default_updates_expressions(self): def test_default_updates_expressions(self):
...@@ -473,17 +459,14 @@ class Test_pfunc(unittest.TestCase): ...@@ -473,17 +459,14 @@ class Test_pfunc(unittest.TestCase):
f1 = pfunc([a], z) f1 = pfunc([a], z)
f1(12) f1(12)
print x
assert x.get_value() == 1 assert x.get_value() == 1
f2 = pfunc([a], z, no_default_updates=True) f2 = pfunc([a], z, no_default_updates=True)
assert f2(7) == 7 assert f2(7) == 7
print x
assert x.get_value() == 1 assert x.get_value() == 1
f3 = pfunc([a], z, no_default_updates=[x]) f3 = pfunc([a], z, no_default_updates=[x])
assert f3(9) == 9 assert f3(9) == 9
print x
assert x.get_value() == 1 assert x.get_value() == 1
def test_default_updates_multiple(self): def test_default_updates_multiple(self):
...@@ -524,7 +507,6 @@ class Test_pfunc(unittest.TestCase): ...@@ -524,7 +507,6 @@ class Test_pfunc(unittest.TestCase):
f1 = pfunc([], [x]) f1 = pfunc([], [x])
f1() f1()
print x.get_value(), y.get_value(), z.get_value()
assert x.get_value() == 1 assert x.get_value() == 1
assert y.get_value() == -1 assert y.get_value() == -1
assert z.get_value() == -2 assert z.get_value() == -2
...@@ -598,10 +580,8 @@ class Test_pfunc(unittest.TestCase): ...@@ -598,10 +580,8 @@ class Test_pfunc(unittest.TestCase):
b = 2 * a b = 2 * a
# Use only the tip of the graph, a is not used # Use only the tip of the graph, a is not used
f = pfunc([b], b) f = pfunc([b], b)
print 'a.get_value() =', a.get_value()
assert a.get_value() == 0 assert a.get_value() == 0
f(21) f(21)
print 'a.get_value() =', a.get_value()
assert a.get_value() == 0 assert a.get_value() == 0
def test_givens_replaces_shared_variable(self): def test_givens_replaces_shared_variable(self):
...@@ -917,7 +897,7 @@ class Test_aliasing_rules(unittest.TestCase): ...@@ -917,7 +897,7 @@ class Test_aliasing_rules(unittest.TestCase):
data_of_b = data_of(B) data_of_b = data_of(B)
f = pfunc([], [], updates=[(A, B[:, ::-1]), (B, A.T)]) f = pfunc([], [], updates=[(A, B[:, ::-1]), (B, A.T)])
theano.printing.debugprint(f) #theano.printing.debugprint(f)
f() f()
# correctness (doesn't actually test the view...) # correctness (doesn't actually test the view...)
assert numpy.all(data_of(A) == -.5) assert numpy.all(data_of(A) == -.5)
...@@ -938,7 +918,6 @@ class Test_aliasing_rules(unittest.TestCase): ...@@ -938,7 +918,6 @@ class Test_aliasing_rules(unittest.TestCase):
assert numpy.all(data_of(B) < 5) assert numpy.all(data_of(B) < 5)
data_of_a += 10 data_of_a += 10
print data_of(B)
assert numpy.all(data_of(B) > 5) assert numpy.all(data_of(B) > 5)
data_of_a -= 10 data_of_a -= 10
......
...@@ -195,8 +195,8 @@ def test_clinker_literal_inlining(): ...@@ -195,8 +195,8 @@ def test_clinker_literal_inlining():
fn = lnk.make_function() fn = lnk.make_function()
assert abs(fn(2.0, 2.0) + 0.12345678) < 1e-9 assert abs(fn(2.0, 2.0) + 0.12345678) < 1e-9
code = lnk.code_gen() code = lnk.code_gen()
print "=== Code generated ===" #print "=== Code generated ==="
print code #print code
assert "4.12345678" in code # we expect the number to be inlined assert "4.12345678" in code # we expect the number to be inlined
......
...@@ -110,22 +110,22 @@ class FailureWatch: ...@@ -110,22 +110,22 @@ class FailureWatch:
def consistent(g): def consistent(g):
print "Testing consistent:", g #print "Testing consistent:", g
try: try:
assert g.consistent() assert g.consistent()
except AssertionError: except AssertionError:
print "Test failed! The graph was marked as NOT consistent." print "Test failed! The graph was marked as NOT consistent."
raise raise
print "Test OK" #print "Test OK"
def inconsistent(g): def inconsistent(g):
print "Testing NOT consistent:", g #print "Testing NOT consistent:", g
try: try:
assert not g.consistent() assert not g.consistent()
except AssertionError: except AssertionError:
print "Test failed! The graph was marked as consistent." print "Test failed! The graph was marked as consistent."
raise raise
print "Test OK" #print "Test OK"
......
...@@ -74,10 +74,10 @@ def test_speed(): ...@@ -74,10 +74,10 @@ def test_speed():
numpy_version(x, steps_a) numpy_version(x, steps_a)
t0 = time.time() t0 = time.time()
print numpy_version(x, steps_a) #print numpy_version(x, steps_a)
t1 = time.time() t1 = time.time()
t2 = time.time() t2 = time.time()
print numpy_version(x, steps_b) #print numpy_version(x, steps_b)
t3 = time.time() t3 = time.time()
t_a = t1 - t0 t_a = t1 - t0
t_b = t3 - t2 t_b = t3 - t2
...@@ -103,15 +103,15 @@ def test_speed(): ...@@ -103,15 +103,15 @@ def test_speed():
#profile='f_b speed test %s'%name, #profile='f_b speed test %s'%name,
) )
print f_a([2.0, 3.0]) f_a([2.0, 3.0])
t0 = time.time() t0 = time.time()
print f_a([2.0, 3.0]) f_a([2.0, 3.0])
t1 = time.time() t1 = time.time()
print f_b([2.0, 3.0]) f_b([2.0, 3.0])
t2 = time.time() t2 = time.time()
print f_b([2.0, 3.0]) f_b([2.0, 3.0])
t3 = time.time() t3 = time.time()
t_a = t1 - t0 t_a = t1 - t0
...@@ -155,15 +155,15 @@ def test_speed_lazy(): ...@@ -155,15 +155,15 @@ def test_speed_lazy():
#profile='f_b lazy ifelse %s'%name, #profile='f_b lazy ifelse %s'%name,
) )
print f_a([2.0]) f_a([2.0])
t0 = time.time() t0 = time.time()
print f_a([2.0]) f_a([2.0])
t1 = time.time() t1 = time.time()
print f_b([2.0]) f_b([2.0])
t2 = time.time() t2 = time.time()
print f_b([2.0]) f_b([2.0])
t3 = time.time() t3 = time.time()
t_a = t1 - t0 t_a = t1 - t0
......
import os import os
import warnings
import theano import theano
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
cuda_ndarray = cuda.cuda_ndarray.cuda_ndarray
def select_gpu_from_theano(): def set_gpu_from_theano():
# Transfer the theano gpu binding to pycuda, for consistency """
theano_to_pycuda_device_map = {"cpu": "0", This set the GPU used by PyCUDA to the same as the one used by Theano.
"gpu0": "0", """
"gpu1": "1", #import pdb;pdb.set_trace()
"gpu2": "2", if cuda.use.device_number is None:
"gpu3": "3"} cuda.use("gpu",
dev = theano_to_pycuda_device_map.get(theano.config.device, "0") force=False,
if theano.config.device == 'gpu': default_to_move_computation_to_gpu=False,
dev = str(cuda.cuda_ndarray.cuda_ndarray.active_device_number()) move_shared_float32_to_gpu=False,
os.environ["CUDA_DEVICE"] = dev enable_cuda=True,
test_driver=True)
select_gpu_from_theano() assert cuda.use.device_number == cuda_ndarray.active_device_number()
# os.environ["CUDA_DEVICE"] = str(cuda.use.device_number)
set_gpu_from_theano()
pycuda_available = False pycuda_available = False
try: if False:
try:
import pycuda import pycuda
import pycuda.autoinit import pycuda.autoinit
pycuda_available = True pycuda_available = True
except ImportError: except ImportError:
# presumably, the user wanted to use pycuda, else they wouldn't have # presumably, the user wanted to use pycuda, else they wouldn't have
# imported this module, so issue a warning that the import failed. # imported this module, so issue a warning that the import failed.
import warnings
warnings.warn("PyCUDA import failed in theano.misc.pycuda_init") warnings.warn("PyCUDA import failed in theano.misc.pycuda_init")
...@@ -313,11 +313,15 @@ def use(device, ...@@ -313,11 +313,15 @@ def use(device,
gpu_init(device) gpu_init(device)
use.device_number = device use.device_number = device
else: else:
# This mean we let the driver select the GPU. # This mean the driver should select the GPU. As we
# But default it is always number 0. # need to get the device number now, we force the
# If the driver is in exclusive mode, it will always show # selection of the GPU by the driver now and then we
# device 0 event if it use something else. # query the active GPU. If we check the active GPU before
use.device_number = 0 # the device is initialized we will always receive 0
# even if another device is selected later.
cuda_ndarray.cuda_ndarray.CudaNdarray.zeros((2, 3))
use.device_number = active_device_number()
if test_driver: if test_driver:
import theano.sandbox.cuda.tests.test_driver import theano.sandbox.cuda.tests.test_driver
theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1() theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1()
......
...@@ -238,7 +238,7 @@ if 0: ...@@ -238,7 +238,7 @@ if 0:
bval = numpy.arange(0,d0*d1).reshape(1,1,d0,d1) bval = numpy.arange(0,d0*d1).reshape(1,1,d0,d1)
r = f(bval)[0] r = f(bval)[0]
# print bval, bval.shape, border # print bval, bval.shape, border
print r, r.shape #print r, r.shape
assert (ret==r).all() assert (ret==r).all()
...@@ -284,7 +284,7 @@ def test_downsample(): ...@@ -284,7 +284,7 @@ def test_downsample():
if float(shp[3]) / ds[1] > 512: if float(shp[3]) / ds[1] > 512:
continue continue
for ignore_border in (True, False): for ignore_border in (True, False):
print 'test_downsample', shp, ds, ignore_border #print 'test_downsample', shp, ds, ignore_border
ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border) ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)
a = tcn.shared_constructor(my_rand(*shp), 'a') a = tcn.shared_constructor(my_rand(*shp), 'a')
......
...@@ -30,7 +30,7 @@ def advantage(cpu_dt, gpu_dt): ...@@ -30,7 +30,7 @@ def advantage(cpu_dt, gpu_dt):
return cpu_dt / gpu_dt return cpu_dt / gpu_dt
def test_host_to_device(): def test_host_to_device():
print >>sys.stdout, 'starting test_host_to_dev' #print >>sys.stdout, 'starting test_host_to_dev'
for shape in ((), (3,), (2,3), (3,4,5,6)): for shape in ((), (3,), (2,3), (3,4,5,6)):
a = theano._asarray(numpy.random.rand(*shape), dtype='float32') a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
b = cuda_ndarray.CudaNdarray(a) b = cuda_ndarray.CudaNdarray(a)
...@@ -84,7 +84,7 @@ def test_add_iadd_idiv(): ...@@ -84,7 +84,7 @@ def test_add_iadd_idiv():
asum = a0 + a1 asum = a0 + a1
t1 = time.time() t1 = time.time()
cpu_dt = t1 - t0 cpu_dt = t1 - t0
print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt) #print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
assert numpy.allclose(asum, numpy.asarray(bsum)) assert numpy.allclose(asum, numpy.asarray(bsum))
#test not contiguous version. #test not contiguous version.
...@@ -122,7 +122,7 @@ def test_add_iadd_idiv(): ...@@ -122,7 +122,7 @@ def test_add_iadd_idiv():
a0 += a1 a0 += a1
t1 = time.time() t1 = time.time()
cpu_dt = t1 - t0 cpu_dt = t1 - t0
print shape, 'adding inplace', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt) #print shape, 'adding inplace', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
assert numpy.allclose(a0, numpy.asarray(b0)) assert numpy.allclose(a0, numpy.asarray(b0))
assert numpy.allclose(a0, a0_orig + a1) assert numpy.allclose(a0, a0_orig + a1)
...@@ -144,7 +144,7 @@ def test_add_iadd_idiv(): ...@@ -144,7 +144,7 @@ def test_add_iadd_idiv():
assert numpy.allclose(a0, ((a0_orig+a1)/a1+a1[..., ::-1])/a1[..., ::-1]) assert numpy.allclose(a0, ((a0_orig+a1)/a1+a1[..., ::-1])/a1[..., ::-1])
def test_exp(): def test_exp():
print >>sys.stdout, 'starting test_exp' #print >>sys.stdout, 'starting test_exp'
for shape in ((), (3,), (2,3), (1,10000000),(10,1000000), (100,100000),(1000,10000),(10000,1000)): for shape in ((), (3,), (2,3), (1,10000000),(10,1000000), (100,100000),(1000,10000),(10000,1000)):
a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32') a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32')
a1 = a0.copy() a1 = a0.copy()
...@@ -158,26 +158,26 @@ def test_exp(): ...@@ -158,26 +158,26 @@ def test_exp():
asum = numpy.exp(a1) asum = numpy.exp(a1)
t1 = time.time() t1 = time.time()
cpu_dt = t1 - t0 cpu_dt = t1 - t0
print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt) #print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
#c = numpy.asarray(b0+b1) #c = numpy.asarray(b0+b1)
if asum.shape: if asum.shape:
assert numpy.allclose(asum, numpy.asarray(bsum)) assert numpy.allclose(asum, numpy.asarray(bsum))
def test_copy(): def test_copy():
print >>sys.stdout, 'starting test_copy' #print >>sys.stdout, 'starting test_copy'
shape = (500,499) shape = (500,499)
a = theano._asarray(numpy.random.rand(*shape), dtype='float32') a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
print >>sys.stdout, '.. creating device object' #print >>sys.stdout, '.. creating device object'
b = cuda_ndarray.CudaNdarray(a) b = cuda_ndarray.CudaNdarray(a)
print >>sys.stdout, '.. copy' #print >>sys.stdout, '.. copy'
c = copy.copy(b) c = copy.copy(b)
print >>sys.stdout, '.. deepcopy' #print >>sys.stdout, '.. deepcopy'
d = copy.deepcopy(b) d = copy.deepcopy(b)
print >>sys.stdout, '.. comparisons' #print >>sys.stdout, '.. comparisons'
assert numpy.allclose(a, numpy.asarray(b)) assert numpy.allclose(a, numpy.asarray(b))
assert numpy.allclose(a, numpy.asarray(c)) assert numpy.allclose(a, numpy.asarray(c))
assert numpy.allclose(a, numpy.asarray(d)) assert numpy.allclose(a, numpy.asarray(d))
...@@ -268,7 +268,7 @@ class test_DimShuffle(unittest.TestCase): ...@@ -268,7 +268,7 @@ class test_DimShuffle(unittest.TestCase):
def test_dot(): def test_dot():
print >>sys.stdout, 'starting test_dot' #print >>sys.stdout, 'starting test_dot'
utt.seed_rng() utt.seed_rng()
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
...@@ -320,8 +320,8 @@ def test_sum(): ...@@ -320,8 +320,8 @@ def test_sum():
a0sum = a0.sum(axis=0) a0sum = a0.sum(axis=0)
b0sum = b0.reduce_sum([1,0]) b0sum = b0.reduce_sum([1,0])
print 'asum\n',a0sum #print 'asum\n',a0sum
print 'bsum\n',numpy.asarray(b0sum) #print 'bsum\n',numpy.asarray(b0sum)
assert numpy.allclose(a0.sum(axis=0), numpy.asarray(b0.reduce_sum([1,0]))) assert numpy.allclose(a0.sum(axis=0), numpy.asarray(b0.reduce_sum([1,0])))
assert numpy.allclose(a0.sum(axis=1), numpy.asarray(b0.reduce_sum([0,1]))) assert numpy.allclose(a0.sum(axis=1), numpy.asarray(b0.reduce_sum([0,1])))
...@@ -932,7 +932,7 @@ def test_base(): ...@@ -932,7 +932,7 @@ def test_base():
c = a[0] c = a[0]
d = c[:,0] d = c[:,0]
print d.shape #print d.shape
assert c.base is a assert c.base is a
assert d.base is a assert d.base is a
......
...@@ -103,7 +103,7 @@ def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10, ...@@ -103,7 +103,7 @@ def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
mode = get_mode(use_gpu) mode = get_mode(use_gpu)
print 'building pfunc ...' #print 'building pfunc ...'
train = pfunc([x, y, lr], [loss], mode=mode, train = pfunc([x, y, lr], [loss], mode=mode,
updates=[(p, p - g) for p, g in izip(params, gparams)]) updates=[(p, p - g) for p, g in izip(params, gparams)])
...@@ -138,9 +138,9 @@ def test_run_nnet(): ...@@ -138,9 +138,9 @@ def test_run_nnet():
theano.gradient.numeric_grad.abs_rel_err(rval_gpu, theano.gradient.numeric_grad.abs_rel_err(rval_gpu,
rval_cpu) rval_cpu)
max_abs_diff = abs_diff.max() max_abs_diff = abs_diff.max()
print "max abs diff=%e max rel diff=%e n_in=%d n_hid=%d" % ( #print "max abs diff=%e max rel diff=%e n_in=%d n_hid=%d" % (
max_abs_diff, rel_diff.max(), n_in, n_hid) # max_abs_diff, rel_diff.max(), n_in, n_hid)
print "time cpu: %f, time gpu: %f, speed up %f" % (tc, tg, tc / tg) #print "time cpu: %f, time gpu: %f, speed up %f" % (tc, tg, tc / tg)
rtol = 1e-4 rtol = 1e-4
if n_in * n_hid >= 2048 * 4096: if n_in * n_hid >= 2048 * 4096:
rtol = 7e-4 rtol = 7e-4
...@@ -192,14 +192,14 @@ def run_conv_nnet1(use_gpu): ...@@ -192,14 +192,14 @@ def run_conv_nnet1(use_gpu):
hid_flat = hid.reshape((n_batch, n_hid)) hid_flat = hid.reshape((n_batch, n_hid))
out = tensor.tanh(tensor.dot(hid_flat, v)+c) out = tensor.tanh(tensor.dot(hid_flat, v)+c)
loss = tensor.sum(0.5 * (out-y)**2 * lr) loss = tensor.sum(0.5 * (out-y)**2 * lr)
print 'loss type', loss.type #print 'loss type', loss.type
params = [w, b, v, c] params = [w, b, v, c]
gparams = tensor.grad(loss, params) gparams = tensor.grad(loss, params)
mode = get_mode(use_gpu) mode = get_mode(use_gpu)
print 'building pfunc ...' #print 'building pfunc ...'
train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)]) train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
# for i, n in enumerate(train.maker.env.toposort()): # for i, n in enumerate(train.maker.env.toposort()):
...@@ -211,7 +211,7 @@ def run_conv_nnet1(use_gpu): ...@@ -211,7 +211,7 @@ def run_conv_nnet1(use_gpu):
for i in xrange(n_train): for i in xrange(n_train):
rval = train(xval, yval, lr) rval = train(xval, yval, lr)
print 'training done' #print 'training done'
print_mode(mode) print_mode(mode)
return rval return rval
...@@ -281,14 +281,14 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST ...@@ -281,14 +281,14 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST
hid_flat = hid1.reshape((n_batch, n_hid)) hid_flat = hid1.reshape((n_batch, n_hid))
out = tensor.tanh(tensor.dot(hid_flat, v)+c) out = tensor.tanh(tensor.dot(hid_flat, v)+c)
loss = tensor.sum(0.5 * (out-y)**2 * lr) loss = tensor.sum(0.5 * (out-y)**2 * lr)
print 'loss type', loss.type #print 'loss type', loss.type
params = [w0, b0, w1, b1, v, c] params = [w0, b0, w1, b1, v, c]
gparams = tensor.grad(loss, params) gparams = tensor.grad(loss, params)
mode = get_mode(use_gpu) mode = get_mode(use_gpu)
print 'building pfunc ...' #print 'building pfunc ...'
train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)]) train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
# for i, n in enumerate(train.maker.env.toposort()): # for i, n in enumerate(train.maker.env.toposort()):
...@@ -310,7 +310,7 @@ def test_conv_nnet2(): ...@@ -310,7 +310,7 @@ def test_conv_nnet2():
if True: if True:
utt.seed_rng() utt.seed_rng()
rval_cpu = run_conv_nnet2(False) rval_cpu = run_conv_nnet2(False)
print rval_cpu[0], rval_gpu[0],rval_cpu[0]-rval_gpu[0] #print rval_cpu[0], rval_gpu[0],rval_cpu[0]-rval_gpu[0]
assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-4,atol=1e-4) assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-4,atol=1e-4)
...@@ -350,9 +350,9 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch, ...@@ -350,9 +350,9 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch,
v = shared_fn(0.01*my_randn(n_hid, n_out), 'v') v = shared_fn(0.01*my_randn(n_hid, n_out), 'v')
c = shared_fn(my_zeros(n_out), 'c') c = shared_fn(my_zeros(n_out), 'c')
print 'ALLOCATING ARCH: w0 shape', w0.get_value(borrow=True).shape #print 'ALLOCATING ARCH: w0 shape', w0.get_value(borrow=True).shape
print 'ALLOCATING ARCH: w1 shape', w1.get_value(borrow=True).shape #print 'ALLOCATING ARCH: w1 shape', w1.get_value(borrow=True).shape
print 'ALLOCATING ARCH: v shape', v.get_value(borrow=True).shape #print 'ALLOCATING ARCH: v shape', v.get_value(borrow=True).shape
x = tensor.Tensor(dtype='float32', broadcastable=(0,1,0,0))('x') x = tensor.Tensor(dtype='float32', broadcastable=(0,1,0,0))('x')
y = tensor.fmatrix('y') y = tensor.fmatrix('y')
...@@ -375,14 +375,14 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch, ...@@ -375,14 +375,14 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch,
hid_flat = hid1.reshape((n_batch, n_hid)) hid_flat = hid1.reshape((n_batch, n_hid))
out = tensor.nnet.softmax(tensor.dot(hid_flat, v)+c) out = tensor.nnet.softmax(tensor.dot(hid_flat, v)+c)
loss = tensor.sum(tensor.nnet.crossentropy_categorical_1hot(out, tensor.argmax(y, axis=1)) * lr) loss = tensor.sum(tensor.nnet.crossentropy_categorical_1hot(out, tensor.argmax(y, axis=1)) * lr)
print 'loss type', loss.type #print 'loss type', loss.type
params = [w0, b0, w1, b1, v, c] params = [w0, b0, w1, b1, v, c]
gparams = tensor.grad(loss, params, warn_type=True) gparams = tensor.grad(loss, params, warn_type=True)
mode = get_mode(use_gpu, check_isfinite) mode = get_mode(use_gpu, check_isfinite)
print 'building pfunc ...' #print 'building pfunc ...'
train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)]) train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
if verbose: if verbose:
...@@ -437,9 +437,9 @@ def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize, ...@@ -437,9 +437,9 @@ def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize,
print pickle.dumps(mode) print pickle.dumps(mode)
print "END %s profile mode dump" % device print "END %s profile mode dump" % device
print "%s time: %.3f" % (device, t1-t0) #print "%s time: %.3f" % (device, t1-t0)
print "estimated time for one pass through MNIST with %s: %f" % ( #print "estimated time for one pass through MNIST with %s: %f" % (
device, (t1-t0) * (60000.0 / (n_train*bsize))) # device, (t1-t0) * (60000.0 / (n_train*bsize)))
def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize, def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
...@@ -465,7 +465,7 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize, ...@@ -465,7 +465,7 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
orig_float32_atol = theano.tensor.basic.float32_atol orig_float32_atol = theano.tensor.basic.float32_atol
try: try:
if float_atol: if float_atol:
print "float_atol", float_atol #print "float_atol", float_atol
theano.tensor.basic.float32_atol = float_atol theano.tensor.basic.float32_atol = float_atol
if gpu_only and cpu_only: if gpu_only and cpu_only:
...@@ -565,12 +565,12 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize, ...@@ -565,12 +565,12 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
print pickle.dumps(gpu_mode) print pickle.dumps(gpu_mode)
print "END GPU profile mode dump" print "END GPU profile mode dump"
print "CPU time: %.3f, GPU time: %.3f, speed up %f" % ( #print "CPU time: %.3f, GPU time: %.3f, speed up %f" % (
(time_cpu, time_gpu, time_cpu/time_gpu)) # (time_cpu, time_gpu, time_cpu/time_gpu))
print "Estimated time for one pass through MNIST with CPU: %f" % ( #print "Estimated time for one pass through MNIST with CPU: %f" % (
(time_cpu * (60000.0 / (n_train*bsize)))) # (time_cpu * (60000.0 / (n_train*bsize))))
print "Estimated time for one pass through MNIST with GPU: %f" % ( #print "Estimated time for one pass through MNIST with GPU: %f" % (
(time_gpu * (60000.0 / (n_train*bsize)))) # (time_gpu * (60000.0 / (n_train*bsize))))
# Default parameters for all subsequent tests # Default parameters for all subsequent tests
......
...@@ -497,6 +497,13 @@ class StrucutedAddSVCSR(gof.Op): ...@@ -497,6 +497,13 @@ class StrucutedAddSVCSR(gof.Op):
return hash(type(self)) return hash(type(self))
def make_node(self, a_data, a_indices, a_indptr, b): def make_node(self, a_data, a_indices, a_indptr, b):
b = tensor.as_tensor_variable(b)
a_data = tensor.as_tensor_variable(a_data)
a_indices = tensor.as_tensor_variable(a_indices)
a_indptr = tensor.as_tensor_variable(a_indptr)
assert a_data.type.ndim == 1
assert a_indices.type.ndim == 1
assert a_indptr.type.ndim == 1
assert b.type.ndim == 1 assert b.type.ndim == 1
return gof.Apply(self, [a_data, a_indices, a_indptr, b], return gof.Apply(self, [a_data, a_indices, a_indptr, b],
[tensor.tensor(b.dtype, (False,))]) [tensor.tensor(b.dtype, (False,))])
......
...@@ -335,7 +335,7 @@ class TestConv3D(unittest.TestCase): ...@@ -335,7 +335,7 @@ class TestConv3D(unittest.TestCase):
col_steps = self.rng.randint(1,4) col_steps = self.rng.randint(1,4)
time_steps = self.rng.randint(1,4) time_steps = self.rng.randint(1,4)
print (row_steps,col_steps,time_steps) #print (row_steps,col_steps,time_steps)
videoDur = (time_steps-1)*dt+filterDur + self.rng.randint(0,3) videoDur = (time_steps-1)*dt+filterDur + self.rng.randint(0,3)
videoWidth = (col_steps-1)*dc+filterWidth + self.rng.randint(0,3) videoWidth = (col_steps-1)*dc+filterWidth + self.rng.randint(0,3)
......
...@@ -112,8 +112,8 @@ class T_SoftmaxWithBias(unittest.TestCase): ...@@ -112,8 +112,8 @@ class T_SoftmaxWithBias(unittest.TestCase):
assert softmax_with_bias not in ops assert softmax_with_bias not in ops
assert softmax in ops assert softmax in ops
print f([0,1,0]) f([0,1,0])
print f.maker.env.toposort() #print f.maker.env.toposort()
def test_infer_shape(self): def test_infer_shape(self):
fff=theano.function([],outputs=softmax_with_bias(numpy.random.rand(3,4),numpy.random.rand(4)).shape) fff=theano.function([],outputs=softmax_with_bias(numpy.random.rand(3,4),numpy.random.rand(4)).shape)
...@@ -299,20 +299,20 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -299,20 +299,20 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
[op(softmax(x+b), one_of_n)]) [op(softmax(x+b), one_of_n)])
assert env.outputs[0].owner.op == op assert env.outputs[0].owner.op == op
print 'BEFORE' #print 'BEFORE'
for node in env.toposort(): #for node in env.toposort():
print node.op # print node.op
print printing.pprint(node.outputs[0]) #print printing.pprint(node.outputs[0])
print '----' #print '----'
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env) theano.compile.mode.OPT_FAST_RUN).optimize(env)
print 'AFTER' #print 'AFTER'
for node in env.toposort(): #for node in env.toposort():
print node.op # print node.op
print printing.pprint(node.outputs[0]) #print printing.pprint(node.outputs[0])
print '====' #print '===='
assert len(env.toposort()) == 2 assert len(env.toposort()) == 2
assert str(env.outputs[0].owner.op) == 'OutputGuard' assert str(env.outputs[0].owner.op) == 'OutputGuard'
...@@ -330,18 +330,18 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -330,18 +330,18 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
[op(softmax(T.add(x,b,c)), one_of_n)]) [op(softmax(T.add(x,b,c)), one_of_n)])
assert env.outputs[0].owner.op == op assert env.outputs[0].owner.op == op
print 'BEFORE' #print 'BEFORE'
for node in env.toposort(): #for node in env.toposort():
print node.op # print node.op
print '----' #print '----'
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env) theano.compile.mode.OPT_FAST_RUN).optimize(env)
print 'AFTER' #print 'AFTER'
for node in env.toposort(): #for node in env.toposort():
print node.op # print node.op
print '====' #print '===='
assert len(env.toposort()) == 3 assert len(env.toposort()) == 3
assert str(env.outputs[0].owner.op) == 'OutputGuard' assert str(env.outputs[0].owner.op) == 'OutputGuard'
...@@ -356,18 +356,18 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -356,18 +356,18 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
[x, b, one_of_n], [x, b, one_of_n],
[op(softmax(x+b), one_of_n)]) [op(softmax(x+b), one_of_n)])
assert env.outputs[0].owner.op == op assert env.outputs[0].owner.op == op
print 'BEFORE' #print 'BEFORE'
for node in env.toposort(): #for node in env.toposort():
print node.op # print node.op
print printing.pprint(node.outputs[0]) #print printing.pprint(node.outputs[0])
print '----' #print '----'
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env) theano.compile.mode.OPT_FAST_RUN).optimize(env)
print 'AFTER' #print 'AFTER'
for node in env.toposort(): #for node in env.toposort():
print node.op # print node.op
print '====' #print '===='
assert len(env.toposort()) == 3 assert len(env.toposort()) == 3
assert str(env.outputs[0].owner.op) == 'OutputGuard' assert str(env.outputs[0].owner.op) == 'OutputGuard'
assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
...@@ -385,16 +385,16 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -385,16 +385,16 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
[x, one_of_n], [x, one_of_n],
[g_x]) [g_x])
print 'BEFORE' #print 'BEFORE'
for node in env.toposort(): #for node in env.toposort():
print node.op, node.inputs # print node.op, node.inputs
print '----' #print '----'
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env) theano.compile.mode.OPT_FAST_RUN).optimize(env)
print 'AFTER' #print 'AFTER'
for node in env.toposort(): #for node in env.toposort():
print node.op, node.inputs # print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting # the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
# cleaned up as well as we'd like. # cleaned up as well as we'd like.
...@@ -428,16 +428,16 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -428,16 +428,16 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
[x, one_of_n], [x, one_of_n],
[g_x]) [g_x])
print 'BEFORE' #print 'BEFORE'
for node in env.toposort(): #for node in env.toposort():
print node.op, node.inputs # print node.op, node.inputs
print '----' #print '----'
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env) theano.compile.mode.OPT_FAST_RUN).optimize(env)
print 'AFTER' #print 'AFTER'
for node in env.toposort(): #for node in env.toposort():
print node.op, node.inputs # print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting # the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
# cleaned up as well as we'd like. # cleaned up as well as we'd like.
...@@ -1021,9 +1021,9 @@ class Test_softmax_opt: ...@@ -1021,9 +1021,9 @@ class Test_softmax_opt:
# test that function contains softmax and no div. # test that function contains softmax and no div.
f = theano.function([c],p_y, mode=self.mode) f = theano.function([c],p_y, mode=self.mode)
f_ops = [n.op for n in f.maker.env.toposort()] f_ops = [n.op for n in f.maker.env.toposort()]
print '--- f =' #print '--- f ='
printing.debugprint(f) #printing.debugprint(f)
print '===' #print '==='
assert len(f_ops) == 1 assert len(f_ops) == 1
assert softmax in f_ops assert softmax in f_ops
f(self.rng.rand(3,4).astype(config.floatX)) f(self.rng.rand(3,4).astype(config.floatX))
...@@ -1041,9 +1041,9 @@ class Test_softmax_opt: ...@@ -1041,9 +1041,9 @@ class Test_softmax_opt:
finally: finally:
config.warn.sum_div_dimshuffle_bug = backup config.warn.sum_div_dimshuffle_bug = backup
g_ops = [n.op for n in g.maker.env.toposort()] g_ops = [n.op for n in g.maker.env.toposort()]
print '--- g =' #print '--- g ='
printing.debugprint(g) #printing.debugprint(g)
print '===' #print '==='
raise SkipTest('Optimization not enabled for the moment') raise SkipTest('Optimization not enabled for the moment')
assert len(g_ops) == 2 assert len(g_ops) == 2
...@@ -1058,7 +1058,7 @@ class Test_softmax_opt: ...@@ -1058,7 +1058,7 @@ class Test_softmax_opt:
# test that function contains softmax and no div. # test that function contains softmax and no div.
f = theano.function([c],p_y) f = theano.function([c],p_y)
printing.debugprint(f) #printing.debugprint(f)
# test that function contains softmax and no div. # test that function contains softmax and no div.
backup = config.warn.sum_div_dimshuffle_bug backup = config.warn.sum_div_dimshuffle_bug
...@@ -1067,7 +1067,7 @@ class Test_softmax_opt: ...@@ -1067,7 +1067,7 @@ class Test_softmax_opt:
g = theano.function([c],T.grad(p_y.sum(), c)) g = theano.function([c],T.grad(p_y.sum(), c))
finally: finally:
config.warn.sum_div_dimshuffle_bug = backup config.warn.sum_div_dimshuffle_bug = backup
printing.debugprint(g) #printing.debugprint(g)
raise SkipTest('Optimization not enabled for the moment') raise SkipTest('Optimization not enabled for the moment')
def test_1d_basic(self): def test_1d_basic(self):
...@@ -1077,7 +1077,7 @@ class Test_softmax_opt: ...@@ -1077,7 +1077,7 @@ class Test_softmax_opt:
# test that function contains softmax and no div. # test that function contains softmax and no div.
f = theano.function([c], p_y) f = theano.function([c], p_y)
printing.debugprint(f) #printing.debugprint(f)
# test that function contains softmax and no div. # test that function contains softmax and no div.
backup = config.warn.sum_div_dimshuffle_bug backup = config.warn.sum_div_dimshuffle_bug
...@@ -1086,7 +1086,7 @@ class Test_softmax_opt: ...@@ -1086,7 +1086,7 @@ class Test_softmax_opt:
g = theano.function([c], T.grad(p_y.sum(), c)) g = theano.function([c], T.grad(p_y.sum(), c))
finally: finally:
config.warn.sum_div_dimshuffle_bug = backup config.warn.sum_div_dimshuffle_bug = backup
printing.debugprint(g) #printing.debugprint(g)
raise SkipTest('Optimization not enabled for the moment') raise SkipTest('Optimization not enabled for the moment')
# REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc. # REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc.
......
"""Define RandomStreams, providing random number variables for Theano graphs.""" """Define RandomStreams, providing random number variables for Theano
graphs.
"""
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
import sys import sys
...@@ -8,6 +11,7 @@ from theano.compile import module, In, Component ...@@ -8,6 +11,7 @@ from theano.compile import module, In, Component
from theano.gof import Container from theano.gof import Container
from theano.tensor import raw_random from theano.tensor import raw_random
class RandomStreamsInstance(object): class RandomStreamsInstance(object):
"""RandomStreamsInstance""" """RandomStreamsInstance"""
def __init__(self, random_streams, memo, default_seed): def __init__(self, random_streams, memo, default_seed):
...@@ -18,24 +22,26 @@ class RandomStreamsInstance(object): ...@@ -18,24 +22,26 @@ class RandomStreamsInstance(object):
def initialize(self, seed=None): def initialize(self, seed=None):
"""Initialize each random stream """Initialize each random stream
:param seed: each random stream will be assigned a unique state that depends :param seed: each random stream will be assigned a unique
deterministically on this value. state that depends deterministically on this value.
:type seed: None or integer in range 0 to 2**30 :type seed: None or integer in range 0 to 2**30
:rtype: None :rtype: None
""" """
self.seed(seed) self.seed(seed)
def seed(self, seed=None): def seed(self, seed=None):
"""Re-initialize each random stream """Re-initialize each random stream
:param seed: each random stream will be assigned a unique state that depends :param seed: each random stream will be assigned a unique
deterministically on this value. state that depends deterministically on this value.
:type seed: None or integer in range 0 to 2**30 :type seed: None or integer in range 0 to 2**30
:rtype: None :rtype: None
""" """
if seed is None: if seed is None:
seed = self.default_seed seed = self.default_seed
...@@ -43,19 +49,24 @@ class RandomStreamsInstance(object): ...@@ -43,19 +49,24 @@ class RandomStreamsInstance(object):
#seed = self.default_seed if seed is None else seed #seed = self.default_seed if seed is None else seed
seedgen = numpy.random.RandomState(seed) seedgen = numpy.random.RandomState(seed)
for old_r, new_r in self.random_streams.random_state_variables: for old_r, new_r in self.random_streams.random_state_variables:
old_r_seed = seedgen.randint(2**30) old_r_seed = seedgen.randint(2 ** 30)
old_r_container = self.memo[old_r].value old_r_container = self.memo[old_r].value
if old_r_container.value is None: if old_r_container.value is None:
#the cast to int here makes it work on 32bit machines, not sure why #the cast to int here makes it work on 32bit machines,
old_r_container.value = numpy.random.RandomState(int(old_r_seed)) #not sure why
old_r_container.value = numpy.random.RandomState(
int(old_r_seed))
else: else:
#the cast to int here makes it work on 32bit machines, not sure why #the cast to int here makes it work on 32bit machines,
#not sure why
old_r_container.value.seed(int(old_r_seed)) old_r_container.value.seed(int(old_r_seed))
def __getitem__(self, item): def __getitem__(self, item):
"""Retrieve the numpy RandomState instance associated with a particular stream """Retrieve the numpy RandomState instance associated with a
particular stream
:param item: a variable of type RandomStateType, associated with this RandomStream :param item: a variable of type RandomStateType, associated
with this RandomStream
:rtype: numpy RandomState (or None, before initialize) :rtype: numpy RandomState (or None, before initialize)
...@@ -67,9 +78,11 @@ class RandomStreamsInstance(object): ...@@ -67,9 +78,11 @@ class RandomStreamsInstance(object):
raise KeyError(item) raise KeyError(item)
def __setitem__(self, item, val): def __setitem__(self, item, val):
"""Set the numpy RandomState instance associated with a particular stream """Set the numpy RandomState instance associated with a
particular stream
:param item: a variable of type RandomStateType, associated with this RandomStream :param item: a variable of type RandomStateType, associated
with this RandomStream
:param val: the new value :param val: the new value
:type val: numpy RandomState :type val: numpy RandomState
...@@ -78,7 +91,8 @@ class RandomStreamsInstance(object): ...@@ -78,7 +91,8 @@ class RandomStreamsInstance(object):
""" """
if type(val) is not numpy.random.RandomState: if type(val) is not numpy.random.RandomState:
raise TypeError('only values of type RandomState are permitted', val) raise TypeError('only values of type RandomState are permitted',
val)
for old_r, new_r in self.random_streams.random_state_variables: for old_r, new_r in self.random_streams.random_state_variables:
if item is old_r: if item is old_r:
container = self.memo[item].value container = self.memo[item].value
...@@ -86,24 +100,34 @@ class RandomStreamsInstance(object): ...@@ -86,24 +100,34 @@ class RandomStreamsInstance(object):
return return
raise KeyError(item) raise KeyError(item)
class RandomStreams(Component, raw_random.RandomStreamsBase): class RandomStreams(Component, raw_random.RandomStreamsBase):
"""Module component with similar interface to numpy.random (numpy.random.RandomState)""" """Module component with similar interface to numpy.random
(numpy.random.RandomState)
"""
random_state_variables = [] random_state_variables = []
"""A list of pairs of the form (input_r, output_r). This will be over-ridden by the module """A list of pairs of the form (input_r, output_r). This will be
instance to contain stream generators. over-ridden by the module instance to contain stream
generators.
""" """
default_instance_seed = None default_instance_seed = None
"""Instance variable should take None or integer value. Used to seed the random number """Instance variable should take None or integer value. Used to
generator that provides seeds for member streams""" seed the random number generator that provides seeds for member
streams
def __init__(self, seed=None):
""" """
:type seed: None or int
:param seed: a default seed to initialize the RandomState instances after build. See def __init__(self, seed=None):
`RandomStreamsInstance.__init__` for more details. """:type seed: None or int
:param seed: a default seed to initialize the RandomState
instances after build. See `RandomStreamsInstance.__init__`
for more details.
""" """
super(RandomStreams, self).__init__() super(RandomStreams, self).__init__()
self.random_state_variables = [] self.random_state_variables = []
...@@ -124,7 +148,8 @@ class RandomStreams(Component, raw_random.RandomStreamsBase): ...@@ -124,7 +148,8 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
"""override `Component.build` """ """override `Component.build` """
if self not in memo: if self not in memo:
print 'creating RandomStreamsInstance' print 'creating RandomStreamsInstance'
memo[self] = RandomStreamsInstance(self, memo, self.default_instance_seed) memo[self] = RandomStreamsInstance(self, memo,
self.default_instance_seed)
return memo[self] return memo[self]
def gen(self, op, *args, **kwargs): def gen(self, op, *args, **kwargs):
...@@ -136,14 +161,15 @@ class RandomStreams(Component, raw_random.RandomStreamsBase): ...@@ -136,14 +161,15 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
:param kwargs: interpreted by `op` :param kwargs: interpreted by `op`
:returns: The symbolic random draw part of op()'s return value. This function stores :returns: The symbolic random draw part of op()'s return
the updated RandomStateType Variable for use at `build` time. value. This function stores the updated RandomStateType
Variable for use at `build` time.
:rtype: TensorVariable :rtype: TensorVariable
""" """
random_state_variable = raw_random.random_state_type() random_state_variable = raw_random.random_state_type()
new_r, out = op(random_state_variable, *args, **kwargs) new_r, out = op(random_state_variable, *args, **kwargs)
out.rng = random_state_variable out.rng = random_state_variable
self.random_state_variables.append((random_state_variable, new_r)) self.random_state_variables.append((random_state_variable, new_r))
return out return out
...@@ -2,14 +2,17 @@ ...@@ -2,14 +2,17 @@
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
import sys import sys
from copy import copy from copy import copy
import numpy import numpy
#local imports #local imports
import theano
import basic as tensor import basic as tensor
import opt, theano import opt
from theano import gof from theano import gof
from theano.compile import optdb from theano.compile import optdb
class RandomStateType(gof.Type): class RandomStateType(gof.Type):
"""A Type wrapper for numpy.RandomState """A Type wrapper for numpy.RandomState
...@@ -157,8 +160,8 @@ class RandomFunction(gof.Op): ...@@ -157,8 +160,8 @@ class RandomFunction(gof.Op):
print >> sys.stderr, 'WARNING: RandomState instances should be in RandomStateType' print >> sys.stderr, 'WARNING: RandomState instances should be in RandomStateType'
if 0: if 0:
raise TypeError('r must be RandomStateType instance', r) raise TypeError('r must be RandomStateType instance', r)
# the following doesn't work because we want to ignore the broadcastable flags in # the following doesn't work because we want to ignore the
# shape.type # broadcastable flags in shape.type
# assert shape.type == tensor.lvector # assert shape.type == tensor.lvector
# convert args to TensorType instances # convert args to TensorType instances
...@@ -173,7 +176,7 @@ class RandomFunction(gof.Op): ...@@ -173,7 +176,7 @@ class RandomFunction(gof.Op):
r, shp = node.inputs[0:2] r, shp = node.inputs[0:2]
#if shp is a constant array of len 0, then it means 'automatic shape' #if shp is a constant array of len 0, then it means 'automatic shape'
unknown_shape = len(getattr(shp, 'data', [0,1,2])) == 0 unknown_shape = len(getattr(shp, 'data', [0, 1, 2])) == 0
# if ndim_added == 0 and shape != () then shape # if ndim_added == 0 and shape != () then shape
if self.ndim_added == 0 and not unknown_shape: if self.ndim_added == 0 and not unknown_shape:
...@@ -188,8 +191,8 @@ class RandomFunction(gof.Op): ...@@ -188,8 +191,8 @@ class RandomFunction(gof.Op):
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
rout, out = out_ rout, out = out_
# Use self.fn to draw shape worth of random numbers. # Use self.fn to draw shape worth of random numbers.
# Numbers are drawn from r if self.inplace is True, and from a copy of r if # Numbers are drawn from r if self.inplace is True, and from a
# self.inplace is False # copy of r if self.inplace is False
r, shape, args = inputs[0], inputs[1], inputs[2:] r, shape, args = inputs[0], inputs[1], inputs[2:]
assert type(r) == numpy.random.RandomState, (type(r), r) assert type(r) == numpy.random.RandomState, (type(r), r)
r_orig = r r_orig = r
...@@ -203,34 +206,44 @@ class RandomFunction(gof.Op): ...@@ -203,34 +206,44 @@ class RandomFunction(gof.Op):
else: else:
shape = tuple(shape) shape = tuple(shape)
if shape is not None and self.outtype.ndim != len(shape) + self.ndim_added: if (shape is not None and
raise ValueError('Shape mismatch: self.outtype.ndim (%i) != len(shape) (%i) + self.ndim_added (%i)'\ self.outtype.ndim != len(shape) + self.ndim_added):
%(self.outtype.ndim, len(shape), self.ndim_added)) raise ValueError('Shape mismatch: self.outtype.ndim (%i) !='
' len(shape) (%i) + self.ndim_added (%i)'
% (self.outtype.ndim, len(shape), self.ndim_added))
if not self.inplace: if not self.inplace:
r = copy(r) r = copy(r)
rout[0] = r rout[0] = r
rval = self.fn(r, *(args + [shape])) rval = self.fn(r, *(args + [shape]))
if not isinstance(rval, numpy.ndarray) \ if not isinstance(rval, numpy.ndarray) \
or str(rval.dtype) != node.outputs[1].type.dtype: or str(rval.dtype) != node.outputs[1].type.dtype:
rval = theano._asarray(rval, dtype = node.outputs[1].type.dtype) rval = theano._asarray(rval, dtype=node.outputs[1].type.dtype)
# When shape is None, numpy has a tendency to unexpectedly # When shape is None, numpy has a tendency to unexpectedly
# return a scalar instead of a higher-dimension array containing # return a scalar instead of a higher-dimension array containing
# only one element. This value should be reshaped # only one element. This value should be reshaped
if shape is None and rval.ndim == 0 and self.outtype.ndim > 0: if shape is None and rval.ndim == 0 and self.outtype.ndim > 0:
rval = rval.reshape([1]*self.outtype.ndim) rval = rval.reshape([1] * self.outtype.ndim)
if len(rval.shape) != self.outtype.ndim: if len(rval.shape) != self.outtype.ndim:
raise ValueError('Shape mismatch: "out" should have dimension %i, but the value produced by "perform" has dimension %i'\ raise ValueError('Shape mismatch: "out" should have dimension %i,'
' but the value produced by "perform" has'
' dimension %i'
% (self.outtype.ndim, len(rval.shape))) % (self.outtype.ndim, len(rval.shape)))
# Check the output has the right shape # Check the output has the right shape
if shape is not None: if shape is not None:
if self.ndim_added == 0 and shape != rval.shape: if self.ndim_added == 0 and shape != rval.shape:
raise ValueError('Shape mismatch: "out" should have shape %s, but the value produced by "perform" has shape %s'\ raise ValueError(
'Shape mismatch: "out" should have shape %s, but the'
' value produced by "perform" has shape %s'
% (shape, rval.shape)) % (shape, rval.shape))
elif self.ndim_added > 0 and shape != rval.shape[:-self.ndim_added]: elif (self.ndim_added > 0 and
raise ValueError('Shape mismatch: "out" should have shape starting with %s (plus %i extra dimensions), but the value produced by "perform" has shape %s'\ shape != rval.shape[:-self.ndim_added]):
raise ValueError(
'Shape mismatch: "out" should have shape starting with'
' %s (plus %i extra dimensions), but the value produced'
' by "perform" has shape %s'
% (shape, self.ndim_added, rval.shape)) % (shape, self.ndim_added, rval.shape))
out[0] = rval out[0] = rval
...@@ -260,9 +273,11 @@ def _infer_ndim_bcast(ndim, shape, *args): ...@@ -260,9 +273,11 @@ def _infer_ndim_bcast(ndim, shape, *args):
# there is a convention that -1 means the corresponding shape of a # there is a convention that -1 means the corresponding shape of a
# potentially-broadcasted symbolic arg # potentially-broadcasted symbolic arg
if (isinstance(shape, (tuple, list)) if (isinstance(shape, (tuple, list))
and numpy.all(numpy.asarray(shape)>=0)): and numpy.all(numpy.asarray(shape) >= 0)):
bcast = [(s==1) for s in shape] bcast = [(s == 1) for s in shape]
v_shape = tensor.TensorConstant(type=tensor.lvector, data=theano._asarray(shape, dtype='int64')) v_shape = tensor.TensorConstant(type=tensor.lvector,
data=theano._asarray(shape,
dtype='int64'))
shape_ndim = len(shape) shape_ndim = len(shape)
if ndim is None: if ndim is None:
ndim = shape_ndim ndim = shape_ndim
...@@ -278,21 +293,21 @@ def _infer_ndim_bcast(ndim, shape, *args): ...@@ -278,21 +293,21 @@ def _infer_ndim_bcast(ndim, shape, *args):
# This case combines together symbolic and non-symbolic shape # This case combines together symbolic and non-symbolic shape
# information # information
if ndim is None: if ndim is None:
ndim=args_ndim ndim = args_ndim
else: else:
ndim = max(args_ndim, ndim) ndim = max(args_ndim, ndim)
ndim = max(args_ndim, len(shape)) ndim = max(args_ndim, len(shape))
shape = [-1]*(ndim - len(shape))+list(shape) shape = [-1] * (ndim - len(shape)) + list(shape)
bcast = [] bcast = []
pre_v_shape = [] pre_v_shape = []
for i,s in enumerate(shape): for i, s in enumerate(shape):
if hasattr(s, 'type'): # s is symbolic if hasattr(s, 'type'): # s is symbolic
bcast.append(False) # todo - introspect further bcast.append(False) # todo - introspect further
pre_v_shape.append(s) pre_v_shape.append(s)
else: else:
if s >= 0: if s >= 0:
pre_v_shape.append(tensor.as_tensor_variable(s)) pre_v_shape.append(tensor.as_tensor_variable(s))
bcast.append((s==1)) bcast.append((s == 1))
elif s == -1: elif s == -1:
n_a_i = 0 n_a_i = 0
for a in args: for a in args:
...@@ -301,7 +316,7 @@ def _infer_ndim_bcast(ndim, shape, *args): ...@@ -301,7 +316,7 @@ def _infer_ndim_bcast(ndim, shape, *args):
# i # i
if i >= ndim - a.ndim: if i >= ndim - a.ndim:
n_a_i += 1 n_a_i += 1
a_i = i + a.ndim -ndim a_i = i + a.ndim - ndim
if not a.broadcastable[a_i]: if not a.broadcastable[a_i]:
pre_v_shape.append(a.shape[a_i]) pre_v_shape.append(a.shape[a_i])
bcast.append(False) bcast.append(False)
...@@ -316,7 +331,8 @@ def _infer_ndim_bcast(ndim, shape, *args): ...@@ -316,7 +331,8 @@ def _infer_ndim_bcast(ndim, shape, *args):
bcast.append(True) bcast.append(True)
else: else:
ValueError('negative shape', s) ValueError('negative shape', s)
# post-condition: shape may still contain both symbolic and non-symbolic things # post-condition: shape may still contain both symbolic and
# non-symbolic things
v_shape = tensor.stack(*pre_v_shape) v_shape = tensor.stack(*pre_v_shape)
elif shape is None: elif shape is None:
...@@ -325,7 +341,7 @@ def _infer_ndim_bcast(ndim, shape, *args): ...@@ -325,7 +341,7 @@ def _infer_ndim_bcast(ndim, shape, *args):
if not args: if not args:
raise TypeError(('_infer_ndim_bcast cannot infer shape without' raise TypeError(('_infer_ndim_bcast cannot infer shape without'
' either shape or args')) ' either shape or args'))
template = reduce(lambda a,b:a+b, args) template = reduce(lambda a, b: a + b, args)
v_shape = template.shape v_shape = template.shape
bcast = template.broadcastable bcast = template.broadcastable
ndim = template.ndim ndim = template.ndim
...@@ -333,18 +349,22 @@ def _infer_ndim_bcast(ndim, shape, *args): ...@@ -333,18 +349,22 @@ def _infer_ndim_bcast(ndim, shape, *args):
v_shape = tensor.as_tensor_variable(shape) v_shape = tensor.as_tensor_variable(shape)
if ndim is None: if ndim is None:
ndim = tensor.get_vector_length(v_shape) ndim = tensor.get_vector_length(v_shape)
bcast = [False]*ndim bcast = [False] * ndim
if not (v_shape.dtype.startswith('int') or v_shape.dtype.startswith('uint')): if (not (v_shape.dtype.startswith('int') or
raise TypeError('shape must be an integer vector or list', v_shape.dtype) v_shape.dtype.startswith('uint'))):
raise TypeError('shape must be an integer vector or list',
v_shape.dtype)
if args_ndim > ndim: if args_ndim > ndim:
raise ValueError('ndim should be at least as big as required by args value', raise ValueError(
'ndim should be at least as big as required by args value',
(ndim, args_ndim), args) (ndim, args_ndim), args)
assert ndim == len(bcast) assert ndim == len(bcast)
return ndim, tensor.cast(v_shape, 'int32'), tuple(bcast) return ndim, tensor.cast(v_shape, 'int32'), tuple(bcast)
def _generate_broadcasting_indices(out_shape, *shapes): def _generate_broadcasting_indices(out_shape, *shapes):
''' '''
Return indices over each shape that broadcast them to match out_shape. Return indices over each shape that broadcast them to match out_shape.
...@@ -359,11 +379,11 @@ def _generate_broadcasting_indices(out_shape, *shapes): ...@@ -359,11 +379,11 @@ def _generate_broadcasting_indices(out_shape, *shapes):
''' '''
all_shapes = (out_shape,) + shapes all_shapes = (out_shape,) + shapes
# Will contain the return value: a list of indices for each argument # Will contain the return value: a list of indices for each argument
ret_indices = [ [()] for shape in all_shapes ] ret_indices = [[()] for shape in all_shapes]
for dim in xrange(len(out_shape)): for dim in xrange(len(out_shape)):
# Temporary list to generate the indices # Temporary list to generate the indices
_ret_indices = [ [] for shape in all_shapes ] _ret_indices = [[] for shape in all_shapes]
out_range = range(out_shape[dim]) out_range = range(out_shape[dim])
...@@ -373,11 +393,14 @@ def _generate_broadcasting_indices(out_shape, *shapes): ...@@ -373,11 +393,14 @@ def _generate_broadcasting_indices(out_shape, *shapes):
for shape in shapes: for shape in shapes:
if shape[dim] == out_shape[dim]: if shape[dim] == out_shape[dim]:
ranges.append(out_range) ranges.append(out_range)
elif shape[dim] == 1: #broadcast elif shape[dim] == 1: # broadcast
ranges.append([0] * out_shape[dim]) ranges.append([0] * out_shape[dim])
else: else:
raise ValueError('shape[%i] (%i) should be equal to out_shape[%i] (%i) or to 1'\ raise ValueError(
% (dim, shape[dim], dim, out_shape[dim]), shape, out_shape, shapes) 'shape[%i] (%i) should be equal to out_shape[%i] (%i) or'
' to 1'
% (dim, shape[dim], dim, out_shape[dim]), shape,
out_shape, shapes)
for prev_index in zip(*ret_indices): for prev_index in zip(*ret_indices):
for dim_index in zip(*ranges): for dim_index in zip(*ranges):
...@@ -435,7 +458,8 @@ def normal(random_state, size=None, avg=0.0, std=1.0, ndim=None, dtype=None): ...@@ -435,7 +458,8 @@ def normal(random_state, size=None, avg=0.0, std=1.0, ndim=None, dtype=None):
return op(random_state, size, avg, std) return op(random_state, size, avg, std)
def binomial(random_state, size=None, n=1, p=0.5, ndim=None, dtype='int64', prob=None): def binomial(random_state, size=None, n=1, p=0.5, ndim=None,
dtype='int64', prob=None):
""" """
Sample n times with probability of success prob for each trial, Sample n times with probability of success prob for each trial,
return the number of successes. return the number of successes.
...@@ -452,7 +476,7 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None, dtype='int64', prob ...@@ -452,7 +476,7 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None, dtype='int64', prob
n = tensor.as_tensor_variable(n) n = tensor.as_tensor_variable(n)
p = tensor.as_tensor_variable(p) p = tensor.as_tensor_variable(p)
ndim, size, bcast = _infer_ndim_bcast(ndim, size, n, p) ndim, size, bcast = _infer_ndim_bcast(ndim, size, n, p)
if n.dtype=='int64': if n.dtype == 'int64':
### THIS WORKS AROUND A NUMPY BUG on 32bit machine ### THIS WORKS AROUND A NUMPY BUG on 32bit machine
### Erase when the following works on a 32bit machine: ### Erase when the following works on a 32bit machine:
### numpy.random.binomial( ### numpy.random.binomial(
...@@ -460,9 +484,10 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None, dtype='int64', prob ...@@ -460,9 +484,10 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None, dtype='int64', prob
# p=numpy.asarray([.1, .2, .3], dtype='float64')) # p=numpy.asarray([.1, .2, .3], dtype='float64'))
n = tensor.cast(n, 'int32') n = tensor.cast(n, 'int32')
op = RandomFunction('binomial', op = RandomFunction('binomial',
tensor.TensorType(dtype = dtype, broadcastable = (False,)*ndim) ) tensor.TensorType(dtype=dtype, broadcastable=(False,) * ndim))
return op(random_state, size, n, p) return op(random_state, size, n, p)
def random_integers_helper(random_state, low, high, size): def random_integers_helper(random_state, low, high, size):
''' '''
Helper function to draw random integers. Helper function to draw random integers.
...@@ -477,16 +502,19 @@ def random_integers_helper(random_state, low, high, size): ...@@ -477,16 +502,19 @@ def random_integers_helper(random_state, low, high, size):
out_ndim = max(low.ndim, high.ndim) out_ndim = max(low.ndim, high.ndim)
# broadcast low and high to out_ndim dimensions # broadcast low and high to out_ndim dimensions
if low.ndim > out_ndim: if low.ndim > out_ndim:
raise ValueError('low.ndim (%i) should not be larger than len(size) (%i)' % (low.ndim, out_ndim), raise ValueError(
'low.ndim (%i) should not be larger than len(size) (%i)'
% (low.ndim, out_ndim),
low, size) low, size)
if low.ndim < out_ndim: if low.ndim < out_ndim:
low = low.reshape((1,)*(out_ndim-low.ndim) + low.shape) low = low.reshape((1,) * (out_ndim - low.ndim) + low.shape)
if high.ndim > out_ndim: if high.ndim > out_ndim:
raise ValueError('high.ndim (%i) should not be larger than len(size) (%i)' % (high.ndim, out_ndim), raise ValueError(
high, size) 'high.ndim (%i) should not be larger than len(size) (%i)'
% (high.ndim, out_ndim), high, size)
if high.ndim < out_ndim: if high.ndim < out_ndim:
high = high.reshape((1,)*(out_ndim-high.ndim) + high.shape) high = high.reshape((1,) * (out_ndim - high.ndim) + high.shape)
if size is not None: if size is not None:
out_size = tuple(size) out_size = tuple(size)
...@@ -498,14 +526,17 @@ def random_integers_helper(random_state, low, high, size): ...@@ -498,14 +526,17 @@ def random_integers_helper(random_state, low, high, size):
# Build the indices over which to loop # Build the indices over which to loop
out = numpy.ndarray(out_size) out = numpy.ndarray(out_size)
broadcast_ind = _generate_broadcasting_indices(out_size, low.shape, high.shape) broadcast_ind = _generate_broadcasting_indices(out_size, low.shape,
high.shape)
# Iterate over these indices, drawing one sample at a time from numpy # Iterate over these indices, drawing one sample at a time from numpy
for oi, li, hi in zip(*broadcast_ind): for oi, li, hi in zip(*broadcast_ind):
out[oi] = random_state.random_integers(low = low[li], high = high[hi]) out[oi] = random_state.random_integers(low=low[li], high=high[hi])
return out return out
def random_integers(random_state, size=None, low=0, high=1, ndim=None, dtype='int64'):
def random_integers(random_state, size=None, low=0, high=1, ndim=None,
dtype='int64'):
""" """
Sample a random integer between low and high, both inclusive. Sample a random integer between low and high, both inclusive.
...@@ -522,6 +553,7 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None, dtype='in ...@@ -522,6 +553,7 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None, dtype='in
tensor.TensorType(dtype=dtype, broadcastable=bcast)) tensor.TensorType(dtype=dtype, broadcastable=bcast))
return op(random_state, size, low, high) return op(random_state, size, low, high)
def permutation_helper(random_state, n, shape): def permutation_helper(random_state, n, shape):
"""Helper function to generate permutations from integers. """Helper function to generate permutations from integers.
...@@ -552,6 +584,7 @@ def permutation_helper(random_state, n, shape): ...@@ -552,6 +584,7 @@ def permutation_helper(random_state, n, shape):
#print 'RETURNING', out.shape #print 'RETURNING', out.shape
return out return out
def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'): def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'):
""" """
Returns permutations of the integers between 0 and n-1, as many times Returns permutations of the integers between 0 and n-1, as many times
...@@ -569,10 +602,11 @@ def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'): ...@@ -569,10 +602,11 @@ def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'):
ndim, size, bcast = _infer_ndim_bcast(ndim, size) ndim, size, bcast = _infer_ndim_bcast(ndim, size)
#print "NDIM", ndim, size #print "NDIM", ndim, size
op = RandomFunction(permutation_helper, op = RandomFunction(permutation_helper,
tensor.TensorType(dtype=dtype, broadcastable=bcast+(False,)), tensor.TensorType(dtype=dtype, broadcastable=bcast + (False,)),
ndim_added=1) ndim_added=1)
return op(random_state, size, n) return op(random_state, size, n)
def multinomial_helper(random_state, n, pvals, size): def multinomial_helper(random_state, n, pvals, size):
''' '''
Helper function drawing from multinomial distributions. Helper function drawing from multinomial distributions.
...@@ -586,21 +620,25 @@ def multinomial_helper(random_state, n, pvals, size): ...@@ -586,21 +620,25 @@ def multinomial_helper(random_state, n, pvals, size):
if size is not None: if size is not None:
ndim = len(size) ndim = len(size)
else: else:
ndim = max(n.ndim, pvals.ndim-1) ndim = max(n.ndim, pvals.ndim - 1)
out_ndim = ndim+1 out_ndim = ndim + 1
# broadcast n to ndim dimensions and pvals to ndim+1 # broadcast n to ndim dimensions and pvals to ndim+1
if n.ndim > ndim: if n.ndim > ndim:
raise ValueError('n.ndim (%i) should not be larger than len(size) (%i)' % (n.ndim, ndim), raise ValueError(
'n.ndim (%i) should not be larger than len(size) (%i)'
% (n.ndim, ndim),
n, size) n, size)
if n.ndim < ndim: if n.ndim < ndim:
n = n.reshape((1,)*(ndim-n.ndim) + n.shape) n = n.reshape((1,) * (ndim - n.ndim) + n.shape)
if pvals.ndim-1 > ndim: if pvals.ndim - 1 > ndim:
raise ValueError('pvals.ndim-1 (%i) should not be larger than len(size) (%i)' % (pvals.ndim-1, ndim), raise ValueError(
'pvals.ndim-1 (%i) should not be larger than len(size) (%i)'
% (pvals.ndim - 1, ndim),
pvals, size) pvals, size)
if pvals.ndim-1 < ndim: if pvals.ndim - 1 < ndim:
pvals = pvals.reshape((1,)*(ndim-pvals.ndim+1) + pvals.shape) pvals = pvals.reshape((1,) * (ndim - pvals.ndim + 1) + pvals.shape)
if size is not None: if size is not None:
size = tuple(size) size = tuple(size)
...@@ -609,14 +647,16 @@ def multinomial_helper(random_state, n, pvals, size): ...@@ -609,14 +647,16 @@ def multinomial_helper(random_state, n, pvals, size):
for dim in xrange(ndim): for dim in xrange(ndim):
dim_len = max(n.shape[dim], pvals.shape[dim]) dim_len = max(n.shape[dim], pvals.shape[dim])
size = size + (dim_len,) size = size + (dim_len,)
out_size = size+(pvals.shape[-1],) out_size = size + (pvals.shape[-1],)
# Build the indices over which to loop # Build the indices over which to loop
# Note that here, the rows (inner-most 1D subtensors) of pvals and out # Note that here, the rows (inner-most 1D subtensors) of pvals and out
# are indexed, not their individual elements # are indexed, not their individual elements
out = numpy.ndarray(out_size) out = numpy.ndarray(out_size)
broadcast_ind = _generate_broadcasting_indices(size, n.shape, pvals.shape[:-1]) broadcast_ind = _generate_broadcasting_indices(size, n.shape,
# Iterate over these indices, drawing from one multinomial at a time from numpy pvals.shape[:-1])
# Iterate over these indices, drawing from one multinomial at a
# time from numpy
assert pvals.min() >= 0 assert pvals.min() >= 0
for mi, ni, pi in zip(*broadcast_ind): for mi, ni, pi in zip(*broadcast_ind):
pvi = pvals[pi] pvi = pvals[pi]
...@@ -627,24 +667,24 @@ def multinomial_helper(random_state, n, pvals, size): ...@@ -627,24 +667,24 @@ def multinomial_helper(random_state, n, pvals, size):
# In perfect arithmetic this would be correct, but in float32 or # In perfect arithmetic this would be correct, but in float32 or
# float64 it is too strict. # float64 it is too strict.
pisum = numpy.sum(pvi) pisum = numpy.sum(pvi)
if 1.0 < pisum < 1.0+1e-5:#correct if we went a little over if 1.0 < pisum < 1.0 + 1e-5: # correct if we went a little over
# because mtrand.pyx has a ValueError that will trigger if # because mtrand.pyx has a ValueError that will trigger if
# sum(pvals[:-1]) > 1.0 # sum(pvals[:-1]) > 1.0
pvi = pvi * (1.0 - 5e-5) pvi = pvi * (1.0 - 5e-5)
#pvi = pvi * .9 #pvi = pvi * .9
pisum = numpy.sum(pvi) pisum = numpy.sum(pvi)
elif pvi[-1]<5e-5: #will this even work? elif pvi[-1] < 5e-5: # will this even work?
pvi = pvi * (1.0 - 5e-5) pvi = pvi * (1.0 - 5e-5)
pisum = numpy.sum(pvi) pisum = numpy.sum(pvi)
assert pisum<=1.0, pisum assert pisum <= 1.0, pisum
out[mi] = random_state.multinomial(n=n[ni], out[mi] = random_state.multinomial(n=n[ni],
pvals=pvi.astype('float64')) pvals=pvi.astype('float64'))
return out return out
def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5], def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
ndim=None, dtype='int64'): ndim=None, dtype='int64'):
""" """Sample from one or more multinomial distributions defined by
Sample from one or more multinomial distributions defined by
one-dimensional slices in pvals. one-dimensional slices in pvals.
:param pvals: a tensor of shape "nmulti+(L,)" describing each multinomial :param pvals: a tensor of shape "nmulti+(L,)" describing each multinomial
...@@ -657,15 +697,17 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5], ...@@ -657,15 +697,17 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
right in nmulti. (See examples below.) right in nmulti. (See examples below.)
Default ``None`` means size=nmulti. Default ``None`` means size=nmulti.
:param n: the number of experiments to simulate for each multinomial. This :param n: the number of experiments to simulate for each
can be a scalar, or tensor, it will be broadcasted to have shape "nmulti". multinomial. This can be a scalar, or tensor, it will be
broadcasted to have shape "nmulti".
:param dtype: the dtype of the return value (which will represent counts) :param dtype: the dtype of the return value (which will represent counts)
:returns: tensor of len(size)+1 dimensions, and shape[-1]==L, with the specified ``dtype``, :returns: tensor of len(size)+1 dimensions, and shape[-1]==L, with
with the experiment counts. See examples to understand the shape of the the specified ``dtype``, with the experiment counts. See
return value, which is derived from both size and pvals.shape. examples to understand the shape of the return value, which is
In return value rval, "numpy.allclose(rval.sum(axis=-1), n)" will be true. derived from both size and pvals.shape. In return value rval,
"numpy.allclose(rval.sum(axis=-1), n)" will be true.
For example, to simulate n experiments from each multinomial in a batch of For example, to simulate n experiments from each multinomial in a batch of
size B: size B:
...@@ -685,11 +727,12 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5], ...@@ -685,11 +727,12 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
Using size for broadcasting of pvals: Using size for broadcasting of pvals:
size=(10,1,-1), pvals.shape=(A,B,L) size=(10, 1, -1), pvals.shape=(A, B, L)
--> rval.shape=[10,1,B,L], and requires that A==1. --> rval.shape=[10,1,B,L], and requires that A==1.
rval[l,k,i,j] is the count of possibility j in the distribution specified rval[l,k,i,j] is the count of possibility j in the
by pvals[k,i], in the l'th of 10 draws. distribution specified by pvals[k,i], in the l'th of 10
draws.
""" """
n = tensor.as_tensor_variable(n) n = tensor.as_tensor_variable(n)
...@@ -697,9 +740,9 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5], ...@@ -697,9 +740,9 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
# until ellipsis is implemented (argh) # until ellipsis is implemented (argh)
tmp = pvals.T[0].T tmp = pvals.T[0].T
ndim, size, bcast = _infer_ndim_bcast(ndim, size, n, tmp) ndim, size, bcast = _infer_ndim_bcast(ndim, size, n, tmp)
bcast = bcast+(pvals.type.broadcastable[-1],) bcast = bcast + (pvals.type.broadcastable[-1],)
op = RandomFunction(multinomial_helper, op = RandomFunction(multinomial_helper,
tensor.TensorType(dtype = dtype, broadcastable = bcast), tensor.TensorType(dtype=dtype, broadcastable=bcast),
ndim_added=1) ndim_added=1)
return op(random_state, size, n, pvals) return op(random_state, size, n, pvals)
...@@ -708,17 +751,20 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5], ...@@ -708,17 +751,20 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
def random_make_inplace(node): def random_make_inplace(node):
op = node.op op = node.op
if isinstance(op, RandomFunction) and not op.inplace: if isinstance(op, RandomFunction) and not op.inplace:
new_op = RandomFunction(op.fn, op.outtype, inplace=True, ndim_added=op.ndim_added) new_op = RandomFunction(op.fn, op.outtype, inplace=True,
ndim_added=op.ndim_added)
return new_op.make_node(*node.inputs).outputs return new_op.make_node(*node.inputs).outputs
return False return False
optdb.register('random_make_inplace', opt.in2out(random_make_inplace, ignore_newtrees=True), 99, 'fast_run', 'inplace') optdb.register('random_make_inplace', opt.in2out(random_make_inplace,
ignore_newtrees=True),
99, 'fast_run', 'inplace')
class RandomStreamsBase(object): class RandomStreamsBase(object):
def binomial(self, size=None, n=1, p=0.5, ndim=None, dtype='int64', prob=None): def binomial(self, size=None, n=1, p=0.5, ndim=None, dtype='int64',
prob=None):
""" """
Sample n times with probability of success prob for each trial, Sample n times with probability of success prob for each trial,
return the number of successes. return the number of successes.
...@@ -754,7 +800,8 @@ class RandomStreamsBase(object): ...@@ -754,7 +800,8 @@ class RandomStreamsBase(object):
""" """
return self.gen(normal, size, avg, std, ndim=ndim, dtype=dtype) return self.gen(normal, size, avg, std, ndim=ndim, dtype=dtype)
def random_integers(self, size=None, low=0, high=1, ndim=None, dtype='int64'): def random_integers(self, size=None, low=0, high=1, ndim=None,
dtype='int64'):
""" """
Sample a random integer between low and high, both inclusive. Sample a random integer between low and high, both inclusive.
...@@ -762,7 +809,8 @@ class RandomStreamsBase(object): ...@@ -762,7 +809,8 @@ class RandomStreamsBase(object):
ndim may be a plain integer to supplement the missing ndim may be a plain integer to supplement the missing
information. information.
""" """
return self.gen(random_integers, size, low, high, ndim=ndim, dtype=dtype) return self.gen(random_integers, size, low, high, ndim=ndim,
dtype=dtype)
def permutation(self, size=None, n=1, ndim=None, dtype='int64'): def permutation(self, size=None, n=1, ndim=None, dtype='int64'):
""" """
...@@ -780,7 +828,8 @@ class RandomStreamsBase(object): ...@@ -780,7 +828,8 @@ class RandomStreamsBase(object):
""" """
return self.gen(permutation, size, n, ndim=ndim, dtype=dtype) return self.gen(permutation, size, n, ndim=ndim, dtype=dtype)
def multinomial(self, size=None, n=1, pvals=[0.5, 0.5], ndim=None, dtype='int64'): def multinomial(self, size=None, n=1, pvals=[0.5, 0.5], ndim=None,
dtype='int64'):
""" """
Sample n times from a multinomial distribution defined by Sample n times from a multinomial distribution defined by
probabilities pvals, as many times as required by size. For probabilities pvals, as many times as required by size. For
...@@ -802,7 +851,8 @@ class RandomStreamsBase(object): ...@@ -802,7 +851,8 @@ class RandomStreamsBase(object):
This uses permutation random variable internally, available via This uses permutation random variable internally, available via
the ``.permutation`` attribute of the return value. the ``.permutation`` attribute of the return value.
""" """
perm = self.permutation(size=input.shape[:-1], n=input.shape[-1], ndim=input.ndim-1) perm = self.permutation(size=input.shape[:-1], n=input.shape[-1],
ndim=input.ndim - 1)
shuffled = tensor.permute_row_elements(input, perm) shuffled = tensor.permute_row_elements(input, perm)
shuffled.permutation = perm shuffled.permutation = perm
return shuffled return shuffled
"""Define RandomStreams, providing random number variables for Theano graphs.""" """Define RandomStreams, providing random number variables for Theano
graphs.
"""
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
import copy, sys import copy
import sys
import numpy import numpy
from theano.gof import Container from theano.gof import Container
from theano.compile.sharedvalue import SharedVariable, shared_constructor, shared from theano.compile.sharedvalue import (SharedVariable, shared_constructor,
shared)
import raw_random import raw_random
class RandomStateSharedVariable(SharedVariable): class RandomStateSharedVariable(SharedVariable):
pass pass
@shared_constructor @shared_constructor
def randomstate_constructor(value, name=None, strict=False, allow_downcast=None, borrow=False): def randomstate_constructor(value, name=None, strict=False,
allow_downcast=None, borrow=False):
"""SharedVariable Constructor for RandomState""" """SharedVariable Constructor for RandomState"""
if not isinstance(value, numpy.random.RandomState): if not isinstance(value, numpy.random.RandomState):
raise TypeError raise TypeError
...@@ -25,17 +33,26 @@ def randomstate_constructor(value, name=None, strict=False, allow_downcast=None, ...@@ -25,17 +33,26 @@ def randomstate_constructor(value, name=None, strict=False, allow_downcast=None,
strict=strict, strict=strict,
allow_downcast=allow_downcast) allow_downcast=allow_downcast)
class RandomStreams(raw_random.RandomStreamsBase): class RandomStreams(raw_random.RandomStreamsBase):
"""Module component with similar interface to numpy.random (numpy.random.RandomState)""" """Module component with similar interface to numpy.random
(numpy.random.RandomState)
"""
state_updates = [] state_updates = []
"""A list of pairs of the form (input_r, output_r). This will be over-ridden by the module """A list of pairs of the form (input_r, output_r). This will be
instance to contain stream generators. over-ridden by the module instance to contain stream
generators.
""" """
default_instance_seed = None default_instance_seed = None
"""Instance variable should take None or integer value. Used to seed the random number """Instance variable should take None or integer value. Used to
generator that provides seeds for member streams""" seed the random number generator that provides seeds for member
streams
"""
gen_seedgen = None gen_seedgen = None
"""numpy.RandomState instance that gen() uses to seed new streams. """numpy.RandomState instance that gen() uses to seed new streams.
...@@ -48,8 +65,10 @@ class RandomStreams(raw_random.RandomStreamsBase): ...@@ -48,8 +65,10 @@ class RandomStreams(raw_random.RandomStreamsBase):
""" """
:type seed: None or int :type seed: None or int
:param seed: a default seed to initialize the RandomState instances after build. See :param seed: a default seed to initialize the RandomState
`RandomStreamsInstance.__init__` for more details. instances after build. See `RandomStreamsInstance.__init__`
for more details.
""" """
super(RandomStreams, self).__init__() super(RandomStreams, self).__init__()
self.state_updates = [] self.state_updates = []
...@@ -59,47 +78,54 @@ class RandomStreams(raw_random.RandomStreamsBase): ...@@ -59,47 +78,54 @@ class RandomStreams(raw_random.RandomStreamsBase):
def seed(self, seed=None): def seed(self, seed=None):
"""Re-initialize each random stream """Re-initialize each random stream
:param seed: each random stream will be assigned a unique state that depends :param seed: each random stream will be assigned a unique
deterministically on this value. state that depends deterministically on this value.
:type seed: None or integer in range 0 to 2**30 :type seed: None or integer in range 0 to 2**30
:rtype: None :rtype: None
""" """
if seed is None: if seed is None:
seed = self.default_instance_seed seed = self.default_instance_seed
seedgen = numpy.random.RandomState(seed) seedgen = numpy.random.RandomState(seed)
for old_r, new_r in self.state_updates: for old_r, new_r in self.state_updates:
old_r_seed = seedgen.randint(2**30) old_r_seed = seedgen.randint(2 ** 30)
old_r.set_value(numpy.random.RandomState(int(old_r_seed)), old_r.set_value(numpy.random.RandomState(int(old_r_seed)),
borrow=True) borrow=True)
def __getitem__(self, item): def __getitem__(self, item):
"""Retrieve the numpy RandomState instance associated with a particular stream """Retrieve the numpy RandomState instance associated with a
particular stream
:param item: a variable of type RandomStateType, associated with this RandomStream :param item: a variable of type RandomStateType, associated
with this RandomStream
:rtype: numpy RandomState (or None, before initialize) :rtype: numpy RandomState (or None, before initialize)
:note: This is kept for compatibility with `tensor.randomstreams.RandomStreams`. The :note: This is kept for compatibility with
simpler syntax ``item.rng.get_value()`` is also valid. `tensor.randomstreams.RandomStreams`. The simpler syntax
``item.rng.get_value()`` is also valid.
""" """
return item.get_value(borrow=True) return item.get_value(borrow=True)
def __setitem__(self, item, val): def __setitem__(self, item, val):
"""Set the numpy RandomState instance associated with a particular stream """Set the numpy RandomState instance associated with a
particular stream
:param item: a variable of type RandomStateType, associated with this RandomStream :param item: a variable of type RandomStateType, associated
with this RandomStream
:param val: the new value :param val: the new value
:type val: numpy RandomState :type val: numpy RandomState
:rtype: None :rtype: None
:note: This is kept for compatibility with `tensor.randomstreams.RandomStreams`. The :note: This is kept for compatibility with
simpler syntax ``item.rng.set_value(val)`` is also valid. `tensor.randomstreams.RandomStreams`. The simpler syntax
``item.rng.set_value(val)`` is also valid.
""" """
item.set_value(val, borrow=True) item.set_value(val, borrow=True)
...@@ -113,12 +139,14 @@ class RandomStreams(raw_random.RandomStreamsBase): ...@@ -113,12 +139,14 @@ class RandomStreams(raw_random.RandomStreamsBase):
:param kwargs: interpreted by `op` :param kwargs: interpreted by `op`
:returns: The symbolic random draw part of op()'s return value. This function stores :returns: The symbolic random draw part of op()'s return
the updated RandomStateType Variable for use at `build` time. value. This function stores the updated RandomStateType
Variable for use at `build` time.
:rtype: TensorVariable :rtype: TensorVariable
""" """
seed = int(self.gen_seedgen.randint(2**30)) seed = int(self.gen_seedgen.randint(2 ** 30))
random_state_variable = shared(numpy.random.RandomState(seed)) random_state_variable = shared(numpy.random.RandomState(seed))
new_r, out = op(random_state_variable, *args, **kwargs) new_r, out = op(random_state_variable, *args, **kwargs)
out.rng = random_state_variable out.rng = random_state_variable
......
...@@ -50,8 +50,8 @@ class TestDownsampleFactorMax(unittest.TestCase): ...@@ -50,8 +50,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
for maxpoolshp in maxpoolshps: for maxpoolshp in maxpoolshps:
for ignore_border in [True,False]: for ignore_border in [True,False]:
print 'maxpoolshp =', maxpoolshp #print 'maxpoolshp =', maxpoolshp
print 'ignore_border =', ignore_border #print 'ignore_border =', ignore_border
## Pure Numpy computation ## Pure Numpy computation
numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border) numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
...@@ -74,8 +74,8 @@ class TestDownsampleFactorMax(unittest.TestCase): ...@@ -74,8 +74,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
for maxpoolshp in maxpoolshps: for maxpoolshp in maxpoolshps:
for ignore_border in [True,False]: for ignore_border in [True,False]:
print 'maxpoolshp =', maxpoolshp #print 'maxpoolshp =', maxpoolshp
print 'ignore_border =', ignore_border #print 'ignore_border =', ignore_border
def mp(input): def mp(input):
return DownsampleFactorMax(maxpoolshp, ignore_border=ignore_border)(input) return DownsampleFactorMax(maxpoolshp, ignore_border=ignore_border)(input)
utt.verify_grad(mp, [imval], rng=rng) utt.verify_grad(mp, [imval], rng=rng)
...@@ -89,8 +89,8 @@ class TestDownsampleFactorMax(unittest.TestCase): ...@@ -89,8 +89,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
for maxpoolshp in maxpoolshps: for maxpoolshp in maxpoolshps:
for ignore_border in [True,False]: for ignore_border in [True,False]:
print 'maxpoolshp =', maxpoolshp #print 'maxpoolshp =', maxpoolshp
print 'ignore_border =', ignore_border #print 'ignore_border =', ignore_border
numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border) numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
output = max_pool_2d(images, maxpoolshp, ignore_border) output = max_pool_2d(images, maxpoolshp, ignore_border)
...@@ -110,8 +110,8 @@ class TestDownsampleFactorMax(unittest.TestCase): ...@@ -110,8 +110,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
for maxpoolshp in maxpoolshps: for maxpoolshp in maxpoolshps:
for ignore_border in [True,False]: for ignore_border in [True,False]:
print 'maxpoolshp =', maxpoolshp #print 'maxpoolshp =', maxpoolshp
print 'ignore_border =', ignore_border #print 'ignore_border =', ignore_border
numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border) numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
output = max_pool_2d(images, maxpoolshp, ignore_border) output = max_pool_2d(images, maxpoolshp, ignore_border)
...@@ -144,8 +144,8 @@ class TestDownsampleFactorMax(unittest.TestCase): ...@@ -144,8 +144,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
for maxpoolshp in maxpoolshps: for maxpoolshp in maxpoolshps:
for ignore_border in [True,False]: for ignore_border in [True,False]:
print 'maxpoolshp =', maxpoolshp #print 'maxpoolshp =', maxpoolshp
print 'ignore_border =', ignore_border #print 'ignore_border =', ignore_border
numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border) numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
output = max_pool_2d(images, maxpoolshp, ignore_border) output = max_pool_2d(images, maxpoolshp, ignore_border)
......
...@@ -264,7 +264,7 @@ def test_mlp(): ...@@ -264,7 +264,7 @@ def test_mlp():
###################### ######################
# BUILD ACTUAL MODEL # # BUILD ACTUAL MODEL #
###################### ######################
print '... building the model' #print '... building the model'
# allocate symbolic variables for the data # allocate symbolic variables for the data
index = T.lscalar() # index to a [mini]batch index = T.lscalar() # index to a [mini]batch
...@@ -302,8 +302,8 @@ def test_mlp(): ...@@ -302,8 +302,8 @@ def test_mlp():
x:train_set_x[index*batch_size:(index+1)*batch_size], x:train_set_x[index*batch_size:(index+1)*batch_size],
y:train_set_y[index*batch_size:(index+1)*batch_size]}, y:train_set_y[index*batch_size:(index+1)*batch_size]},
mode=mode) mode=mode)
print 'MODEL 1' #print 'MODEL 1'
theano.printing.debugprint(train_model, print_type=True) #theano.printing.debugprint(train_model, print_type=True)
assert any([isinstance(i.op,T.nnet.CrossentropySoftmax1HotWithBiasDx) for i in train_model.maker.env.toposort()]) assert any([isinstance(i.op,T.nnet.CrossentropySoftmax1HotWithBiasDx) for i in train_model.maker.env.toposort()])
# Even without FeatureShape # Even without FeatureShape
...@@ -313,9 +313,9 @@ def test_mlp(): ...@@ -313,9 +313,9 @@ def test_mlp():
givens={ givens={
x:train_set_x[index*batch_size:(index+1)*batch_size], x:train_set_x[index*batch_size:(index+1)*batch_size],
y:train_set_y[index*batch_size:(index+1)*batch_size]}) y:train_set_y[index*batch_size:(index+1)*batch_size]})
print #print
print 'MODEL 2' #print 'MODEL 2'
theano.printing.debugprint(train_model, print_type=True) #theano.printing.debugprint(train_model, print_type=True)
assert any([isinstance(i.op,T.nnet.CrossentropySoftmax1HotWithBiasDx) for i in train_model.maker.env.toposort()]) assert any([isinstance(i.op,T.nnet.CrossentropySoftmax1HotWithBiasDx) for i in train_model.maker.env.toposort()])
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -3049,7 +3049,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3049,7 +3049,7 @@ class T_Join_and_Split(unittest.TestCase):
s = stack(a, b, a, b) s = stack(a, b, a, b)
f = function([a, b], s, mode=self.mode) f = function([a, b], s, mode=self.mode)
val = f(1, 2) val = f(1, 2)
print val #print val
self.assertTrue(numpy.all(val == [1, 2, 1, 2])) self.assertTrue(numpy.all(val == [1, 2, 1, 2]))
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0 assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0
...@@ -3588,8 +3588,8 @@ class T_add(unittest.TestCase): ...@@ -3588,8 +3588,8 @@ class T_add(unittest.TestCase):
("/", lambda x,y: x/y)) ("/", lambda x,y: x/y))
for s, fn in tests: for s, fn in tests:
f = inplace_func([a,b], fn(a, b)) f = inplace_func([a,b], fn(a, b))
print 'valid output:', fn(a.data, b.data) #print 'valid output:', fn(a.data, b.data)
print 'theano output:', f(a.data, b.data) #print 'theano output:', f(a.data, b.data)
self.assertTrue(a.type.values_eq_approx(fn(a.data, b.data), f(a.data, b.data))) self.assertTrue(a.type.values_eq_approx(fn(a.data, b.data), f(a.data, b.data)))
def test_grad_scalar_l(self): def test_grad_scalar_l(self):
...@@ -4385,8 +4385,8 @@ class TestARange(unittest.TestCase): ...@@ -4385,8 +4385,8 @@ class TestARange(unittest.TestCase):
df = function([dstart, dstop], dout) df = function([dstart, dstop], dout)
assert dout.dtype == dstart.type.dtype assert dout.dtype == dstart.type.dtype
print df(0.2, 5.3) #print df(0.2, 5.3)
print numpy.arange(0.2, 5.3) #print numpy.arange(0.2, 5.3)
assert numpy.all(df(0.2, 5.3) == numpy.arange(0.2, 5.3)) assert numpy.all(df(0.2, 5.3) == numpy.arange(0.2, 5.3))
assert numpy.all(df(0.8, 5.3) == numpy.arange(0.8, 5.3)) assert numpy.all(df(0.8, 5.3) == numpy.arange(0.8, 5.3))
assert numpy.all(df(-0.7, 5.3) == numpy.arange(-0.7, 5.3)) assert numpy.all(df(-0.7, 5.3) == numpy.arange(-0.7, 5.3))
...@@ -4957,8 +4957,8 @@ def test_var(): ...@@ -4957,8 +4957,8 @@ def test_var():
f = function([a], var(a)) f = function([a], var(a))
a_val = numpy.arange(60).reshape(3,4,5) a_val = numpy.arange(60).reshape(3,4,5)
print numpy.var(a_val) #print numpy.var(a_val)
print f(a_val) #print f(a_val)
assert numpy.allclose(numpy.var(a_val), f(a_val)) assert numpy.allclose(numpy.var(a_val), f(a_val))
f = function([a], var(a, axis=0)) f = function([a], var(a, axis=0))
...@@ -4994,9 +4994,9 @@ def test_default(): ...@@ -4994,9 +4994,9 @@ def test_default():
"It is actually a problem of DEBUG_MODE, see #626.")) "It is actually a problem of DEBUG_MODE, see #626."))
def test_default_state(): def test_default_state():
x, y = scalars('xy') x, y = scalars('xy')
print config.floatX #print config.floatX
print x.type #print x.type
print y.type #print y.type
z = default(x, 3.8) z = default(x, 3.8)
new_x = y + z new_x = y + z
f = function([y, compile.In(x, update = new_x, value = 12.0)], new_x) f = function([y, compile.In(x, update = new_x, value = 12.0)], new_x)
......
...@@ -185,7 +185,7 @@ class QuadraticDenoisingAA(module.Module): ...@@ -185,7 +185,7 @@ class QuadraticDenoisingAA(module.Module):
#self.validate = theano.Method(self.input, [self.cost, self.output]) #self.validate = theano.Method(self.input, [self.cost, self.output])
def _instance_initialize(self, obj, input_size, hidden_size, seed, lr, qfilter_relscale): def _instance_initialize(self, obj, input_size, hidden_size, seed, lr, qfilter_relscale):
print 'QDAA init' #print 'QDAA init'
""" """
qfilter_relscale is the initial range for any quadratic filters (relative to the linear qfilter_relscale is the initial range for any quadratic filters (relative to the linear
filter's initial range) filter's initial range)
...@@ -454,11 +454,11 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -454,11 +454,11 @@ class ConvolutionalMLP(module.FancyModule):
i.initialize(input_size=self.input_size, i.initialize(input_size=self.input_size,
hidden_size=self.input_representation_size, noise_level=noise_level, hidden_size=self.input_representation_size, noise_level=noise_level,
seed=int(R.random_integers(2**30)), lr=lr, qfilter_relscale=qfilter_relscale) seed=int(R.random_integers(2**30)), lr=lr, qfilter_relscale=qfilter_relscale)
print type(i.w1) #print type(i.w1)
assert isinstance(i.w1, N.ndarray) assert isinstance(i.w1, N.ndarray)
for i in self.input_representations[1:]: for i in self.input_representations[1:]:
print type(i.w1) #print type(i.w1)
assert isinstance(i.w1, N.ndarray) assert isinstance(i.w1, N.ndarray)
assert (i.w1 == self.input_representations[0].w1).all() assert (i.w1 == self.input_representations[0].w1).all()
assert (i.w2 == self.input_representations[0].w2).all() assert (i.w2 == self.input_representations[0].w2).all()
...@@ -528,7 +528,7 @@ def create_realistic(window_size=3,#7, ...@@ -528,7 +528,7 @@ def create_realistic(window_size=3,#7,
def test_naacl_model(iters_per_unsup=3, iters_per_sup=3, def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
optimizer=None, realistic=False): optimizer=None, realistic=False):
print "BUILDING MODEL" #print "BUILDING MODEL"
import time import time
t = time.time() t = time.time()
...@@ -545,7 +545,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3, ...@@ -545,7 +545,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
else: else:
m = create(compile_mode=mode) m = create(compile_mode=mode)
print 'BUILD took %.3fs'%(time.time() - t) #print 'BUILD took %.3fs'%(time.time() - t)
prog_str = [] prog_str = []
idx_of_node = {} idx_of_node = {}
for i, node in enumerate(m.pretraining_update.maker.env.toposort()): for i, node in enumerate(m.pretraining_update.maker.env.toposort()):
...@@ -557,7 +557,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3, ...@@ -557,7 +557,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
#print input_pretraining_gradients[4].owner.inputs[1].owner.inputs #print input_pretraining_gradients[4].owner.inputs[1].owner.inputs
#sys.exit() #sys.exit()
print "PROGRAM LEN %i HASH %i"% (len(m.pretraining_update.maker.env.nodes), reduce(lambda a, b: hash(a) ^ hash(b),prog_str)) #print "PROGRAM LEN %i HASH %i"% (len(m.pretraining_update.maker.env.nodes), reduce(lambda a, b: hash(a) ^ hash(b),prog_str))
rng = N.random.RandomState(unittest_tools.fetch_seed(23904)) rng = N.random.RandomState(unittest_tools.fetch_seed(23904))
...@@ -565,35 +565,35 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3, ...@@ -565,35 +565,35 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
targets = N.asarray([0,3,4,2,3,4,4,2,1,0]) targets = N.asarray([0,3,4,2,3,4,4,2,1,0])
#print inputs #print inputs
print 'UNSUPERVISED PHASE' #print 'UNSUPERVISED PHASE'
t = time.time() t = time.time()
for i in xrange(3): for i in xrange(3):
for j in xrange(iters_per_unsup): for j in xrange(iters_per_unsup):
m.pretraining_update(*inputs) m.pretraining_update(*inputs)
s0, s1 = [str(j) for j in m.pretraining_update(*inputs)] s0, s1 = [str(j) for j in m.pretraining_update(*inputs)]
print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1 #print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
if iters_per_unsup == 3: if iters_per_unsup == 3:
assert s0.startswith('0.927793')#'0.403044') assert s0.startswith('0.927793')#'0.403044')
assert s1.startswith('0.068035')#'0.074898') assert s1.startswith('0.068035')#'0.074898')
print 'UNSUPERVISED took %.3fs'%(time.time() - t) #print 'UNSUPERVISED took %.3fs'%(time.time() - t)
print 'FINETUNING GRAPH' #print 'FINETUNING GRAPH'
print 'SUPERVISED PHASE COSTS (%s)'%optimizer #print 'SUPERVISED PHASE COSTS (%s)'%optimizer
t = time.time() t = time.time()
for i in xrange(3): for i in xrange(3):
for j in xrange(iters_per_unsup): for j in xrange(iters_per_unsup):
m.finetuning_update(*(inputs + [targets])) m.finetuning_update(*(inputs + [targets]))
s0 = str(m.finetuning_update(*(inputs + [targets]))) s0 = str(m.finetuning_update(*(inputs + [targets])))
print iters_per_sup * (i+1), s0 #print iters_per_sup * (i+1), s0
if iters_per_sup == 10: if iters_per_sup == 10:
s0f = float(s0) s0f = float(s0)
assert 19.7042 < s0f and s0f < 19.7043 assert 19.7042 < s0f and s0f < 19.7043
print 'SUPERVISED took %.3fs'%( time.time() - t) #print 'SUPERVISED took %.3fs'%( time.time() - t)
def jtest_main(): def jtest_main():
from theano import gof from theano import gof
JTEST = theano.compile.mode.optdb.query(*sys.argv[2:]) JTEST = theano.compile.mode.optdb.query(*sys.argv[2:])
print 'JTEST', JTEST #print 'JTEST', JTEST
theano.compile.register_optimizer('JTEST', JTEST) theano.compile.register_optimizer('JTEST', JTEST)
optimizer = eval(sys.argv[1]) optimizer = eval(sys.argv[1])
test_naacl_model(optimizer, 10, 10, realistic=False) test_naacl_model(optimizer, 10, 10, realistic=False)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论