提交 b1402c8b authored 作者: abergeron's avatar abergeron

Merge pull request #1802 from nouiz/f32

Fix buildbot tests error in float32
......@@ -15,7 +15,7 @@ Scan
- ``sum()`` could be computed by scanning the *z + x(i)* function over a list, given an initial state of *z=0*.
- Often a *for* loop can be expressed as a ``scan()`` operation, and ``scan`` is the closest that Theano comes to looping.
- Advantages of using ``scan`` over *for* loops:
- Allows the number of iterations to be part of the symbolic graph.
- Minimizes GPU transfers (if GPU is involved).
- Computes gradients through sequential steps.
......@@ -30,34 +30,34 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano
import theano.tensor as T
import numpy as np
# defining the tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W)+b_sym), sequences=X)
compute_elementwise = theano.function(inputs = [X, W, b_sym], outputs=[results])
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym), sequences=X)
compute_elementwise = theano.function(inputs=[X, W, b_sym], outputs=[results])
# test values
x = np.eye(2)
w = np.ones((2,2))
b = np.ones((2))
x = np.eye(2, dtype=theano.config.floatX)
w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX)
b[1] = 2
print compute_elementwise(x, w, b)[0]
# comparison with numpy
print np.tanh(x.dot(w) + b)
**Scan Example: Computing the sequence x(t) = tanh(x(t-1).dot(W) + y(t).dot(U) + p(T-t).dot(V))**
**Scan Example: Computing the sequence x(t) = tanh(x(t - 1).dot(W) + y(t).dot(U) + p(T - t).dot(V))**
.. code-block:: python
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
X = T.vector("X")
W = T.matrix("W")
......@@ -66,30 +66,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
Y = T.matrix("Y")
V = T.matrix("V")
P = T.matrix("P")
results, updates = theano.scan(lambda
y,p,x_tm1:T.tanh(T.dot(x_tm1,W)+T.dot(y,U)+T.dot(p,V)),
sequences=[Y,P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs = [X, W, Y, U, P, V], outputs=[results])
results, updates = theano.scan(lambda y, p, x_tm1: T.tanh(T.dot(x_tm1, W) + T.dot(y, U) + T.dot(p, V)),
sequences=[Y, P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs=[X, W, Y, U, P, V], outputs=[results])
# test values
x = np.zeros((2))
x = np.zeros((2), dtype=theano.config.floatX)
x[1] = 1
w = np.ones((2,2))
y = np.ones((5,2))
y[0,:] = -3
u = np.ones((2,2))
p = np.ones((5,2))
p[0,:] = 3
v = np.ones((2,2))
print compute_seq(x,w,y,u,p,v)[0]
w = np.ones((2, 2), dtype=theano.config.floatX)
y = np.ones((5, 2), dtype=theano.config.floatX)
y[0, :] = -3
u = np.ones((2, 2), dtype=theano.config.floatX)
p = np.ones((5, 2), dtype=theano.config.floatX)
p[0, :] = 3
v = np.ones((2, 2), dtype=theano.config.floatX)
print compute_seq(x, w, y, u, p, v)[0]
# comparison with numpy
x_res = np.zeros((5,2))
x_res = np.zeros((5, 2), dtype=theano.config.floatX)
x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1,5):
x_res[i] = np.tanh(x_res[i-1].dot(w) + y[i].dot(u) + p[4-i].dot(v))
for i in range(1, 5):
x_res[i] = np.tanh(x_res[i - 1].dot(w) + y[i].dot(u) + p[4-i].dot(v))
print x_res
**Scan Example: Computing norms of lines of X**
......@@ -97,18 +97,18 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano
import theano.tensor as T
import numpy as np
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), sequences=[X])
compute_norm_lines = theano.function(inputs = [X], outputs=[results])
results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences=[X])
compute_norm_lines = theano.function(inputs=[X], outputs=[results])
# test value
x = np.diag(np.arange(1,6),1)
x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print compute_norm_lines(x)[0]
# comparison with numpy
print np.sqrt((x**2).sum(1))
print np.sqrt((x ** 2).sum(1))
**Scan Example: Computing norms of columns of X**
......@@ -116,18 +116,18 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano
import theano.tensor as T
import numpy as np
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), sequences=[X.T])
compute_norm_cols = theano.function(inputs = [X], outputs=[results])
results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences=[X.T])
compute_norm_cols = theano.function(inputs=[X], outputs=[results])
# test value
x = np.diag(np.arange(1,6),1)
x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print compute_norm_cols(x)[0]
# comparison with numpy
print np.sqrt((x**2).sum(0))
print np.sqrt((x ** 2).sum(0))
**Scan Example: Computing trace of X**
......@@ -136,30 +136,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano.tensor as T
import numpy as np
floatX = "float32"
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda i, j, t_f:T.cast(X[i,j]+t_f, floatX), \
sequences=[T.arange(X.shape[0]), T.arange(X.shape[1])], \
results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i, j] + t_f, floatX),
sequences=[T.arange(X.shape[0]), T.arange(X.shape[1])],
outputs_info=np.asarray(0., dtype=floatX))
result = results[-1]
compute_trace = theano.function(inputs = [X], outputs=[result])
compute_trace = theano.function(inputs=[X], outputs=[result])
# test value
x = np.eye(5)
x[0] = np.arange(5)
compute_trace(x)[0]
x = np.eye(5, dtype=theano.config.floatX)
x[0] = np.arange(5, dtype=theano.config.floatX)
print compute_trace(x)[0]
# comparison with numpy
print np.diagonal(x).sum()
**Scan Example: Computing the sequence x(t) = x(t-2).dot(U) + x(t-1).dot(V) + tanh(x(t-1).dot(W) + b)**
**Scan Example: Computing the sequence x(t) = x(t - 2).dot(U) + x(t - 1).dot(V) + tanh(x(t - 1).dot(W) + b)**
.. code-block:: python
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
......@@ -168,31 +168,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
V = T.matrix("V")
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda x_tm2,x_tm1:T.dot(x_tm2,U) + T.dot(x_tm1,V) \
+ T.tanh(T.dot(x_tm1,W) + b_sym), \
n_steps=n_sym, outputs_info=[dict(initial = X, taps = [-2,-1])])
compute_seq2 = theano.function(inputs = [X, U, V, W, b_sym, n_sym], outputs=[results])
results, updates = theano.scan(lambda x_tm2, x_tm1: T.dot(x_tm2, U) + T.dot(x_tm1, V) + T.tanh(T.dot(x_tm1, W) + b_sym),
n_steps=n_sym, outputs_info=[dict(initial=X, taps=[-2, -1])])
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=[results])
# test values
x = np.zeros((2,2)) # the initial value must be able to return x[-2]
x[1,1] = 1
w = 0.5*np.ones((2,2))
u = 0.5*(np.ones((2,2))-np.eye(2))
v = 0.5*np.ones((2,2))
x = np.zeros((2, 2), dtype=theano.config.floatX) # the initial value must be able to return x[-2]
x[1, 1] = 1
w = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
u = 0.5 * (np.ones((2, 2), dtype=theano.config.floatX) - np.eye(2, dtype=theano.config.floatX))
v = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
n = 10
b = np.ones((2))
print compute_seq2(x,u,v,w,b,n)
b = np.ones((2), dtype=theano.config.floatX)
print compute_seq2(x, u, v, w, b, n)
# comparison with numpy
x_res = numpy.zeros((10,2))
x_res[0] = x[0].dot(u) + x[1].dot(v) + numpy.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) + numpy.tanh(x_res[0].dot(w) + b)
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) \
+ numpy.tanh(x_res[1].dot(w) + b)
for i in range(2,10):
x_res[i] = (x_res[i-2].dot(u) + x_res[i-1].dot(v) \
+ numpy.tanh(x_res[i-1].dot(w) + b))
x_res = np.zeros((10, 2))
x_res[0] = x[0].dot(u) + x[1].dot(v) + np.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) + np.tanh(x_res[0].dot(w) + b)
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) + np.tanh(x_res[1].dot(w) + b)
for i in range(2, 10):
x_res[i] = (x_res[i - 2].dot(u) + x_res[i - 1].dot(v) +
np.tanh(x_res[i - 1].dot(w) + b))
print x_res
**Scan Example: Computing the Jacobian of y = tanh(v.dot(A)) wrt x**
......@@ -200,24 +199,24 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v,A))
results, updates = theano.scan(lambda i:T.grad(y[i], v), sequences = [T.arange(y.shape[0])])
compute_jac_t = theano.function([A,v], [results], allow_input_downcast = True) # shape (d_out, d_in)
y = T.tanh(T.dot(v, A))
results, updates = theano.scan(lambda i: T.grad(y[i], v), sequences=[T.arange(y.shape[0])])
compute_jac_t = theano.function([A, v], [results], allow_input_downcast=True) # shape (d_out, d_in)
# test values
x = np.eye(5)[0]
w = np.eye(5,3)
w[2] = np.ones((3))
print compute_jac_t(w,x)[0]
x = np.eye(5, dtype=theano.config.floatX)[0]
w = np.eye(5, 3, dtype=theano.config.floatX)
w[2] = np.ones((3), dtype=theano.config.floatX)
print compute_jac_t(w, x)[0]
# compare with numpy
print ((1 - np.tanh(x.dot(w))**2)*w).T
print ((1 - np.tanh(x.dot(w)) ** 2) * w).T
Note that we need to iterate over the indices of ``y`` and not over the elements of ``y``. The reason is that scan creates a placeholder variable for its internal function, and this placeholder variable does not have the same dependencies as the variables that will replace it.
Note that we need to iterate over the indices of ``y`` and not over the elements of ``y``. The reason is that scan creates a placeholder variable for its internal function, and this placeholder variable does not have the same dependencies as the variables that will replace it.
**Scan Example: Accumulate number of loop during a scan**
......@@ -225,46 +224,46 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
import theano
import theano.tensor as T
import numpy as np
# define shared variables
k = theano.shared(0)
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda:{k:(k+1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast = True)
results, updates = theano.scan(lambda:{k:(k + 1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast=True)
k.get_value()
accumulator(5)
k.get_value()
**Scan Example: Computing tanh(v.dot(W) + b)*d where b is binomial**
**Scan Example: Computing tanh(v.dot(W) + b) * d where b is binomial**
.. code-block:: python
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
d=trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W)+b_sym)*d, sequences=X)
compute_with_bnoise = theano.function(inputs = [X, W, b_sym], outputs=[results], \
updates=updates, allow_input_downcast = True)
x = np.eye(10,2)
w = np.ones((2,2))
b = np.ones((2))
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d, sequences=X)
compute_with_bnoise = theano.function(inputs=[X, W, b_sym], outputs=[results],
updates=updates, allow_input_downcast=True)
x = np.eye(10, 2, dtype=theano.config.floatX)
w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX)
print compute_with_bnoise(x, w, b)
Note that if you want to use a random variable ``d`` that will not be updated through scan loops, you should pass this variable as a ``non_sequences`` argument.
**Scan Example: Computing pow(A,k)**
**Scan Example: Computing pow(A, k)**
.. code-block:: python
......@@ -286,11 +285,11 @@ Note that if you want to use a random variable ``d`` that will not be updated th
# Scan has provided us with A ** 1 through A ** k. Keep only the last
# value. Scan notices this and does not waste memory saving them.
final_result = result[-1]
power = theano.function(inputs=[A, k], outputs=final_result,
updates=updates)
print power(range(10),2)
print power(range(10), 2)
#[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.]
......
......@@ -1146,7 +1146,8 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled and the canonicalize.
#the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including(
'local_elemwise_fusion', 'canonicalize')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize')
self.do(mode, shared, shp)
@attr('slow')
......@@ -1156,7 +1157,8 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled and the canonicalize.
#the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including(
'local_elemwise_fusion', 'canonicalize')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize')
self.do(mode, shared, shp)
def test_gpu_fusion(self):
......@@ -1164,10 +1166,12 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled, debug do this.
if theano.config.mode == "FAST_COMPILE":
mode = theano.compile.mode.get_mode("FAST_RUN").including(
'local_elemwise_fusion', 'canonicalize', 'gpu')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
else:
mode = theano.compile.mode.get_default_mode().including(
'local_elemwise_fusion', 'canonicalize', 'gpu')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
import theano.sandbox.cuda as cuda
if not cuda.cuda_available:
raise SkipTest("cuda not available")
......@@ -1179,10 +1183,12 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled, debug do this.
if theano.config.mode == "FAST_COMPILE":
mode = theano.compile.mode.get_mode("FAST_RUN").including(
'local_elemwise_fusion', 'canonicalize', 'gpu')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
else:
mode = theano.compile.mode.get_default_mode().including(
'local_elemwise_fusion', 'canonicalize', 'gpu')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
import theano.sandbox.cuda as cuda
if not cuda.cuda_available:
raise SkipTest("cuda not available")
......@@ -1278,7 +1284,8 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled and the canonicalize.
#the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including(
'local_elemwise_fusion', 'canonicalize', 'inplace')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'inplace')
x, y, z = dmatrices('xyz')
f = theano.function([x, y, z], tensor.dot(x, y) + x + y + z, mode=mode)
......
......@@ -1137,213 +1137,209 @@ class T_graphstructures(unittest.TestCase):
assert e.owner.inputs[1].owner.inputs[0] is y
assert e.owner.inputs[1].owner.inputs[1] is z
class T_scan(unittest.TestCase):
## All tests here belong to
## http://deeplearning.net/software/theano/tutorial/loop.html
## Theano/doc/tutorial/loop.txt
## Any change you make here should also be made in the tutorial!
def test_elemwise(self):
# defining the tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W)+b_sym), \
sequences=X)
compute_elementwise = theano.function(inputs = [X, W, b_sym], \
outputs=[results])
# test values
x = numpy.eye(2)
w = numpy.ones((2,2))
b = numpy.ones((2))
b[1] = 2
print "Scan results:", compute_elementwise(x, w, b)[0]
# comparison with numpy
print "Numpy results:", numpy.tanh(x.dot(w) + b)
# defining the tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym),
sequences=X)
compute_elementwise = theano.function(inputs=[X, W, b_sym],
outputs=[results])
# test values
x = numpy.eye(2, dtype=theano.config.floatX)
w = numpy.ones((2, 2), dtype=theano.config.floatX)
b = numpy.ones((2), dtype=theano.config.floatX)
b[1] = 2
print "Scan results:", compute_elementwise(x, w, b)[0]
# comparison with numpy
print "Numpy results:", numpy.tanh(x.dot(w) + b)
def test_sequence(self):
# define tensor variables
X = T.vector("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
Y = T.matrix("Y")
V = T.matrix("V")
P = T.matrix("P")
results, updates = theano.scan(lambda \
y,p,x_tm1:T.tanh(T.dot(x_tm1,W) + \
T.dot(y,U)+T.dot(p,V)), \
sequences=[Y,P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs = [X, W, Y, U, P, V], \
outputs=[results])
# test values
x = numpy.zeros((2))
x[1] = 1
w = numpy.ones((2,2))
y = numpy.ones((5,2))
y[0,:] = -3
u = numpy.ones((2,2))
p = numpy.ones((5,2))
p[0,:] = 3
v = numpy.ones((2,2))
print "Scan results", compute_seq(x,w,y,u,p,v)[0]
# comparison with numpy
x_res = numpy.zeros((5,2))
x_res[0] = numpy.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1,5):
x_res[i] = numpy.tanh(x_res[i-1].dot(w) \
+ y[i].dot(u) + p[4-i].dot(v))
print "Numpy results:", x_res
# define tensor variables
X = T.vector("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
Y = T.matrix("Y")
V = T.matrix("V")
P = T.matrix("P")
results, updates = theano.scan(
lambda y, p, x_tm1: T.tanh(T.dot(x_tm1, W) +
T.dot(y, U) + T.dot(p, V)),
sequences=[Y, P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs=[X, W, Y, U, P, V],
outputs=[results])
# test values
x = numpy.zeros((2), dtype=theano.config.floatX)
x[1] = 1
w = numpy.ones((2, 2), dtype=theano.config.floatX)
y = numpy.ones((5, 2), dtype=theano.config.floatX)
y[0, :] = -3
u = numpy.ones((2, 2), dtype=theano.config.floatX)
p = numpy.ones((5, 2), dtype=theano.config.floatX)
p[0, :] = 3
v = numpy.ones((2, 2), dtype=theano.config.floatX)
print "Scan results", compute_seq(x, w, y, u, p, v)[0]
# comparison with numpy
x_res = numpy.zeros((5, 2), dtype=theano.config.floatX)
x_res[0] = numpy.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1, 5):
x_res[i] = numpy.tanh(x_res[i-1].dot(w) +
y[i].dot(u) + p[4-i].dot(v))
print "Numpy results:", x_res
def test_norm(self):
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), \
sequences=[X])
compute_norm_lines = theano.function(inputs = [X], outputs=[results])
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), \
sequences=[X.T])
compute_norm_cols = theano.function(inputs = [X], outputs=[results])
# test value
x = numpy.diag(numpy.arange(1,6),1)
print "Scan results:", compute_norm_lines(x)[0], \
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i: T.sqrt((x_i**2).sum()),
sequences=[X])
compute_norm_lines = theano.function(inputs=[X], outputs=[results])
results, updates = theano.scan(lambda x_i: T.sqrt((x_i**2).sum()),
sequences=[X.T])
compute_norm_cols = theano.function(inputs=[X], outputs=[results])
# test value
x = numpy.diag(numpy.arange(1, 6, dtype=theano.config.floatX), 1)
print "Scan results:", compute_norm_lines(x)[0], \
compute_norm_cols(x)[0]
# comparison with numpy
print "Numpy results:", numpy.sqrt((x**2).sum(1)), \
# comparison with numpy
print "Numpy results:", numpy.sqrt((x**2).sum(1)), \
numpy.sqrt((x**2).sum(0))
def test_trace(self):
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda i, j, t_f:T.cast(X[i,j] + \
t_f, theano.config.floatX), \
sequences=[T.arange(X.shape[0]), \
T.arange(X.shape[1])], \
outputs_info=numpy.asarray(0., \
dtype=theano.config.floatX))
result = results[-1]
compute_trace = theano.function(inputs = [X], outputs=[result])
# test value
x = numpy.eye(5)
x[0] = numpy.arange(5)
print "Scan results:", compute_trace(x)[0]
# comparison with numpy
print "Numpy results:", numpy.diagonal(x).sum()
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i,j] +
t_f, theano.config.floatX),
sequences=[T.arange(X.shape[0]),
T.arange(X.shape[1])],
outputs_info=numpy.asarray(
0., dtype=theano.config.floatX))
result = results[-1]
compute_trace = theano.function(inputs=[X], outputs=[result])
# test value
x = numpy.eye(5, dtype=theano.config.floatX)
x[0] = numpy.arange(5, dtype=theano.config.floatX)
print "Scan results:", compute_trace(x)[0]
# comparison with numpy
print "Numpy results:", numpy.diagonal(x).sum()
def test_taps(self):
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
V = T.matrix("V")
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda x_tm2,x_tm1:T.dot(x_tm2,U) \
+ T.dot(x_tm1,V) + T.tanh(T.dot(x_tm1,W) + b_sym), \
n_steps=n_sym, \
outputs_info=[dict(initial = X, taps = [-2,-1])])
compute_seq2 = theano.function(inputs = [X, U, V, W, b_sym, \
n_sym], outputs=[results])
# test values
x = numpy.zeros((2,2))
# the initial value must be able to return x[-2]
x[1,1] = 1
w = 0.5*numpy.ones((2,2))
u = 0.5*(numpy.ones((2,2))-numpy.eye(2))
v = 0.5*numpy.ones((2,2))
n = 10
b = numpy.ones((2))
print "Scan results:", compute_seq2(x,u,v,w,b,n)
# comparison with numpy
x_res = numpy.zeros((10,2))
x_res[0] = x[0].dot(u) + x[1].dot(v) + numpy.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) \
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
V = T.matrix("V")
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(
lambda x_tm2,x_tm1: T.dot(x_tm2,U) + T.dot(x_tm1,V) + T.tanh(T.dot(x_tm1,W) + b_sym),
n_steps=n_sym,
outputs_info=[dict(initial=X, taps=[-2, -1])])
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym],
outputs=[results])
# test values
x = numpy.zeros((2, 2), dtype=theano.config.floatX)
# the initial value must be able to return x[-2]
x[1, 1] = 1
w = 0.5 * numpy.ones((2, 2), dtype=theano.config.floatX)
u = 0.5 * (numpy.ones((2, 2), dtype=theano.config.floatX) -
numpy.eye(2, dtype=theano.config.floatX))
v = 0.5 * numpy.ones((2, 2), dtype=theano.config.floatX)
n = 10
b = numpy.ones((2), dtype=theano.config.floatX)
print "Scan results:", compute_seq2(x, u, v, w, b, n)
# comparison with numpy
x_res = numpy.zeros((10, 2), dtype=theano.config.floatX)
x_res[0] = x[0].dot(u) + x[1].dot(v) + numpy.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) \
+ numpy.tanh(x_res[0].dot(w) + b)
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) \
+ numpy.tanh(x_res[1].dot(w) + b)
for i in range(2,10):
x_res[i] = (x_res[i-2].dot(u) + x_res[i-1].dot(v) \
+ numpy.tanh(x_res[i-1].dot(w) + b))
print "Numpy results:", x_res
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) \
+ numpy.tanh(x_res[1].dot(w) + b)
for i in range(2, 10):
x_res[i] = (x_res[i-2].dot(u) + x_res[i-1].dot(v) +
numpy.tanh(x_res[i-1].dot(w) + b))
print "Numpy results:", x_res
def test_jacobian(self):
# define tensor variables
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v,A))
results, updates = theano.scan(lambda i:T.grad(y[i], v), \
sequences = [T.arange(y.shape[0])])
compute_jac_t = theano.function([A,v], [results], \
allow_input_downcast = True) # shape (d_out, d_in)
# test values
x = numpy.eye(5)[0]
w = numpy.eye(5,3)
w[2] = numpy.ones((3))
print "Scan results:", compute_jac_t(w,x)[0]
# compare with numpy
print "Numpy results:", ((1 - numpy.tanh(x.dot(w))**2)*w).T
# define tensor variables
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v, A))
results, updates = theano.scan(lambda i: T.grad(y[i], v),
sequences=[T.arange(y.shape[0])])
compute_jac_t = theano.function([A, v], [results],
allow_input_downcast=True) # shape (d_out, d_in)
# test values
x = numpy.eye(5)[0]
w = numpy.eye(5, 3)
w[2] = numpy.ones((3))
print "Scan results:", compute_jac_t(w, x)[0]
# compare with numpy
print "Numpy results:", ((1 - numpy.tanh(x.dot(w))**2)*w).T
def test_accumulator(self):
# define shared variables
k = theano.shared(0)
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda:{k:(k+1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, \
allow_input_downcast = True)
print "Before 5 steps:", k.get_value()
accumulator(5)
print "After 5 steps:", k.get_value()
def test_random(self):
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
d=trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W) \
+ b_sym)*d, sequences=X)
compute_with_bnoise = theano.function(inputs = [X, W, b_sym], \
outputs=[results], \
updates=updates, \
allow_input_downcast = True)
x = numpy.eye(10,2)
w = numpy.ones((2,2))
b = numpy.ones((2))
print compute_with_bnoise(x, w, b)
# define shared variables
k = theano.shared(0)
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda: {k: (k + 1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates,
allow_input_downcast=True)
print "Before 5 steps:", k.get_value()
accumulator(5)
print "After 5 steps:", k.get_value()
def test_random(self):
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
d = trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d,
sequences=X)
compute_with_bnoise = theano.function(inputs=[X, W, b_sym],
outputs=[results],
updates=updates,
allow_input_downcast = True)
x = numpy.eye(10,2)
w = numpy.ones((2,2))
b = numpy.ones((2))
print compute_with_bnoise(x, w, b)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论