提交 b1402c8b authored 作者: abergeron's avatar abergeron

Merge pull request #1802 from nouiz/f32

Fix buildbot tests error in float32
...@@ -15,7 +15,7 @@ Scan ...@@ -15,7 +15,7 @@ Scan
- ``sum()`` could be computed by scanning the *z + x(i)* function over a list, given an initial state of *z=0*. - ``sum()`` could be computed by scanning the *z + x(i)* function over a list, given an initial state of *z=0*.
- Often a *for* loop can be expressed as a ``scan()`` operation, and ``scan`` is the closest that Theano comes to looping. - Often a *for* loop can be expressed as a ``scan()`` operation, and ``scan`` is the closest that Theano comes to looping.
- Advantages of using ``scan`` over *for* loops: - Advantages of using ``scan`` over *for* loops:
- Allows the number of iterations to be part of the symbolic graph. - Allows the number of iterations to be part of the symbolic graph.
- Minimizes GPU transfers (if GPU is involved). - Minimizes GPU transfers (if GPU is involved).
- Computes gradients through sequential steps. - Computes gradients through sequential steps.
...@@ -30,34 +30,34 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -30,34 +30,34 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano import theano
import theano.tensor as T import theano.tensor as T
import numpy as np import numpy as np
# defining the tensor variables # defining the tensor variables
X = T.matrix("X") X = T.matrix("X")
W = T.matrix("W") W = T.matrix("W")
b_sym = T.vector("b_sym") b_sym = T.vector("b_sym")
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W)+b_sym), sequences=X) results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym), sequences=X)
compute_elementwise = theano.function(inputs = [X, W, b_sym], outputs=[results]) compute_elementwise = theano.function(inputs=[X, W, b_sym], outputs=[results])
# test values # test values
x = np.eye(2) x = np.eye(2, dtype=theano.config.floatX)
w = np.ones((2,2)) w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2)) b = np.ones((2), dtype=theano.config.floatX)
b[1] = 2 b[1] = 2
print compute_elementwise(x, w, b)[0] print compute_elementwise(x, w, b)[0]
# comparison with numpy # comparison with numpy
print np.tanh(x.dot(w) + b) print np.tanh(x.dot(w) + b)
**Scan Example: Computing the sequence x(t) = tanh(x(t-1).dot(W) + y(t).dot(U) + p(T-t).dot(V))**
**Scan Example: Computing the sequence x(t) = tanh(x(t - 1).dot(W) + y(t).dot(U) + p(T - t).dot(V))**
.. code-block:: python .. code-block:: python
import theano import theano
import theano.tensor as T import theano.tensor as T
import numpy as np import numpy as np
# define tensor variables # define tensor variables
X = T.vector("X") X = T.vector("X")
W = T.matrix("W") W = T.matrix("W")
...@@ -66,30 +66,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -66,30 +66,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
Y = T.matrix("Y") Y = T.matrix("Y")
V = T.matrix("V") V = T.matrix("V")
P = T.matrix("P") P = T.matrix("P")
results, updates = theano.scan(lambda results, updates = theano.scan(lambda y, p, x_tm1: T.tanh(T.dot(x_tm1, W) + T.dot(y, U) + T.dot(p, V)),
y,p,x_tm1:T.tanh(T.dot(x_tm1,W)+T.dot(y,U)+T.dot(p,V)), sequences=[Y, P[::-1]], outputs_info=[X])
sequences=[Y,P[::-1]], outputs_info=[X]) compute_seq = theano.function(inputs=[X, W, Y, U, P, V], outputs=[results])
compute_seq = theano.function(inputs = [X, W, Y, U, P, V], outputs=[results])
# test values # test values
x = np.zeros((2)) x = np.zeros((2), dtype=theano.config.floatX)
x[1] = 1 x[1] = 1
w = np.ones((2,2)) w = np.ones((2, 2), dtype=theano.config.floatX)
y = np.ones((5,2)) y = np.ones((5, 2), dtype=theano.config.floatX)
y[0,:] = -3 y[0, :] = -3
u = np.ones((2,2)) u = np.ones((2, 2), dtype=theano.config.floatX)
p = np.ones((5,2)) p = np.ones((5, 2), dtype=theano.config.floatX)
p[0,:] = 3 p[0, :] = 3
v = np.ones((2,2)) v = np.ones((2, 2), dtype=theano.config.floatX)
print compute_seq(x,w,y,u,p,v)[0] print compute_seq(x, w, y, u, p, v)[0]
# comparison with numpy # comparison with numpy
x_res = np.zeros((5,2)) x_res = np.zeros((5, 2), dtype=theano.config.floatX)
x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v)) x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1,5): for i in range(1, 5):
x_res[i] = np.tanh(x_res[i-1].dot(w) + y[i].dot(u) + p[4-i].dot(v)) x_res[i] = np.tanh(x_res[i - 1].dot(w) + y[i].dot(u) + p[4-i].dot(v))
print x_res
**Scan Example: Computing norms of lines of X** **Scan Example: Computing norms of lines of X**
...@@ -97,18 +97,18 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -97,18 +97,18 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano import theano
import theano.tensor as T import theano.tensor as T
import numpy as np import numpy as np
# define tensor variable # define tensor variable
X = T.matrix("X") X = T.matrix("X")
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), sequences=[X]) results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences=[X])
compute_norm_lines = theano.function(inputs = [X], outputs=[results]) compute_norm_lines = theano.function(inputs=[X], outputs=[results])
# test value # test value
x = np.diag(np.arange(1,6),1) x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print compute_norm_lines(x)[0] print compute_norm_lines(x)[0]
# comparison with numpy # comparison with numpy
print np.sqrt((x**2).sum(1)) print np.sqrt((x ** 2).sum(1))
**Scan Example: Computing norms of columns of X** **Scan Example: Computing norms of columns of X**
...@@ -116,18 +116,18 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -116,18 +116,18 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano import theano
import theano.tensor as T import theano.tensor as T
import numpy as np import numpy as np
# define tensor variable # define tensor variable
X = T.matrix("X") X = T.matrix("X")
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), sequences=[X.T]) results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences=[X.T])
compute_norm_cols = theano.function(inputs = [X], outputs=[results]) compute_norm_cols = theano.function(inputs=[X], outputs=[results])
# test value # test value
x = np.diag(np.arange(1,6),1) x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print compute_norm_cols(x)[0] print compute_norm_cols(x)[0]
# comparison with numpy # comparison with numpy
print np.sqrt((x**2).sum(0)) print np.sqrt((x ** 2).sum(0))
**Scan Example: Computing trace of X** **Scan Example: Computing trace of X**
...@@ -136,30 +136,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -136,30 +136,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano.tensor as T import theano.tensor as T
import numpy as np import numpy as np
floatX = "float32" floatX = "float32"
# define tensor variable # define tensor variable
X = T.matrix("X") X = T.matrix("X")
results, updates = theano.scan(lambda i, j, t_f:T.cast(X[i,j]+t_f, floatX), \ results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i, j] + t_f, floatX),
sequences=[T.arange(X.shape[0]), T.arange(X.shape[1])], \ sequences=[T.arange(X.shape[0]), T.arange(X.shape[1])],
outputs_info=np.asarray(0., dtype=floatX)) outputs_info=np.asarray(0., dtype=floatX))
result = results[-1] result = results[-1]
compute_trace = theano.function(inputs = [X], outputs=[result]) compute_trace = theano.function(inputs=[X], outputs=[result])
# test value # test value
x = np.eye(5) x = np.eye(5, dtype=theano.config.floatX)
x[0] = np.arange(5) x[0] = np.arange(5, dtype=theano.config.floatX)
compute_trace(x)[0] print compute_trace(x)[0]
# comparison with numpy # comparison with numpy
print np.diagonal(x).sum() print np.diagonal(x).sum()
**Scan Example: Computing the sequence x(t) = x(t-2).dot(U) + x(t-1).dot(V) + tanh(x(t-1).dot(W) + b)** **Scan Example: Computing the sequence x(t) = x(t - 2).dot(U) + x(t - 1).dot(V) + tanh(x(t - 1).dot(W) + b)**
.. code-block:: python .. code-block:: python
import theano import theano
import theano.tensor as T import theano.tensor as T
import numpy as np import numpy as np
# define tensor variables # define tensor variables
X = T.matrix("X") X = T.matrix("X")
W = T.matrix("W") W = T.matrix("W")
...@@ -168,31 +168,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -168,31 +168,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
V = T.matrix("V") V = T.matrix("V")
n_sym = T.iscalar("n_sym") n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda x_tm2,x_tm1:T.dot(x_tm2,U) + T.dot(x_tm1,V) \ results, updates = theano.scan(lambda x_tm2, x_tm1: T.dot(x_tm2, U) + T.dot(x_tm1, V) + T.tanh(T.dot(x_tm1, W) + b_sym),
+ T.tanh(T.dot(x_tm1,W) + b_sym), \ n_steps=n_sym, outputs_info=[dict(initial=X, taps=[-2, -1])])
n_steps=n_sym, outputs_info=[dict(initial = X, taps = [-2,-1])]) compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=[results])
compute_seq2 = theano.function(inputs = [X, U, V, W, b_sym, n_sym], outputs=[results])
# test values # test values
x = np.zeros((2,2)) # the initial value must be able to return x[-2] x = np.zeros((2, 2), dtype=theano.config.floatX) # the initial value must be able to return x[-2]
x[1,1] = 1 x[1, 1] = 1
w = 0.5*np.ones((2,2)) w = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
u = 0.5*(np.ones((2,2))-np.eye(2)) u = 0.5 * (np.ones((2, 2), dtype=theano.config.floatX) - np.eye(2, dtype=theano.config.floatX))
v = 0.5*np.ones((2,2)) v = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
n = 10 n = 10
b = np.ones((2)) b = np.ones((2), dtype=theano.config.floatX)
print compute_seq2(x,u,v,w,b,n) print compute_seq2(x, u, v, w, b, n)
# comparison with numpy # comparison with numpy
x_res = numpy.zeros((10,2)) x_res = np.zeros((10, 2))
x_res[0] = x[0].dot(u) + x[1].dot(v) + numpy.tanh(x[1].dot(w) + b) x_res[0] = x[0].dot(u) + x[1].dot(v) + np.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) + numpy.tanh(x_res[0].dot(w) + b) x_res[1] = x[1].dot(u) + x_res[0].dot(v) + np.tanh(x_res[0].dot(w) + b)
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) \ x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) + np.tanh(x_res[1].dot(w) + b)
+ numpy.tanh(x_res[1].dot(w) + b) for i in range(2, 10):
for i in range(2,10): x_res[i] = (x_res[i - 2].dot(u) + x_res[i - 1].dot(v) +
x_res[i] = (x_res[i-2].dot(u) + x_res[i-1].dot(v) \ np.tanh(x_res[i - 1].dot(w) + b))
+ numpy.tanh(x_res[i-1].dot(w) + b)) print x_res
**Scan Example: Computing the Jacobian of y = tanh(v.dot(A)) wrt v** **Scan Example: Computing the Jacobian of y = tanh(v.dot(A)) wrt v**
...@@ -200,24 +199,24 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -200,24 +199,24 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano import theano
import theano.tensor as T import theano.tensor as T
import numpy as np import numpy as np
# define tensor variables # define tensor variables
v = T.vector() v = T.vector()
A = T.matrix() A = T.matrix()
y = T.tanh(T.dot(v,A)) y = T.tanh(T.dot(v, A))
results, updates = theano.scan(lambda i:T.grad(y[i], v), sequences = [T.arange(y.shape[0])]) results, updates = theano.scan(lambda i: T.grad(y[i], v), sequences=[T.arange(y.shape[0])])
compute_jac_t = theano.function([A,v], [results], allow_input_downcast = True) # shape (d_out, d_in) compute_jac_t = theano.function([A, v], [results], allow_input_downcast=True) # shape (d_out, d_in)
# test values # test values
x = np.eye(5)[0] x = np.eye(5, dtype=theano.config.floatX)[0]
w = np.eye(5,3) w = np.eye(5, 3, dtype=theano.config.floatX)
w[2] = np.ones((3)) w[2] = np.ones((3), dtype=theano.config.floatX)
print compute_jac_t(w,x)[0] print compute_jac_t(w, x)[0]
# compare with numpy # compare with numpy
print ((1 - np.tanh(x.dot(w))**2)*w).T print ((1 - np.tanh(x.dot(w)) ** 2) * w).T
Note that we need to iterate over the indices of ``y`` and not over the elements of ``y``. The reason is that scan creates a placeholder variable for its internal function, and this placeholder variable does not have the same dependencies as the variables that will replace it. Note that we need to iterate over the indices of ``y`` and not over the elements of ``y``. The reason is that scan creates a placeholder variable for its internal function, and this placeholder variable does not have the same dependencies as the variables that will replace it.
**Scan Example: Accumulating the number of loop iterations during a scan** **Scan Example: Accumulating the number of loop iterations during a scan**
...@@ -225,46 +224,46 @@ Note that we need to iterate over the indices of ``y`` and not over the elements ...@@ -225,46 +224,46 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
import theano import theano
import theano.tensor as T import theano.tensor as T
import numpy as np import numpy as np
# define shared variables # define shared variables
k = theano.shared(0) k = theano.shared(0)
n_sym = T.iscalar("n_sym") n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda:{k:(k+1)}, n_steps=n_sym) results, updates = theano.scan(lambda:{k:(k + 1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast = True) accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast=True)
k.get_value() k.get_value()
accumulator(5) accumulator(5)
k.get_value() k.get_value()
**Scan Example: Computing tanh(v.dot(W) + b)*d where d is binomial** **Scan Example: Computing tanh(v.dot(W) + b) * d where d is binomial**
.. code-block:: python .. code-block:: python
import theano import theano
import theano.tensor as T import theano.tensor as T
import numpy as np import numpy as np
# define tensor variables # define tensor variables
X = T.matrix("X") X = T.matrix("X")
W = T.matrix("W") W = T.matrix("W")
b_sym = T.vector("b_sym") b_sym = T.vector("b_sym")
# define shared random stream # define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234) trng = T.shared_randomstreams.RandomStreams(1234)
d=trng.binomial(size=W[1].shape) d=trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W)+b_sym)*d, sequences=X) results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d, sequences=X)
compute_with_bnoise = theano.function(inputs = [X, W, b_sym], outputs=[results], \ compute_with_bnoise = theano.function(inputs=[X, W, b_sym], outputs=[results],
updates=updates, allow_input_downcast = True) updates=updates, allow_input_downcast=True)
x = np.eye(10,2) x = np.eye(10, 2, dtype=theano.config.floatX)
w = np.ones((2,2)) w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2)) b = np.ones((2), dtype=theano.config.floatX)
print compute_with_bnoise(x, w, b) print compute_with_bnoise(x, w, b)
Note that if you want to use a random variable ``d`` that will not be updated through scan loops, you should pass this variable as a ``non_sequences`` argument. Note that if you want to use a random variable ``d`` that will not be updated through scan loops, you should pass this variable as a ``non_sequences`` argument.
**Scan Example: Computing pow(A,k)** **Scan Example: Computing pow(A, k)**
.. code-block:: python .. code-block:: python
...@@ -286,11 +285,11 @@ Note that if you want to use a random variable ``d`` that will not be updated th ...@@ -286,11 +285,11 @@ Note that if you want to use a random variable ``d`` that will not be updated th
# Scan has provided us with A ** 1 through A ** k. Keep only the last # Scan has provided us with A ** 1 through A ** k. Keep only the last
# value. Scan notices this and does not waste memory saving them. # value. Scan notices this and does not waste memory saving them.
final_result = result[-1] final_result = result[-1]
power = theano.function(inputs=[A, k], outputs=final_result, power = theano.function(inputs=[A, k], outputs=final_result,
updates=updates) updates=updates)
print power(range(10),2) print power(range(10), 2)
#[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.] #[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.]
......
...@@ -1146,7 +1146,8 @@ class test_fusion(unittest.TestCase): ...@@ -1146,7 +1146,8 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled and the canonicalize. #we need the optimisation enabled and the canonicalize.
#the canonicalize is needed to merge multiplication/addition by constant. #the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including( mode._optimizer = mode._optimizer.including(
'local_elemwise_fusion', 'canonicalize') 'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize')
self.do(mode, shared, shp) self.do(mode, shared, shp)
@attr('slow') @attr('slow')
...@@ -1156,7 +1157,8 @@ class test_fusion(unittest.TestCase): ...@@ -1156,7 +1157,8 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled and the canonicalize. #we need the optimisation enabled and the canonicalize.
#the canonicalize is needed to merge multiplication/addition by constant. #the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including( mode._optimizer = mode._optimizer.including(
'local_elemwise_fusion', 'canonicalize') 'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize')
self.do(mode, shared, shp) self.do(mode, shared, shp)
def test_gpu_fusion(self): def test_gpu_fusion(self):
...@@ -1164,10 +1166,12 @@ class test_fusion(unittest.TestCase): ...@@ -1164,10 +1166,12 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled, debug do this. #we need the optimisation enabled, debug do this.
if theano.config.mode == "FAST_COMPILE": if theano.config.mode == "FAST_COMPILE":
mode = theano.compile.mode.get_mode("FAST_RUN").including( mode = theano.compile.mode.get_mode("FAST_RUN").including(
'local_elemwise_fusion', 'canonicalize', 'gpu') 'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
else: else:
mode = theano.compile.mode.get_default_mode().including( mode = theano.compile.mode.get_default_mode().including(
'local_elemwise_fusion', 'canonicalize', 'gpu') 'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
if not cuda.cuda_available: if not cuda.cuda_available:
raise SkipTest("cuda not available") raise SkipTest("cuda not available")
...@@ -1179,10 +1183,12 @@ class test_fusion(unittest.TestCase): ...@@ -1179,10 +1183,12 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled, debug do this. #we need the optimisation enabled, debug do this.
if theano.config.mode == "FAST_COMPILE": if theano.config.mode == "FAST_COMPILE":
mode = theano.compile.mode.get_mode("FAST_RUN").including( mode = theano.compile.mode.get_mode("FAST_RUN").including(
'local_elemwise_fusion', 'canonicalize', 'gpu') 'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
else: else:
mode = theano.compile.mode.get_default_mode().including( mode = theano.compile.mode.get_default_mode().including(
'local_elemwise_fusion', 'canonicalize', 'gpu') 'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
if not cuda.cuda_available: if not cuda.cuda_available:
raise SkipTest("cuda not available") raise SkipTest("cuda not available")
...@@ -1278,7 +1284,8 @@ class test_fusion(unittest.TestCase): ...@@ -1278,7 +1284,8 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled and the canonicalize. #we need the optimisation enabled and the canonicalize.
#the canonicalize is needed to merge multiplication/addition by constant. #the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including( mode._optimizer = mode._optimizer.including(
'local_elemwise_fusion', 'canonicalize', 'inplace') 'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'inplace')
x, y, z = dmatrices('xyz') x, y, z = dmatrices('xyz')
f = theano.function([x, y, z], tensor.dot(x, y) + x + y + z, mode=mode) f = theano.function([x, y, z], tensor.dot(x, y) + x + y + z, mode=mode)
......
...@@ -1137,213 +1137,209 @@ class T_graphstructures(unittest.TestCase): ...@@ -1137,213 +1137,209 @@ class T_graphstructures(unittest.TestCase):
assert e.owner.inputs[1].owner.inputs[0] is y assert e.owner.inputs[1].owner.inputs[0] is y
assert e.owner.inputs[1].owner.inputs[1] is z assert e.owner.inputs[1].owner.inputs[1] is z
class T_scan(unittest.TestCase): class T_scan(unittest.TestCase):
## All tests here belong to ## All tests here belong to
## http://deeplearning.net/software/theano/tutorial/loop.html ## http://deeplearning.net/software/theano/tutorial/loop.html
## Theano/doc/tutorial/loop.txt ## Theano/doc/tutorial/loop.txt
## Any change you do here also add it to the tutorial ! ## Any change you do here also add it to the tutorial !
def test_elemwise(self): def test_elemwise(self):
# defining the tensor variables
# defining the tensor variables X = T.matrix("X")
X = T.matrix("X") W = T.matrix("W")
W = T.matrix("W") b_sym = T.vector("b_sym")
b_sym = T.vector("b_sym")
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym),
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W)+b_sym), \ sequences=X)
sequences=X) compute_elementwise = theano.function(inputs=[X, W, b_sym],
compute_elementwise = theano.function(inputs = [X, W, b_sym], \ outputs=[results])
outputs=[results])
# test values
# test values x = numpy.eye(2, dtype=theano.config.floatX)
x = numpy.eye(2) w = numpy.ones((2, 2), dtype=theano.config.floatX)
w = numpy.ones((2,2)) b = numpy.ones((2), dtype=theano.config.floatX)
b = numpy.ones((2)) b[1] = 2
b[1] = 2
print "Scan results:", compute_elementwise(x, w, b)[0]
print "Scan results:", compute_elementwise(x, w, b)[0]
# comparison with numpy
# comparison with numpy print "Numpy results:", numpy.tanh(x.dot(w) + b)
print "Numpy results:", numpy.tanh(x.dot(w) + b)
def test_sequence(self): def test_sequence(self):
# define tensor variables
# define tensor variables X = T.vector("X")
X = T.vector("X") W = T.matrix("W")
W = T.matrix("W") b_sym = T.vector("b_sym")
b_sym = T.vector("b_sym") U = T.matrix("U")
U = T.matrix("U") Y = T.matrix("Y")
Y = T.matrix("Y") V = T.matrix("V")
V = T.matrix("V") P = T.matrix("P")
P = T.matrix("P")
results, updates = theano.scan(
results, updates = theano.scan(lambda \ lambda y, p, x_tm1: T.tanh(T.dot(x_tm1, W) +
y,p,x_tm1:T.tanh(T.dot(x_tm1,W) + \ T.dot(y, U) + T.dot(p, V)),
T.dot(y,U)+T.dot(p,V)), \ sequences=[Y, P[::-1]], outputs_info=[X])
sequences=[Y,P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs=[X, W, Y, U, P, V],
compute_seq = theano.function(inputs = [X, W, Y, U, P, V], \ outputs=[results])
outputs=[results])
# test values
# test values x = numpy.zeros((2), dtype=theano.config.floatX)
x = numpy.zeros((2)) x[1] = 1
x[1] = 1 w = numpy.ones((2, 2), dtype=theano.config.floatX)
w = numpy.ones((2,2)) y = numpy.ones((5, 2), dtype=theano.config.floatX)
y = numpy.ones((5,2)) y[0, :] = -3
y[0,:] = -3 u = numpy.ones((2, 2), dtype=theano.config.floatX)
u = numpy.ones((2,2)) p = numpy.ones((5, 2), dtype=theano.config.floatX)
p = numpy.ones((5,2)) p[0, :] = 3
p[0,:] = 3 v = numpy.ones((2, 2), dtype=theano.config.floatX)
v = numpy.ones((2,2))
print "Scan results", compute_seq(x, w, y, u, p, v)[0]
print "Scan results", compute_seq(x,w,y,u,p,v)[0]
# comparison with numpy
# comparison with numpy x_res = numpy.zeros((5, 2), dtype=theano.config.floatX)
x_res = numpy.zeros((5,2)) x_res[0] = numpy.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
x_res[0] = numpy.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v)) for i in range(1, 5):
for i in range(1,5): x_res[i] = numpy.tanh(x_res[i-1].dot(w) +
x_res[i] = numpy.tanh(x_res[i-1].dot(w) \ y[i].dot(u) + p[4-i].dot(v))
+ y[i].dot(u) + p[4-i].dot(v))
print "Numpy results:", x_res
print "Numpy results:", x_res
def test_norm(self): def test_norm(self):
# define tensor variable # define tensor variable
X = T.matrix("X") X = T.matrix("X")
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), \ results, updates = theano.scan(lambda x_i: T.sqrt((x_i**2).sum()),
sequences=[X]) sequences=[X])
compute_norm_lines = theano.function(inputs = [X], outputs=[results]) compute_norm_lines = theano.function(inputs=[X], outputs=[results])
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), \ results, updates = theano.scan(lambda x_i: T.sqrt((x_i**2).sum()),
sequences=[X.T]) sequences=[X.T])
compute_norm_cols = theano.function(inputs = [X], outputs=[results]) compute_norm_cols = theano.function(inputs=[X], outputs=[results])
# test value # test value
x = numpy.diag(numpy.arange(1,6),1) x = numpy.diag(numpy.arange(1, 6, dtype=theano.config.floatX), 1)
print "Scan results:", compute_norm_lines(x)[0], \ print "Scan results:", compute_norm_lines(x)[0], \
compute_norm_cols(x)[0] compute_norm_cols(x)[0]
# comparison with numpy # comparison with numpy
print "Numpy results:", numpy.sqrt((x**2).sum(1)), \ print "Numpy results:", numpy.sqrt((x**2).sum(1)), \
numpy.sqrt((x**2).sum(0)) numpy.sqrt((x**2).sum(0))
def test_trace(self): def test_trace(self):
# define tensor variable
# define tensor variable X = T.matrix("X")
X = T.matrix("X") results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i,j] +
results, updates = theano.scan(lambda i, j, t_f:T.cast(X[i,j] + \ t_f, theano.config.floatX),
t_f, theano.config.floatX), \ sequences=[T.arange(X.shape[0]),
sequences=[T.arange(X.shape[0]), \ T.arange(X.shape[1])],
T.arange(X.shape[1])], \ outputs_info=numpy.asarray(
outputs_info=numpy.asarray(0., \ 0., dtype=theano.config.floatX))
dtype=theano.config.floatX))
result = results[-1]
result = results[-1] compute_trace = theano.function(inputs=[X], outputs=[result])
compute_trace = theano.function(inputs = [X], outputs=[result])
# test value
# test value x = numpy.eye(5, dtype=theano.config.floatX)
x = numpy.eye(5) x[0] = numpy.arange(5, dtype=theano.config.floatX)
x[0] = numpy.arange(5) print "Scan results:", compute_trace(x)[0]
print "Scan results:", compute_trace(x)[0]
# comparison with numpy
# comparison with numpy print "Numpy results:", numpy.diagonal(x).sum()
print "Numpy results:", numpy.diagonal(x).sum()
def test_taps(self): def test_taps(self):
# define tensor variables
# define tensor variables X = T.matrix("X")
X = T.matrix("X") W = T.matrix("W")
W = T.matrix("W") b_sym = T.vector("b_sym")
b_sym = T.vector("b_sym") U = T.matrix("U")
U = T.matrix("U") V = T.matrix("V")
V = T.matrix("V") n_sym = T.iscalar("n_sym")
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(
results, updates = theano.scan(lambda x_tm2,x_tm1:T.dot(x_tm2,U) \ lambda x_tm2,x_tm1: T.dot(x_tm2,U) + T.dot(x_tm1,V) + T.tanh(T.dot(x_tm1,W) + b_sym),
+ T.dot(x_tm1,V) + T.tanh(T.dot(x_tm1,W) + b_sym), \ n_steps=n_sym,
n_steps=n_sym, \ outputs_info=[dict(initial=X, taps=[-2, -1])])
outputs_info=[dict(initial = X, taps = [-2,-1])])
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym],
compute_seq2 = theano.function(inputs = [X, U, V, W, b_sym, \ outputs=[results])
n_sym], outputs=[results])
# test values
# test values x = numpy.zeros((2, 2), dtype=theano.config.floatX)
x = numpy.zeros((2,2)) # the initial value must be able to return x[-2]
# the initial value must be able to return x[-2] x[1, 1] = 1
x[1,1] = 1 w = 0.5 * numpy.ones((2, 2), dtype=theano.config.floatX)
w = 0.5*numpy.ones((2,2)) u = 0.5 * (numpy.ones((2, 2), dtype=theano.config.floatX) -
u = 0.5*(numpy.ones((2,2))-numpy.eye(2)) numpy.eye(2, dtype=theano.config.floatX))
v = 0.5*numpy.ones((2,2)) v = 0.5 * numpy.ones((2, 2), dtype=theano.config.floatX)
n = 10 n = 10
b = numpy.ones((2)) b = numpy.ones((2), dtype=theano.config.floatX)
print "Scan results:", compute_seq2(x,u,v,w,b,n) print "Scan results:", compute_seq2(x, u, v, w, b, n)
# comparison with numpy # comparison with numpy
x_res = numpy.zeros((10,2)) x_res = numpy.zeros((10, 2), dtype=theano.config.floatX)
x_res[0] = x[0].dot(u) + x[1].dot(v) + numpy.tanh(x[1].dot(w) + b) x_res[0] = x[0].dot(u) + x[1].dot(v) + numpy.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) \ x_res[1] = x[1].dot(u) + x_res[0].dot(v) \
+ numpy.tanh(x_res[0].dot(w) + b) + numpy.tanh(x_res[0].dot(w) + b)
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) \ x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) \
+ numpy.tanh(x_res[1].dot(w) + b) + numpy.tanh(x_res[1].dot(w) + b)
for i in range(2,10): for i in range(2, 10):
x_res[i] = (x_res[i-2].dot(u) + x_res[i-1].dot(v) \ x_res[i] = (x_res[i-2].dot(u) + x_res[i-1].dot(v) +
+ numpy.tanh(x_res[i-1].dot(w) + b)) numpy.tanh(x_res[i-1].dot(w) + b))
print "Numpy results:", x_res print "Numpy results:", x_res
def test_jacobian(self): def test_jacobian(self):
# define tensor variables
# define tensor variables v = T.vector()
v = T.vector() A = T.matrix()
A = T.matrix() y = T.tanh(T.dot(v, A))
y = T.tanh(T.dot(v,A)) results, updates = theano.scan(lambda i: T.grad(y[i], v),
results, updates = theano.scan(lambda i:T.grad(y[i], v), \ sequences=[T.arange(y.shape[0])])
sequences = [T.arange(y.shape[0])]) compute_jac_t = theano.function([A, v], [results],
compute_jac_t = theano.function([A,v], [results], \ allow_input_downcast=True) # shape (d_out, d_in)
allow_input_downcast = True) # shape (d_out, d_in)
# test values
# test values x = numpy.eye(5)[0]
x = numpy.eye(5)[0] w = numpy.eye(5, 3)
w = numpy.eye(5,3) w[2] = numpy.ones((3))
w[2] = numpy.ones((3)) print "Scan results:", compute_jac_t(w, x)[0]
print "Scan results:", compute_jac_t(w,x)[0]
# compare with numpy
# compare with numpy print "Numpy results:", ((1 - numpy.tanh(x.dot(w))**2)*w).T
print "Numpy results:", ((1 - numpy.tanh(x.dot(w))**2)*w).T
def test_accumulator(self): def test_accumulator(self):
# define shared variables # define shared variables
k = theano.shared(0) k = theano.shared(0)
n_sym = T.iscalar("n_sym") n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda:{k:(k+1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, \
allow_input_downcast = True)
print "Before 5 steps:", k.get_value()
accumulator(5)
print "After 5 steps:", k.get_value()
def test_random(self):
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
d=trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W) \
+ b_sym)*d, sequences=X)
compute_with_bnoise = theano.function(inputs = [X, W, b_sym], \
outputs=[results], \
updates=updates, \
allow_input_downcast = True)
x = numpy.eye(10,2)
w = numpy.ones((2,2))
b = numpy.ones((2))
print compute_with_bnoise(x, w, b)
results, updates = theano.scan(lambda: {k: (k + 1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates,
allow_input_downcast=True)
print "Before 5 steps:", k.get_value()
accumulator(5)
print "After 5 steps:", k.get_value()
def test_random(self):
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
d = trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d,
sequences=X)
compute_with_bnoise = theano.function(inputs=[X, W, b_sym],
outputs=[results],
updates=updates,
allow_input_downcast = True)
x = numpy.eye(10,2)
w = numpy.ones((2,2))
b = numpy.ones((2))
print compute_with_bnoise(x, w, b)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论