Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
33667eb7
提交
33667eb7
authored
10月 18, 2020
作者:
Brandon T. Willard
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Replace theano.tensor alias T with tt in documentation
上级
1e6bbdef
隐藏空白字符变更
内嵌
并排
正在显示
34 个修改的文件
包含
646 行增加
和
641 行删除
+646
-641
advanced_theano.txt
doc/cifarSC2011/advanced_theano.txt
+16
-16
theano.txt
doc/cifarSC2011/theano.txt
+27
-27
advanced_theano.txt
doc/crei2013/advanced_theano.txt
+8
-8
extending_theano_c.txt
doc/extending/extending_theano_c.txt
+2
-1
fibby.txt
doc/extending/fibby.txt
+3
-3
graphstructures.txt
doc/extending/graphstructures.txt
+12
-12
tips.txt
doc/extending/tips.txt
+3
-5
logreg_example.py
doc/hpcs2011_tutorial/logreg_example.py
+40
-36
presentation.tex
doc/hpcs2011_tutorial/presentation.tex
+101
-101
scan_poly.py
doc/hpcs2011_tutorial/scan_poly.py
+11
-11
scan_pow.py
doc/hpcs2011_tutorial/scan_pow.py
+12
-9
io.txt
doc/library/compile/io.txt
+10
-10
nanguardmode.txt
doc/library/compile/nanguardmode.txt
+4
-4
index.ipynb
doc/library/d3viz/index.ipynb
+7
-7
index.txt
doc/library/d3viz/index.txt
+15
-15
fft.txt
doc/library/gpuarray/fft.txt
+2
-2
printing.txt
doc/library/printing.txt
+17
-18
scan.txt
doc/library/scan.txt
+38
-38
basic.txt
doc/library/tensor/basic.txt
+55
-55
fft.txt
doc/library/tensor/fft.txt
+2
-2
nnet.txt
doc/library/tensor/nnet/nnet.txt
+24
-24
presentation.tex
doc/nextml2015/presentation.tex
+46
-46
logistic_regression_example.txt
doc/sandbox/logistic_regression_example.txt
+10
-10
adding.txt
doc/tutorial/adding.txt
+14
-14
broadcasting.txt
doc/tutorial/broadcasting.txt
+4
-5
conditions.txt
doc/tutorial/conditions.txt
+5
-5
debug_faq.txt
doc/tutorial/debug_faq.txt
+14
-14
examples.txt
doc/tutorial/examples.txt
+25
-24
gradients.txt
doc/tutorial/gradients.txt
+43
-43
index.txt
doc/tutorial/index.txt
+3
-3
loop.txt
doc/tutorial/loop.txt
+53
-53
modes.txt
doc/tutorial/modes.txt
+7
-7
printing_drawing.txt
doc/tutorial/printing_drawing.txt
+6
-6
using_gpu.txt
doc/tutorial/using_gpu.txt
+7
-7
没有找到文件。
doc/cifarSC2011/advanced_theano.txt
浏览文件 @
33667eb7
...
@@ -18,15 +18,15 @@ Conditions
...
@@ -18,15 +18,15 @@ Conditions
.. testcode::
.. testcode::
from theano import tensor as
T
from theano import tensor as
tt
from theano.ifelse import ifelse
from theano.ifelse import ifelse
import theano, time, numpy
import theano, time, numpy
a,b =
T
.scalars('a','b')
a,b =
tt
.scalars('a','b')
x,y =
T
.matrices('x','y')
x,y =
tt
.matrices('x','y')
z_switch =
T.switch(T.lt(a,b), T.mean(x), T
.mean(y))
z_switch =
tt.switch(tt.lt(a,b), tt.mean(x), tt
.mean(y))
z_lazy = ifelse(
T.lt(a,b), T.mean(x), T
.mean(y))
z_lazy = ifelse(
tt.lt(a,b), tt.mean(x), tt
.mean(y))
f_switch = theano.function([a,b,x,y], z_switch,
f_switch = theano.function([a,b,x,y], z_switch,
mode=theano.Mode(linker='vm'))
mode=theano.Mode(linker='vm'))
...
@@ -98,14 +98,14 @@ Loops
...
@@ -98,14 +98,14 @@ Loops
.. code-block:: python
.. code-block:: python
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
k =
T.iscalar("k"); A = T
.vector("A")
k =
tt.iscalar("k"); A = tt
.vector("A")
def inner_fct(prior_result, A): return prior_result * A
def inner_fct(prior_result, A): return prior_result * A
# Symbolic description of the result
# Symbolic description of the result
result, updates = theano.scan(fn=inner_fct,
result, updates = theano.scan(fn=inner_fct,
outputs_info=
T
.ones_like(A),
outputs_info=
tt
.ones_like(A),
non_sequences=A, n_steps=k)
non_sequences=A, n_steps=k)
# Scan has provided us with A**1 through A**k. Keep only the last
# Scan has provided us with A**1 through A**k. Keep only the last
...
@@ -125,10 +125,10 @@ Loops
...
@@ -125,10 +125,10 @@ Loops
import numpy
import numpy
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
coefficients = theano.tensor.vector("coefficients")
coefficients = theano.tensor.vector("coefficients")
x =
T
.scalar("x"); max_coefficients_supported = 10000
x =
tt
.scalar("x"); max_coefficients_supported = 10000
# Generate the components of the polynomial
# Generate the components of the polynomial
full_range=theano.tensor.arange(max_coefficients_supported)
full_range=theano.tensor.arange(max_coefficients_supported)
...
@@ -384,7 +384,7 @@ Consider the following logistic regression model:
...
@@ -384,7 +384,7 @@ Consider the following logistic regression model:
>>> import numpy
>>> import numpy
>>> import theano
>>> import theano
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
>>> rng = numpy.random
>>> rng = numpy.random
>>> # Training data
>>> # Training data
>>> N = 400
>>> N = 400
...
@@ -392,19 +392,19 @@ Consider the following logistic regression model:
...
@@ -392,19 +392,19 @@ Consider the following logistic regression model:
>>> D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
>>> D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
>>> training_steps = 10000
>>> training_steps = 10000
>>> # Declare Theano symbolic variables
>>> # Declare Theano symbolic variables
>>> x =
T
.matrix("x")
>>> x =
tt
.matrix("x")
>>> y =
T
.vector("y")
>>> y =
tt
.vector("y")
>>> w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
>>> w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
>>> b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
>>> b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
>>> x.tag.test_value = D[0]
>>> x.tag.test_value = D[0]
>>> y.tag.test_value = D[1]
>>> y.tag.test_value = D[1]
>>> # Construct Theano expression graph
>>> # Construct Theano expression graph
>>> p_1 = 1 / (1 +
T.exp(-T
.dot(x, w)-b)) # Probability of having a one
>>> p_1 = 1 / (1 +
tt.exp(-tt
.dot(x, w)-b)) # Probability of having a one
>>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
>>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
>>> # Compute gradients
>>> # Compute gradients
>>> xent = -y*
T.log(p_1) - (1-y)*T
.log(1-p_1) # Cross-entropy
>>> xent = -y*
tt.log(p_1) - (1-y)*tt
.log(1-p_1) # Cross-entropy
>>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
>>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
>>> gw,gb =
T
.grad(cost, [w,b])
>>> gw,gb =
tt
.grad(cost, [w,b])
>>> # Training and prediction function
>>> # Training and prediction function
>>> train = theano.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train")
>>> train = theano.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train")
>>> predict = theano.function(inputs=[x], outputs=prediction, name = "predict")
>>> predict = theano.function(inputs=[x], outputs=prediction, name = "predict")
...
...
doc/cifarSC2011/theano.txt
浏览文件 @
33667eb7
...
@@ -99,7 +99,7 @@ Real example
...
@@ -99,7 +99,7 @@ Real example
import numpy
import numpy
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
rng = numpy.random
rng = numpy.random
N = 400
N = 400
...
@@ -108,19 +108,19 @@ Real example
...
@@ -108,19 +108,19 @@ Real example
training_steps = 10000
training_steps = 10000
# Declare Theano symbolic variables
# Declare Theano symbolic variables
x =
T
.matrix("x")
x =
tt
.matrix("x")
y =
T
.vector("y")
y =
tt
.vector("y")
w = theano.shared(rng.randn(feats), name="w")
w = theano.shared(rng.randn(feats), name="w")
b = theano.shared(0., name="b")
b = theano.shared(0., name="b")
print "Initial model:"
print "Initial model:"
print w.get_value(), b.get_value()
print w.get_value(), b.get_value()
# Construct Theano expression graph
# Construct Theano expression graph
p_1 = 1 / (1 +
T.exp(-T
.dot(x, w)-b)) # Probability that target = 1
p_1 = 1 / (1 +
tt.exp(-tt
.dot(x, w)-b)) # Probability that target = 1
prediction = p_1 > 0.5 # The prediction thresholded
prediction = p_1 > 0.5 # The prediction thresholded
xent = -y*
T.log(p_1) - (1-y)*T
.log(1-p_1) # Cross-entropy loss function
xent = -y*
tt.log(p_1) - (1-y)*tt
.log(1-p_1) # Cross-entropy loss function
cost = xent.mean() + 0.01*(w**2).sum() # The cost to minimize
cost = xent.mean() + 0.01*(w**2).sum() # The cost to minimize
gw,gb =
T
.grad(cost, [w,b])
gw,gb =
tt
.grad(cost, [w,b])
# Compile
# Compile
train = theano.function(
train = theano.function(
...
@@ -145,7 +145,7 @@ Where are those optimization applied?
...
@@ -145,7 +145,7 @@ Where are those optimization applied?
* ``log(1+exp(x))``
* ``log(1+exp(x))``
* ``1 / (1 +
T
.exp(var))`` (sigmoid)
* ``1 / (1 +
tt
.exp(var))`` (sigmoid)
* ``log(1-sigmoid(var))`` (softplus, stabilisation)
* ``log(1-sigmoid(var))`` (softplus, stabilisation)
...
@@ -156,13 +156,13 @@ Where are those optimization applied?
...
@@ -156,13 +156,13 @@ Where are those optimization applied?
.. code-block:: python
.. code-block:: python
p_1 = 1 / (1 +
T.exp(-T
.dot(x, w)-b))
p_1 = 1 / (1 +
tt.exp(-tt
.dot(x, w)-b))
# 1 / (1 +
T
.exp(var)) -> sigmoid(var)
# 1 / (1 +
tt
.exp(var)) -> sigmoid(var)
xent = -y*
T.log(p_1) - (1-y)*T
.log(1-p_1)
xent = -y*
tt.log(p_1) - (1-y)*tt
.log(1-p_1)
# Log(1-sigmoid(var)) -> -sigmoid(var)
# Log(1-sigmoid(var)) -> -sigmoid(var)
prediction = p_1 > 0.5
prediction = p_1 > 0.5
cost = xent.mean() + 0.01*(w**2).sum()
cost = xent.mean() + 0.01*(w**2).sum()
gw,gb =
T
.grad(cost, [w,b])
gw,gb =
tt
.grad(cost, [w,b])
train = theano.function(
train = theano.function(
inputs=[x,y],
inputs=[x,y],
...
@@ -188,7 +188,7 @@ Exercise 2
...
@@ -188,7 +188,7 @@ Exercise 2
import numpy
import numpy
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
rng = numpy.random
rng = numpy.random
N = 400
N = 400
...
@@ -198,8 +198,8 @@ Exercise 2
...
@@ -198,8 +198,8 @@ Exercise 2
training_steps = 10000
training_steps = 10000
# Declare Theano symbolic variables
# Declare Theano symbolic variables
x =
T
.matrix("x")
x =
tt
.matrix("x")
y =
T
.vector("y")
y =
tt
.vector("y")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0]
x.tag.test_value = D[0]
...
@@ -209,11 +209,11 @@ Exercise 2
...
@@ -209,11 +209,11 @@ Exercise 2
# Construct Theano expression graph
# Construct Theano expression graph
p_1 = 1 / (1 +
T.exp(-T
.dot(x, w)-b)) # Probability of having a one
p_1 = 1 / (1 +
tt.exp(-tt
.dot(x, w)-b)) # Probability of having a one
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
xent = -y*
T.log(p_1) - (1-y)*T
.log(1-p_1) # Cross-entropy
xent = -y*
tt.log(p_1) - (1-y)*tt
.log(1-p_1) # Cross-entropy
cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
gw,gb =
T
.grad(cost, [w,b])
gw,gb =
tt
.grad(cost, [w,b])
# Compile expressions to functions
# Compile expressions to functions
train = theano.function(
train = theano.function(
...
@@ -296,19 +296,19 @@ Symbolic variables
...
@@ -296,19 +296,19 @@ Symbolic variables
* # Dimensions
* # Dimensions
*
T.scalar, T.vector, T.matrix, T.tensor3, T
.tensor4
*
tt.scalar, tt.vector, tt.matrix, tt.tensor3, tt
.tensor4
* Dtype
* Dtype
*
T
.[fdczbwil]vector (float32, float64, complex64, complex128, int8, int16, int32, int64)
*
tt
.[fdczbwil]vector (float32, float64, complex64, complex128, int8, int16, int32, int64)
*
T
.vector to floatX dtype
*
tt
.vector to floatX dtype
* floatX: configurable dtype that can be float32 or float64.
* floatX: configurable dtype that can be float32 or float64.
* Custom variable
* Custom variable
* All are shortcuts to: ``
T
.tensor(dtype, broadcastable=[False]*nd)``
* All are shortcuts to: ``
tt
.tensor(dtype, broadcastable=[False]*nd)``
* Other dtype: uint[8,16,32,64], floatX
* Other dtype: uint[8,16,32,64], floatX
...
@@ -325,21 +325,21 @@ Details regarding symbolic broadcasting...
...
@@ -325,21 +325,21 @@ Details regarding symbolic broadcasting...
* Broadcastability must be specified when creating the variable
* Broadcastability must be specified when creating the variable
* The only shorcut with broadcastable dimensions are: **
T.row** and **T
.col**
* The only shorcut with broadcastable dimensions are: **
tt.row** and **tt
.col**
* For all others: ``
T
.tensor(dtype, broadcastable=([False or True])*nd)``
* For all others: ``
tt
.tensor(dtype, broadcastable=([False or True])*nd)``
Differentiation details
Differentiation details
-----------------------
-----------------------
>>> gw,gb =
T
.grad(cost, [w,b]) # doctest: +SKIP
>>> gw,gb =
tt
.grad(cost, [w,b]) # doctest: +SKIP
*
T
.grad works symbolically: takes and returns a Theano variable
*
tt
.grad works symbolically: takes and returns a Theano variable
*
T
.grad can be compared to a macro: it can be applied multiple times
*
tt
.grad can be compared to a macro: it can be applied multiple times
*
T
.grad takes scalar costs only
*
tt
.grad takes scalar costs only
* Simple recipe allows to compute efficiently vector x Jacobian and vector x Hessian
* Simple recipe allows to compute efficiently vector x Jacobian and vector x Hessian
...
...
doc/crei2013/advanced_theano.txt
浏览文件 @
33667eb7
...
@@ -77,7 +77,7 @@ Loops
...
@@ -77,7 +77,7 @@ Loops
- ``sum()`` could be computed by scanning the z + x(i) function over a list, given an initial state of ``z=0``.
- ``sum()`` could be computed by scanning the z + x(i) function over a list, given an initial state of ``z=0``.
- Often a for-loop can be expressed as a ``scan()`` operation, and ``scan`` is the closest that Theano comes to looping.
- Often a for-loop can be expressed as a ``scan()`` operation, and ``scan`` is the closest that Theano comes to looping.
- The advantage of using ``scan`` over for loops
- The advantage of using ``scan`` over for loops
- The number of iterations to be part of the symbolic graph
- The number of iterations to be part of the symbolic graph
- Minimizes GPU transfers if GPU is involved
- Minimizes GPU transfers if GPU is involved
- Compute gradients through sequential steps
- Compute gradients through sequential steps
...
@@ -96,7 +96,7 @@ Loops
...
@@ -96,7 +96,7 @@ Loops
Exercise 4
Exercise 4
-----------
-----------
- Run both examples
- Run both examples
- Modify and execute the polynomial example to have the reduction done by scan
- Modify and execute the polynomial example to have the reduction done by scan
...
@@ -116,7 +116,7 @@ Consider the following logistic regression model:
...
@@ -116,7 +116,7 @@ Consider the following logistic regression model:
>>> import numpy
>>> import numpy
>>> import theano
>>> import theano
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
>>> rng = numpy.random
>>> rng = numpy.random
>>> # Training data
>>> # Training data
>>> N = 400
>>> N = 400
...
@@ -124,19 +124,19 @@ Consider the following logistic regression model:
...
@@ -124,19 +124,19 @@ Consider the following logistic regression model:
>>> D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
>>> D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
>>> training_steps = 10000
>>> training_steps = 10000
>>> # Declare Theano symbolic variables
>>> # Declare Theano symbolic variables
>>> x =
T
.matrix("x")
>>> x =
tt
.matrix("x")
>>> y =
T
.vector("y")
>>> y =
tt
.vector("y")
>>> w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
>>> w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
>>> b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
>>> b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
>>> x.tag.test_value = D[0]
>>> x.tag.test_value = D[0]
>>> y.tag.test_value = D[1]
>>> y.tag.test_value = D[1]
>>> # Construct Theano expression graph
>>> # Construct Theano expression graph
>>> p_1 = 1 / (1 +
T
.exp(-T.dot(x, w)-b)) # Probability of having a one
>>> p_1 = 1 / (1 +
tt
.exp(-T.dot(x, w)-b)) # Probability of having a one
>>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
>>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
>>> # Compute gradients
>>> # Compute gradients
>>> xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) # Cross-entropy
>>> xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) # Cross-entropy
>>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
>>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
>>> gw,gb =
T
.grad(cost, [w,b])
>>> gw,gb =
tt
.grad(cost, [w,b])
>>> # Training and prediction function
>>> # Training and prediction function
>>> train = theano.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train")
>>> train = theano.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train")
>>> predict = theano.function(inputs=[x], outputs=prediction, name = "predict")
>>> predict = theano.function(inputs=[x], outputs=prediction, name = "predict")
...
@@ -251,7 +251,7 @@ Debugging
...
@@ -251,7 +251,7 @@ Debugging
- For pure symbolic variables uses ``x.tag.test_value = numpy.random.rand(5,10)``
- For pure symbolic variables uses ``x.tag.test_value = numpy.random.rand(5,10)``
- Run with the flag ``mode=FAST_COMPILE``
- Run with the flag ``mode=FAST_COMPILE``
- Few optimizations
- Few optimizations
- Run Python code (better error messages and can be debugged interactively in the Python debugger)
- Run Python code (better error messages and can be debugged interactively in the Python debugger)
...
...
doc/extending/extending_theano_c.txt
浏览文件 @
33667eb7
...
@@ -474,8 +474,9 @@ storage with the right shape and number of dimensions.
...
@@ -474,8 +474,9 @@ storage with the right shape and number of dimensions.
import numpy
import numpy
import theano
import theano
from theano import gof
from theano import gof
import theano.tensor as T
class VectorTimesScalar(gof.Op):
class VectorTimesScalar(gof.Op):
__props__ = ()
__props__ = ()
...
...
doc/extending/fibby.txt
浏览文件 @
33667eb7
...
@@ -137,12 +137,12 @@ Here is some code to test that the optimization is applied only when needed.
...
@@ -137,12 +137,12 @@ Here is some code to test that the optimization is applied only when needed.
.. testcode::
.. testcode::
import numpy
import numpy
import theano.tensor as
T
import theano.tensor as
tt
from theano import function
from theano import function
from theano import tensor
from theano import tensor
# Test it does not apply when not needed
# Test it does not apply when not needed
x =
T
.dvector()
x =
tt
.dvector()
f = function([x], fibby(x))
f = function([x], fibby(x))
# We call the function to make sure it runs.
# We call the function to make sure it runs.
...
@@ -153,7 +153,7 @@ Here is some code to test that the optimization is applied only when needed.
...
@@ -153,7 +153,7 @@ Here is some code to test that the optimization is applied only when needed.
assert isinstance(topo[0].op, Fibby)
assert isinstance(topo[0].op, Fibby)
# Test that the optimization gets applied.
# Test that the optimization gets applied.
f_zero = function([], fibby(
T
.zeros([5])))
f_zero = function([], fibby(
tt
.zeros([5])))
# If you run in DebugMode, it will compare the output before
# If you run in DebugMode, it will compare the output before
# and after the optimization.
# and after the optimization.
...
...
doc/extending/graphstructures.txt
浏览文件 @
33667eb7
...
@@ -22,9 +22,9 @@ graphs are composed of interconnected :ref:`apply`, :ref:`variable` and
...
@@ -22,9 +22,9 @@ graphs are composed of interconnected :ref:`apply`, :ref:`variable` and
:ref:`op` nodes. *Apply* node represents the application of an *op* to some
:ref:`op` nodes. *Apply* node represents the application of an *op* to some
*variables*. It is important to draw the difference between the
*variables*. It is important to draw the difference between the
definition of a computation represented by an *op* and its application
definition of a computation represented by an *op* and its application
to some actual data which is represented by the *apply* node.
to some actual data which is represented by the *apply* node.
Furthermore, data types are represented by :ref:`type` instances. Here is a
Furthermore, data types are represented by :ref:`type` instances. Here is a
piece of code and a diagram showing the structure built by that piece of code.
piece of code and a diagram showing the structure built by that piece of code.
This should help you understand how these pieces fit together:
This should help you understand how these pieces fit together:
...
@@ -32,10 +32,10 @@ This should help you understand how these pieces fit together:
...
@@ -32,10 +32,10 @@ This should help you understand how these pieces fit together:
.. testcode::
.. testcode::
import theano.tensor as
T
import theano.tensor as
tt
x =
T
.dmatrix('x')
x =
tt
.dmatrix('x')
y =
T
.dmatrix('y')
y =
tt
.dmatrix('y')
z = x + y
z = x + y
**Diagram**
**Diagram**
...
@@ -442,13 +442,13 @@ The output file is available at ./pics/symbolic_graph_opt.png
...
@@ -442,13 +442,13 @@ The output file is available at ./pics/symbolic_graph_opt.png
We used :func:`theano.printing.pydotprint` to visualize the optimized graph
We used :func:`theano.printing.pydotprint` to visualize the optimized graph
(right), which is much more compact than the unoptimized graph (left).
(right), which is much more compact than the unoptimized graph (left).
.. |g1| image:: ./pics/symbolic_graph_unopt.png
.. |g1| image:: ./pics/symbolic_graph_unopt.png
:width: 500 px
:width: 500 px
.. |g2| image:: ./pics/symbolic_graph_opt.png
.. |g2| image:: ./pics/symbolic_graph_opt.png
:width: 500 px
:width: 500 px
================================ ====================== ================================
================================ ====================== ================================
Unoptimized graph Optimized graph
Unoptimized graph Optimized graph
================================ ====================== ================================
================================ ====================== ================================
|g1| |g2|
|g1| |g2|
================================ ====================== ================================
================================ ====================== ================================
doc/extending/tips.txt
浏览文件 @
33667eb7
...
@@ -21,10 +21,10 @@ simple function:
...
@@ -21,10 +21,10 @@ simple function:
.. testcode::
.. testcode::
from theano import tensor as
T
from theano import tensor as
tt
def sum_square_difference(a, b):
def sum_square_difference(a, b):
return
T
.sum((a - b)**2)
return
tt
.sum((a - b)**2)
Even without taking Theano's optimizations into account, it is likely
Even without taking Theano's optimizations into account, it is likely
to work just as well as a custom implementation. It also supports all
to work just as well as a custom implementation. It also supports all
...
@@ -40,7 +40,7 @@ Theano provides some generic Op classes which allow you to generate a
...
@@ -40,7 +40,7 @@ Theano provides some generic Op classes which allow you to generate a
lot of Ops at a lesser effort. For instance, Elemwise can be used to
lot of Ops at a lesser effort. For instance, Elemwise can be used to
make :term:`elementwise` operations easily whereas DimShuffle can be
make :term:`elementwise` operations easily whereas DimShuffle can be
used to make transpose-like transformations. These higher order Ops
used to make transpose-like transformations. These higher order Ops
are mostly Tensor-related, as this is Theano's specialty.
are mostly Tensor-related, as this is Theano's specialty.
.. _opchecklist:
.. _opchecklist:
...
@@ -53,5 +53,3 @@ defining a new Op. It might not be exhaustive but it covers a lot of
...
@@ -53,5 +53,3 @@ defining a new Op. It might not be exhaustive but it covers a lot of
common mistakes.
common mistakes.
WRITEME
WRITEME
doc/hpcs2011_tutorial/logreg_example.py
浏览文件 @
33667eb7
import
numpy
as
np
import
numpy
as
np
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
tt
rng
=
np
.
random
rng
=
np
.
random
N
=
400
N
=
400
feats
=
784
feats
=
784
D
=
(
rng
.
randn
(
N
,
feats
)
.
astype
(
theano
.
config
.
floatX
),
rng
.
randint
(
size
=
N
,
low
=
0
,
high
=
2
)
.
astype
(
theano
.
config
.
floatX
))
D
=
(
rng
.
randn
(
N
,
feats
)
.
astype
(
theano
.
config
.
floatX
),
rng
.
randint
(
size
=
N
,
low
=
0
,
high
=
2
)
.
astype
(
theano
.
config
.
floatX
),
)
training_steps
=
10000
training_steps
=
10000
# Declare Theano symbolic variables
# Declare Theano symbolic variables
x
=
T
.
matrix
(
"x"
)
x
=
tt
.
matrix
(
"x"
)
y
=
T
.
vector
(
"y"
)
y
=
tt
.
vector
(
"y"
)
w
=
theano
.
shared
(
rng
.
randn
(
feats
)
.
astype
(
theano
.
config
.
floatX
),
name
=
"w"
)
w
=
theano
.
shared
(
rng
.
randn
(
feats
)
.
astype
(
theano
.
config
.
floatX
),
name
=
"w"
)
b
=
theano
.
shared
(
np
.
asarray
(
0.
,
dtype
=
theano
.
config
.
floatX
),
name
=
"b"
)
b
=
theano
.
shared
(
np
.
asarray
(
0.
0
,
dtype
=
theano
.
config
.
floatX
),
name
=
"b"
)
x
.
tag
.
test_value
=
D
[
0
]
x
.
tag
.
test_value
=
D
[
0
]
y
.
tag
.
test_value
=
D
[
1
]
y
.
tag
.
test_value
=
D
[
1
]
#print "Initial model:"
#
print "Initial model:"
#print w.get_value(), b.get_value()
#
print w.get_value(), b.get_value()
# Construct Theano expression graph
# Construct Theano expression graph
p_1
=
1
/
(
1
+
T
.
exp
(
-
T
.
dot
(
x
,
w
)
-
b
))
# Probability of having a one
p_1
=
1
/
(
1
+
tt
.
exp
(
-
tt
.
dot
(
x
,
w
)
-
b
))
# Probability of having a one
prediction
=
p_1
>
0.5
# The prediction that is done: 0 or 1
prediction
=
p_1
>
0.5
# The prediction that is done: 0 or 1
xent
=
-
y
*
T
.
log
(
p_1
)
-
(
1
-
y
)
*
T
.
log
(
1
-
p_1
)
# Cross-entropy
xent
=
-
y
*
tt
.
log
(
p_1
)
-
(
1
-
y
)
*
tt
.
log
(
1
-
p_1
)
# Cross-entropy
cost
=
xent
.
mean
()
+
0.01
*
(
w
**
2
)
.
sum
()
# The cost to optimize
cost
=
xent
.
mean
()
+
0.01
*
(
w
**
2
)
.
sum
()
# The cost to optimize
gw
,
gb
=
T
.
grad
(
cost
,
[
w
,
b
])
gw
,
gb
=
tt
.
grad
(
cost
,
[
w
,
b
])
# Compile expressions to functions
# Compile expressions to functions
train
=
theano
.
function
(
train
=
theano
.
function
(
inputs
=
[
x
,
y
],
inputs
=
[
x
,
y
],
outputs
=
[
prediction
,
xent
],
outputs
=
[
prediction
,
xent
],
updates
=
{
w
:
w
-
0.01
*
gw
,
b
:
b
-
0.01
*
gb
},
updates
=
{
w
:
w
-
0.01
*
gw
,
b
:
b
-
0.01
*
gb
},
name
=
"train"
)
name
=
"train"
,
predict
=
theano
.
function
(
inputs
=
[
x
],
outputs
=
prediction
,
)
name
=
"predict"
)
predict
=
theano
.
function
(
inputs
=
[
x
],
outputs
=
prediction
,
name
=
"predict"
)
if
any
(
[
x
.
op
.
__class__
.
__name__
==
'Gemv'
for
x
in
train
.
maker
.
fgraph
.
toposort
()]):
if
any
(
[
x
.
op
.
__class__
.
__name__
==
"Gemv"
for
x
in
train
.
maker
.
fgraph
.
toposort
()]):
print
(
'Used the cpu'
)
print
(
"Used the cpu"
)
elif
any
(
[
x
.
op
.
__class__
.
__name__
==
'GpuGemm'
for
x
in
train
.
maker
.
fgraph
.
toposort
()]):
elif
any
(
[
x
.
op
.
__class__
.
__name__
==
"GpuGemm"
for
x
in
train
.
maker
.
fgraph
.
toposort
()]):
print
(
'Used the gpu'
)
print
(
"Used the gpu"
)
else
:
else
:
print
(
'ERROR, not able to tell if theano used the cpu or the gpu'
)
print
(
"ERROR, not able to tell if theano used the cpu or the gpu"
)
print
(
train
.
maker
.
fgraph
.
toposort
())
print
(
train
.
maker
.
fgraph
.
toposort
())
for
i
in
range
(
training_steps
):
for
i
in
range
(
training_steps
):
pred
,
err
=
train
(
D
[
0
],
D
[
1
])
pred
,
err
=
train
(
D
[
0
],
D
[
1
])
#print "Final model:"
#
print "Final model:"
#print w.get_value(), b.get_value()
#
print w.get_value(), b.get_value()
print
(
"target values for D"
)
print
(
"target values for D"
)
print
(
D
[
1
])
print
(
D
[
1
])
...
@@ -58,12 +60,14 @@ print("prediction on D")
...
@@ -58,12 +60,14 @@ print("prediction on D")
print
(
predict
(
D
[
0
]))
print
(
predict
(
D
[
0
]))
# Print the graph used in the slides
# Print the graph used in the slides
theano
.
printing
.
pydotprint
(
predict
,
theano
.
printing
.
pydotprint
(
outfile
=
"pics/logreg_pydotprint_predic.png"
,
predict
,
outfile
=
"pics/logreg_pydotprint_predic.png"
,
var_with_name_simple
=
True
var_with_name_simple
=
True
)
)
theano
.
printing
.
pydotprint
(
prediction
,
theano
.
printing
.
pydotprint
(
outfile
=
"pics/logreg_pydotprint_prediction.png"
,
prediction
,
var_with_name_simple
=
True
)
outfile
=
"pics/logreg_pydotprint_prediction.png"
,
theano
.
printing
.
pydotprint
(
train
,
var_with_name_simple
=
True
,
outfile
=
"pics/logreg_pydotprint_train.png"
,
)
var_with_name_simple
=
True
)
theano
.
printing
.
pydotprint
(
train
,
outfile
=
"pics/logreg_pydotprint_train.png"
,
var_with_name_simple
=
True
)
doc/hpcs2011_tutorial/presentation.tex
浏览文件 @
33667eb7
...
@@ -20,28 +20,28 @@ Laboratoire d'Informatique des Syst\`emes Adaptatifs \\
...
@@ -20,28 +20,28 @@ Laboratoire d'Informatique des Syst\`emes Adaptatifs \\
D
\'
epartement d'informatique et de recherche op
\'
erationelle
}
D
\'
epartement d'informatique et de recherche op
\'
erationelle
}
\date
{
\date
{
James Bergstra, Olivier Breuleux, Frederic Bastien,
James Bergstra, Olivier Breuleux, Frederic Bastien,
\vfill
\vfill
\vfill
\vfill
{
\small
{
\small
Arnaud Bergeron,
Arnaud Bergeron,
Yoshua Bengio,
Yoshua Bengio,
Thierry Bertin-Mahieux,
Thierry Bertin-Mahieux,
Josh Bleecher Snyder,
Josh Bleecher Snyder,
Olivier Delalleau,
Olivier Delalleau,
Guillaume Desjardins,
Guillaume Desjardins,
Douglas Eck,
Douglas Eck,
Dumitru Erhan,
Dumitru Erhan,
Xavier Glorot,
Xavier Glorot,
Ian Goodfellow,
Ian Goodfellow,
Philippe Hamel,
Philippe Hamel,
Pascal Lamblin,
Pascal Lamblin,
Simon Lemieux,
Simon Lemieux,
Michael Mandel,
Michael Mandel,
Razvan Pascanu,
Razvan Pascanu,
Fran
\c
{
c
}
ois Savard,
Fran
\c
{
c
}
ois Savard,
Joseph Turian,
Joseph Turian,
David Warde-Farley
David Warde-Farley
}
}
...
@@ -70,26 +70,26 @@ HPCS 2011, Montr\'eal
...
@@ -70,26 +70,26 @@ HPCS 2011, Montr\'eal
%{\small Université de Montr\'eal}
%{\small Université de Montr\'eal}
%\end{spacing}
%\end{spacing}
\vfill
\vfill
James Bergstra, Olivier Breuleux, Frederic Bastien,
James Bergstra, Olivier Breuleux, Frederic Bastien,
\vfill
\vfill
{
\footnotesize
%\small
{
\footnotesize
%\small
Arnaud Bergeron,
Arnaud Bergeron,
Yoshua Bengio,
Yoshua Bengio,
Thierry Bertin-Mahieux,
Thierry Bertin-Mahieux,
Josh Bleecher Snyder,
Josh Bleecher Snyder,
Olivier Delalleau,
Olivier Delalleau,
Guillaume Desjardins,
Guillaume Desjardins,
Douglas Eck,
Douglas Eck,
Dumitru Erhan,
Dumitru Erhan,
Xavier Glorot,
Xavier Glorot,
Ian Goodfellow,
Ian Goodfellow,
Philippe Hamel,
Philippe Hamel,
Pascal Lamblin,
Pascal Lamblin,
Simon Lemieux,
Simon Lemieux,
Michael Mandel,
Michael Mandel,
Razvan Pascanu,
Razvan Pascanu,
Fran
\c
{
c
}
ois Savard,
Fran
\c
{
c
}
ois Savard,
Joseph Turian,
Joseph Turian,
David Warde-Farley
David Warde-Farley
}
}
\vfill
\vfill
...
@@ -126,7 +126,7 @@ HPCS 2011, Montr\'eal
...
@@ -126,7 +126,7 @@ HPCS 2011, Montr\'eal
\frame
{
\frame
{
\frametitle
{
Project Status
}
\frametitle
{
Project Status
}
Why you can rely on Theano:
Why you can rely on Theano:
\begin{itemize}
\begin{itemize}
\item
Theano has been developed and used since January 2008 (3.5 yrs old)
\item
Theano has been developed and used since January 2008 (3.5 yrs old)
\item
Core technology for a funded Silicon-Valley startup
\item
Core technology for a funded Silicon-Valley startup
\item
Driven over 40 research papers in the last few years
\item
Driven over 40 research papers in the last few years
...
@@ -169,7 +169,7 @@ HPCS 2011, Montr\'eal
...
@@ -169,7 +169,7 @@ HPCS 2011, Montr\'eal
% gpu for exercices
% gpu for exercices
% Exercises 1 and how to download the files
% Exercises 1 and how to download the files
\item
Real example
\item
Real example
% More info on
T
.grad
% More info on
tt
.grad
% Where are the optimization in the example?
% Where are the optimization in the example?
% Exercises 2: logreg\_example.py
% Exercises 2: logreg\_example.py
\item
Theano Flags
\item
Theano Flags
...
@@ -232,7 +232,7 @@ HPCS 2011, Montr\'eal
...
@@ -232,7 +232,7 @@ HPCS 2011, Montr\'eal
\begin{tabular}
{
lcr
}
\begin{tabular}
{
lcr
}
%\imagetop{\includegraphics[width=1.in]{pics/theano_logo.png}}&
%\imagetop{\includegraphics[width=1.in]{pics/theano_logo.png}}&
\imagetop
{
\includegraphics
[width=.6in]
{
pics/pycuda-logo-crop.pdf
}}
\imagetop
{
\includegraphics
[width=.6in]
{
pics/pycuda-logo-crop.pdf
}}
\end{tabular}
\end{tabular}
}
}
\frame
{
\frame
{
...
@@ -257,7 +257,7 @@ HPCS 2011, Montr\'eal
...
@@ -257,7 +257,7 @@ HPCS 2011, Montr\'eal
\item
Quality of implementation
\item
Quality of implementation
\item
How much time was spent optimizing CPU vs GPU code
\item
How much time was spent optimizing CPU vs GPU code
\end{itemize}
\end{itemize}
\item
In Theory:
\item
In Theory:
\begin{itemize}
\begin{itemize}
\item
Intel Core i7 980 XE (107Gf/s float64) 6 cores
\item
Intel Core i7 980 XE (107Gf/s float64) 6 cores
\item
NVIDIA C2050 (515 Gf/s float64, 1Tf/s float32) 480 cores
\item
NVIDIA C2050 (515 Gf/s float64, 1Tf/s float32) 480 cores
...
@@ -393,7 +393,7 @@ HPCS 2011, Montr\'eal
...
@@ -393,7 +393,7 @@ HPCS 2011, Montr\'eal
\item
User mailing list: http://groups.google.com/group/theano-users
\item
User mailing list: http://groups.google.com/group/theano-users
\item
Deep Learning Tutorials: http://www.deeplearning.net/tutorial/
\item
Deep Learning Tutorials: http://www.deeplearning.net/tutorial/
\vfill
\vfill
\item
Installation: https://deeplearning.net/software/theano/install.html
\item
Installation: https://deeplearning.net/software/theano/install.html
\end{itemize}
\end{itemize}
}
}
...
@@ -407,7 +407,7 @@ HPCS 2011, Montr\'eal
...
@@ -407,7 +407,7 @@ HPCS 2011, Montr\'eal
\begin{itemize}
\begin{itemize}
\item
Theano computes derivatives of functions with one or many inputs.
\item
Theano computes derivatives of functions with one or many inputs.
\end{itemize}
\end{itemize}
\item
Speed and stability optimizations
\item
Speed and stability optimizations
\begin{itemize}
\begin{itemize}
\item
Gives the right answer for
$
\log
(
1
+
x
)
$
even if x is really tiny.
\item
Gives the right answer for
$
\log
(
1
+
x
)
$
even if x is really tiny.
\end{itemize}
\end{itemize}
...
@@ -422,12 +422,12 @@ HPCS 2011, Montr\'eal
...
@@ -422,12 +422,12 @@ HPCS 2011, Montr\'eal
\frame
{
\frame
{
\frametitle
{
Description 2
}
\frametitle
{
Description 2
}
\begin{itemize}
\begin{itemize}
\item
Extensive unit-testing and self-verification
\item
Extensive unit-testing and self-verification
\begin{itemize}
\begin{itemize}
\item
Detects and diagnoses many types of errors
\item
Detects and diagnoses many types of errors
\end{itemize}
\end{itemize}
\item
On CPU, common machine learning algorithms are 1.6x to 7.5x faster than competitive alternatives
\item
On CPU, common machine learning algorithms are 1.6x to 7.5x faster than competitive alternatives
\begin{itemize}
\begin{itemize}
\item
including specialized implementations in C/C++, NumPy, SciPy, and Matlab
\item
including specialized implementations in C/C++, NumPy, SciPy, and Matlab
\end{itemize}
\end{itemize}
...
@@ -443,7 +443,7 @@ HPCS 2011, Montr\'eal
...
@@ -443,7 +443,7 @@ HPCS 2011, Montr\'eal
\frametitle
{
Why Theano is better
}
\frametitle
{
Why Theano is better
}
Executing the code is faster because Theano:
Executing the code is faster because Theano:
\begin{itemize}
\begin{itemize}
\item
Rearranges high-level expressions
\item
Rearranges high-level expressions
\item
Produces customized low-level code
\item
Produces customized low-level code
\item
Uses a variety of backend technologies (GPU,...)
\item
Uses a variety of backend technologies (GPU,...)
\end{itemize}
\end{itemize}
...
@@ -459,7 +459,7 @@ HPCS 2011, Montr\'eal
...
@@ -459,7 +459,7 @@ HPCS 2011, Montr\'eal
\item
Theano do
{
\bf
automatic efficient symbolic differentiation
}
\item
Theano do
{
\bf
automatic efficient symbolic differentiation
}
\begin{itemize}
\begin{itemize}
\item
No need to manually differentiate your functions (tedious
\&
error-prone for complicated expressions!)
\item
No need to manually differentiate your functions (tedious
\&
error-prone for complicated expressions!)
\end{itemize}
\end{itemize}
\end{itemize}
\end{itemize}
}
}
...
@@ -518,7 +518,7 @@ Modify and execute the example to do this expression: a**2 + b**2 + 2*a*b
...
@@ -518,7 +518,7 @@ Modify and execute the example to do this expression: a**2 + b**2 + 2*a*b
\begin{Verbatim}
[commandchars=
\\\{\}
]
\begin{Verbatim}
[commandchars=
\\\{\}
]
import numpy
import numpy
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
rng = numpy.random
rng = numpy.random
N = 400
N = 400
...
@@ -532,8 +532,8 @@ training_steps = 10000
...
@@ -532,8 +532,8 @@ training_steps = 10000
\frametitle
{
A Real Example: Logistic Regression
}
\frametitle
{
A Real Example: Logistic Regression
}
\begin{Verbatim}
[commandchars=
\\\{\}
]
\begin{Verbatim}
[commandchars=
\\\{\}
]
{
\color
{
gray
}
# Declare Theano symbolic variables
}
{
\color
{
gray
}
# Declare Theano symbolic variables
}
x =
T
.matrix("x")
x =
tt
.matrix("x")
y =
T
.vector("y")
y =
tt
.vector("y")
\codeHighlight
{
w = theano.shared(rng.randn(100), name="w")
}
\codeHighlight
{
w = theano.shared(rng.randn(100), name="w")
}
\codeHighlight
{
b = theano.shared(0., name="b")
}
\codeHighlight
{
b = theano.shared(0., name="b")
}
print "Initial model:"
print "Initial model:"
...
@@ -545,32 +545,32 @@ print w.get_value(), b.get_value()
...
@@ -545,32 +545,32 @@ print w.get_value(), b.get_value()
\frametitle
{
A Real Example: Logistic Regression
}
\frametitle
{
A Real Example: Logistic Regression
}
\begin{Verbatim}
[commandchars=
\\\{\}
]
\begin{Verbatim}
[commandchars=
\\\{\}
]
{
\color
{
gray
}
# Declare Theano symbolic variables
}
{
\color
{
gray
}
# Declare Theano symbolic variables
}
{
\color
{
gray
}
x =
T
.matrix("x")
}
{
\color
{
gray
}
x =
tt
.matrix("x")
}
{
\color
{
gray
}
y =
T
.vector("y")
}
{
\color
{
gray
}
y =
tt
.vector("y")
}
{
\color
{
gray
}
w = theano.shared(rng.randn(100), name="w")
}
{
\color
{
gray
}
w = theano.shared(rng.randn(100), name="w")
}
{
\color
{
gray
}
b = theano.shared(0., name="b")
}
{
\color
{
gray
}
b = theano.shared(0., name="b")
}
{
\color
{
gray
}
# Construct Theano expression graph
}
{
\color
{
gray
}
# Construct Theano expression graph
}
p
_
1 = 1 / (1 +
T
.exp(-T.dot(x, w)-b))
{
\color
{
gray
}
# Probability that target = 1
}
p
_
1 = 1 / (1 +
tt
.exp(-T.dot(x, w)-b))
{
\color
{
gray
}
# Probability that target = 1
}
prediction = p
_
1 > 0.5
{
\color
{
gray
}
# The prediction thresholded
}
prediction = p
_
1 > 0.5
{
\color
{
gray
}
# The prediction thresholded
}
xent = -y*T.log(p
_
1) - (1-y)*T.log(1-p
_
1)
{
\color
{
gray
}
# Cross-entropy loss function
}
xent = -y*T.log(p
_
1) - (1-y)*T.log(1-p
_
1)
{
\color
{
gray
}
# Cross-entropy loss function
}
cost = xent.mean() + 0.01*(w**2).sum()
{
\color
{
gray
}
# The cost to minimize
}
cost = xent.mean() + 0.01*(w**2).sum()
{
\color
{
gray
}
# The cost to minimize
}
\codeHighlight
{
gw,gb =
T
.grad(cost, [w,b])
}
\codeHighlight
{
gw,gb =
tt
.grad(cost, [w,b])
}
\end{Verbatim}
\end{Verbatim}
\end{frame}
\end{frame}
\begin{frame}
[fragile]
\begin{frame}
[fragile]
\frametitle
{
A Real Example: Logistic Regression
}
\frametitle
{
A Real Example: Logistic Regression
}
\begin{Verbatim}
[commandchars=
\\\{\}
]
\begin{Verbatim}
[commandchars=
\\\{\}
]
{
\color
{
gray
}
x =
T
.matrix("x")
}
{
\color
{
gray
}
x =
tt
.matrix("x")
}
{
\color
{
gray
}
y =
T
.vector("y")
}
{
\color
{
gray
}
y =
tt
.vector("y")
}
{
\color
{
gray
}
w = theano.shared(rng.randn(100), name="w")
}
{
\color
{
gray
}
w = theano.shared(rng.randn(100), name="w")
}
{
\color
{
gray
}
b = theano.shared(0., name="b")
}
{
\color
{
gray
}
b = theano.shared(0., name="b")
}
{
\color
{
gray
}
p
_
1 = 1 / (1 +
T
.exp(-T.dot(x, w)-b))
}
{
\color
{
gray
}
p
_
1 = 1 / (1 +
tt
.exp(-T.dot(x, w)-b))
}
{
\color
{
gray
}
prediction = p
_
1 > 0.5
}
{
\color
{
gray
}
prediction = p
_
1 > 0.5
}
{
\color
{
gray
}
xent = -y*T.log(p
_
1) - (1-y)*T.log(1-p
_
1)
}
{
\color
{
gray
}
xent = -y*T.log(p
_
1) - (1-y)*T.log(1-p
_
1)
}
{
\color
{
gray
}
cost = xent.mean() + 0.01*(w**2).sum()
}
{
\color
{
gray
}
cost = xent.mean() + 0.01*(w**2).sum()
}
{
\color
{
gray
}
gw,gb =
T
.grad(cost, [w,b])
}
{
\color
{
gray
}
gw,gb =
tt
.grad(cost, [w,b])
}
{
\color
{
gray
}
# Compile
}
{
\color
{
gray
}
# Compile
}
train = theano.function(
train = theano.function(
...
@@ -598,11 +598,11 @@ print "prediction on D:", predict(D[0])
...
@@ -598,11 +598,11 @@ print "prediction on D:", predict(D[0])
\begin{frame}
[fragile]
\begin{frame}
[fragile]
\frametitle
{
A Real Example: optimization
}
\frametitle
{
A Real Example: optimization
}
\begin{Verbatim}
[commandchars=
\\\{\}
]
\begin{Verbatim}
[commandchars=
\\\{\}
]
p
_
1 = 1 / (1 +
T
.exp(-T.dot(x, w)-b))
p
_
1 = 1 / (1 +
tt
.exp(-T.dot(x, w)-b))
xent = -y*T.log(p
_
1) - (1-y)*T.log(1-p
_
1)
xent = -y*T.log(p
_
1) - (1-y)*T.log(1-p
_
1)
prediction = p
_
1 > 0.5
prediction = p
_
1 > 0.5
cost = xent.mean() + 0.01*(w**2).sum()
cost = xent.mean() + 0.01*(w**2).sum()
gw,gb =
T
.grad(cost, [w,b])
gw,gb =
tt
.grad(cost, [w,b])
train = theano.function(
train = theano.function(
inputs=[x,y],
inputs=[x,y],
...
@@ -612,7 +612,7 @@ train = theano.function(
...
@@ -612,7 +612,7 @@ train = theano.function(
Where are those optimization applied?
Where are those optimization applied?
\begin{itemize}
\begin{itemize}
\item
Log(1+exp(x))
\item
Log(1+exp(x))
\item
1 / (1 +
T
.exp(var)) (sigmoid)
\item
1 / (1 +
tt
.exp(var)) (sigmoid)
\item
Log(1-sigmoid(var)) (softplus, stabilisation)
\item
Log(1-sigmoid(var)) (softplus, stabilisation)
\item
GEMV (matrix-vector multiply from BLAS)
\item
GEMV (matrix-vector multiply from BLAS)
\item
Loop fusion
\item
Loop fusion
...
@@ -622,14 +622,14 @@ Where are those optimization applied?
...
@@ -622,14 +622,14 @@ Where are those optimization applied?
\begin{frame}
[fragile]
\begin{frame}
[fragile]
\frametitle
{
A Real Example: optimization!
}
\frametitle
{
A Real Example: optimization!
}
\begin{Verbatim}
[commandchars=
\\\{\}
]
\begin{Verbatim}
[commandchars=
\\\{\}
]
p
_
1 = 1 / (1 +
T
.exp(-T.dot(x, w)-b))
p
_
1 = 1 / (1 +
tt
.exp(-T.dot(x, w)-b))
\codeHighlight
{
# 1 / (1 +
T
.exp(var)) -> sigmoid(var)
}
\codeHighlight
{
# 1 / (1 +
tt
.exp(var)) -> sigmoid(var)
}
xent = -y*T.log(p
_
1) - (1-y)*T.log(1-p
_
1)
xent = -y*T.log(p
_
1) - (1-y)*T.log(1-p
_
1)
\codeHighlight
{
# Log(1-sigmoid(var)) -> -sigmoid(var)
}
\codeHighlight
{
# Log(1-sigmoid(var)) -> -sigmoid(var)
}
prediction = p
_
1 > 0.5
prediction = p
_
1 > 0.5
cost = xent.mean() + 0.01*(w**2).sum()
cost = xent.mean() + 0.01*(w**2).sum()
gw,gb =
T
.grad(cost, [w,b])
gw,gb =
tt
.grad(cost, [w,b])
train = theano.function(
train = theano.function(
inputs=[x,y],
inputs=[x,y],
...
@@ -680,7 +680,7 @@ Modify and execute the example in the file logreg\_example.py to run on CPU with
...
@@ -680,7 +680,7 @@ Modify and execute the example in the file logreg\_example.py to run on CPU with
\item
Be sure to use
\texttt
{
floatX
}
(
\texttt
{
theano.config.floatX
}
) in your code
\item
Be sure to use
\texttt
{
floatX
}
(
\texttt
{
theano.config.floatX
}
) in your code
\item
Cast inputs before putting them into a shared variable
\item
Cast inputs before putting them into a shared variable
\item
Cast "problem": int32 with float32
$
\to
$
float64
\item
Cast "problem": int32 with float32
$
\to
$
float64
\begin{itemize}
\begin{itemize}
\item
A new casting mechanism is being developed
\item
A new casting mechanism is being developed
\item
Insert manual cast in your code or use [u]int
{
8,16
}
\item
Insert manual cast in your code or use [u]int
{
8,16
}
\item
Insert manual cast around the mean operator (which involves a division by the length, which is an int64!)
\item
Insert manual cast around the mean operator (which involves a division by the length, which is an int64!)
...
@@ -727,18 +727,18 @@ Computers in the class
...
@@ -727,18 +727,18 @@ Computers in the class
\begin{itemize}
\begin{itemize}
\item
\#
Dimensions
\item
\#
Dimensions
\begin{itemize}
\begin{itemize}
\item
T.scalar, T.vector, T.matrix, T.tensor3, T
.tensor4
\item
tt.scalar, tt.vector, tt.matrix, tt.tensor3, tt
.tensor4
\end{itemize}
\end{itemize}
\item
Dtype
\item
Dtype
\begin{itemize}
\begin{itemize}
\item
T
.[fdczbwil]vector (float32, float64, complex64, complex128, int8, int16, int32, int64)
\item
tt
.[fdczbwil]vector (float32, float64, complex64, complex128, int8, int16, int32, int64)
\item
T
.vector
$
\to
$
floatX dtype
\item
tt
.vector
$
\to
$
floatX dtype
\item
floatX: configurable dtype that can be float32 or float64.
\item
floatX: configurable dtype that can be float32 or float64.
\end{itemize}
\end{itemize}
\item
Custom variable
\item
Custom variable
\begin{itemize}
\begin{itemize}
\item
All are shortcuts to:
T
.tensor(dtype, broadcastable=[False]*nd)
\item
All are shortcuts to:
tt
.tensor(dtype, broadcastable=[False]*nd)
\item
Other dtype: uint[8,16,32,64], floatX
\item
Other dtype: uint[8,16,32,64], floatX
\end{itemize}
\end{itemize}
\end{itemize}
\end{itemize}
...
@@ -747,15 +747,15 @@ Computers in the class
...
@@ -747,15 +747,15 @@ Computers in the class
\frame
{
\frame
{
\frametitle
{
Creating symbolic variables: Broadcastability
}
\frametitle
{
Creating symbolic variables: Broadcastability
}
\begin{itemize}
\begin{itemize}
\item
Remember what I said about broadcasting?
\item
Remember what I said about broadcasting?
\item
How to add a row to all rows of a matrix?
\item
How to add a row to all rows of a matrix?
\item
How to add a column to all columns of a matrix?
\item
How to add a column to all columns of a matrix?
\end{itemize}
\end{itemize}
\vfill
\vfill
\begin{itemize}
\begin{itemize}
\item
Broadcastability must be specified when creating the variable
\item
Broadcastability must be specified when creating the variable
\item
The only shorcut with broadcastable dimensions are:
{
\bf
T.row
}
and
{
\bf
T
.col
}
\item
The only shorcut with broadcastable dimensions are:
{
\bf
tt.row
}
and
{
\bf
tt
.col
}
\item
For all others:
T
.tensor(dtype, broadcastable=
{
\bf
([False or True])*nd
}
)
\item
For all others:
tt
.tensor(dtype, broadcastable=
{
\bf
([False or True])*nd
}
)
\end{itemize}
\end{itemize}
}
}
...
@@ -763,12 +763,12 @@ Computers in the class
...
@@ -763,12 +763,12 @@ Computers in the class
\begin{frame}
[fragile]
\begin{frame}
[fragile]
\frametitle
{
Differentiation Details
}
\frametitle
{
Differentiation Details
}
\begin{Verbatim}
[commandchars=
\\\{\}
]
\begin{Verbatim}
[commandchars=
\\\{\}
]
{
\color
{
gray
}
gw,gb =
T
.grad(cost, [w,b])
}
{
\color
{
gray
}
gw,gb =
tt
.grad(cost, [w,b])
}
\end{Verbatim}
\end{Verbatim}
\begin{itemize}
\begin{itemize}
\item
T
.grad works symbolically: takes and returns a Theano variable
\item
tt
.grad works symbolically: takes and returns a Theano variable
\item
T
.grad can be compared to a macro: it can be applied multiple times
\item
tt
.grad can be compared to a macro: it can be applied multiple times
\item
T
.grad takes scalar costs only
\item
tt
.grad takes scalar costs only
\item
Simple recipe allows to compute efficiently vector
$
\times
$
Jacobian and vector
$
\times
$
Hessian
\item
Simple recipe allows to compute efficiently vector
$
\times
$
Jacobian and vector
$
\times
$
Hessian
\item
We are working on the missing optimizations to be able to compute efficently the full Jacobian and Hessian and Jacobian
$
\times
$
vector
\item
We are working on the missing optimizations to be able to compute efficently the full Jacobian and Hessian and Jacobian
$
\times
$
vector
\end{itemize}
\end{itemize}
...
@@ -848,7 +848,7 @@ Convolutional Network: 256x256 images convolved with 6 7x7 filters, downsampled
...
@@ -848,7 +848,7 @@ Convolutional Network: 256x256 images convolved with 6 7x7 filters, downsampled
\frametitle
{
Profile Mode
}
\frametitle
{
Profile Mode
}
To replace the default mode with this mode, use the Theano flags
\texttt
{
mode=ProfileMode
}
To replace the default mode with this mode, use the Theano flags
\texttt
{
mode=ProfileMode
}
To enable the memory profiling use the flags
\texttt
{
ProfileMode.profile
\_
memory=True
}
To enable the memory profiling use the flags
\texttt
{
ProfileMode.profile
\_
memory=True
}
\begin{Verbatim}
\begin{Verbatim}
Time since import 33.456s
Time since import 33.456s
Theano compile time: 1.023s (3.1
% since import)
Theano compile time: 1.023s (3.1
% since import)
...
@@ -881,7 +881,7 @@ Theano outputs:
...
@@ -881,7 +881,7 @@ Theano outputs:
\vfill
\vfill
\begin{Verbatim}
\begin{Verbatim}
Single Op-wise summary:
Single Op-wise summary:
<
% of local_time spent on this kind of Op> <cumulative %>
<
% of local_time spent on this kind of Op> <cumulative %>
<self seconds> <cumulative seconds> <time per call> <nb
_
call>
<self seconds> <cumulative seconds> <time per call> <nb
_
call>
<nb
_
op> <nb
_
apply> <Op name>
<nb
_
op> <nb
_
apply> <Op name>
87.3
% 87.3% 25.672s 25.672s 2.57e-03s 10000 1 1 <Gemv>
87.3
% 87.3% 25.672s 25.672s 2.57e-03s 10000 1 1 <Gemv>
...
@@ -933,7 +933,7 @@ Apply-wise summary:
...
@@ -933,7 +933,7 @@ Apply-wise summary:
0.4
% 98.7% 0.127s 29.020s 1.27e-05s 10000 10 Alloc(Elemwise{inv,no_inplace}.0, Shape_i{0}.0)
0.4
% 98.7% 0.127s 29.020s 1.27e-05s 10000 10 Alloc(Elemwise{inv,no_inplace}.0, Shape_i{0}.0)
0.3
% 99.0% 0.092s 29.112s 9.16e-06s 10000 13 Elemwise{Composite{exp,{mul,{true_div,neg,{add,mul}}}}}[(0, 0)](Elemwise{ScalarSigmoid{output_types_preference=transfer_type{0}, _op_use_c_code=True}}[(0, 0)].0, Alloc.0, y, Elemwise{Composite{neg,sub}}[(0, 0)].0, Elemwise{sub,no_inplace}.0, InplaceDimShuffle{x}.0)
0.3
% 99.0% 0.092s 29.112s 9.16e-06s 10000 13 Elemwise{Composite{exp,{mul,{true_div,neg,{add,mul}}}}}[(0, 0)](Elemwise{ScalarSigmoid{output_types_preference=transfer_type{0}, _op_use_c_code=True}}[(0, 0)].0, Alloc.0, y, Elemwise{Composite{neg,sub}}[(0, 0)].0, Elemwise{sub,no_inplace}.0, InplaceDimShuffle{x}.0)
0.3
% 99.3% 0.080s 29.192s 7.99e-06s 10000 11 Elemwise{ScalarSigmoid{output_types_preference=transfer_type{0}, _op_use_c_code=True}}[(0, 0)](Elemwise{neg,no_inplace}.0)
0.3
% 99.3% 0.080s 29.192s 7.99e-06s 10000 11 Elemwise{ScalarSigmoid{output_types_preference=transfer_type{0}, _op_use_c_code=True}}[(0, 0)](Elemwise{neg,no_inplace}.0)
... (remaining 14 Apply instances account for
... (remaining 14 Apply instances account for
0.7
%(0.00s) of the runtime)
0.7
%(0.00s) of the runtime)
\end{Verbatim}
\end{Verbatim}
\end{frame}
\end{frame}
...
@@ -944,7 +944,7 @@ Theano outputs:
...
@@ -944,7 +944,7 @@ Theano outputs:
\vfill
\vfill
\begin{Verbatim}
\begin{Verbatim}
Profile of Theano functions memory:
Profile of Theano functions memory:
(This check only the output of each apply node. It don't check the
(This check only the output of each apply node. It don't check the
temporary memory used by the op in the apply node.)
temporary memory used by the op in the apply node.)
Theano fct: train
Theano fct: train
Max without gc, inplace and view (KB) 2481
Max without gc, inplace and view (KB) 2481
...
@@ -953,7 +953,7 @@ Theano fct: train
...
@@ -953,7 +953,7 @@ Theano fct: train
Memory saved by view (KB) 2450
Memory saved by view (KB) 2450
Memory saved by inplace (KB) 15
Memory saved by inplace (KB) 15
Memory saved by GC (KB) 0
Memory saved by GC (KB) 0
<Sum apply outputs (bytes)> <Apply outputs memory size(bytes)>
<Sum apply outputs (bytes)> <Apply outputs memory size(bytes)>
<created/inplace/view> <Apply node>
<created/inplace/view> <Apply node>
<created/inplace/view> is taked from the op declaration, not ...
<created/inplace/view> is taked from the op declaration, not ...
2508800B [2508800] v InplaceDimShuffle
{
1,0
}
(x)
2508800B [2508800] v InplaceDimShuffle
{
1,0
}
(x)
...
@@ -1005,22 +1005,22 @@ theano.printing.debugprint({fct, variable, list of variables})
...
@@ -1005,22 +1005,22 @@ theano.printing.debugprint({fct, variable, list of variables})
\small
\small
\begin{Verbatim}
\begin{Verbatim}
>>> theano.printing.debugprint(prediction)
>>> theano.printing.debugprint(prediction)
Elemwise
{
gt,no
_
inplace
}
[@181772236] ''
Elemwise
{
gt,no
_
inplace
}
[@181772236] ''
|Elemwise
{
true
_
div,no
_
inplace
}
[@181746668] ''
|Elemwise
{
true
_
div,no
_
inplace
}
[@181746668] ''
| |InplaceDimShuffle
{
x
}
[@181746412] ''
| |InplaceDimShuffle
{
x
}
[@181746412] ''
| | |TensorConstant
{
1
}
[@181745836]
| | |TensorConstant
{
1
}
[@181745836]
| |Elemwise
{
add,no
_
inplace
}
[@181745644] ''
| |Elemwise
{
add,no
_
inplace
}
[@181745644] ''
| | |InplaceDimShuffle
{
x
}
[@181745420] ''
| | |InplaceDimShuffle
{
x
}
[@181745420] ''
| | | |TensorConstant
{
1
}
[@181744844]
| | | |TensorConstant
{
1
}
[@181744844]
| | |Elemwise
{
exp,no
_
inplace
}
[@181744652] ''
| | |Elemwise
{
exp,no
_
inplace
}
[@181744652] ''
| | | |Elemwise
{
sub,no
_
inplace
}
[@181744012] ''
| | | |Elemwise
{
sub,no
_
inplace
}
[@181744012] ''
| | | | |Elemwise
{
neg,no
_
inplace
}
[@181730764] ''
| | | | |Elemwise
{
neg,no
_
inplace
}
[@181730764] ''
| | | | | |dot [@181729676] ''
| | | | | |dot [@181729676] ''
| | | | | | |x [@181563948]
| | | | | | |x [@181563948]
| | | | | | |w [@181729964]
| | | | | | |w [@181729964]
| | | | |InplaceDimShuffle
{
x
}
[@181743788] ''
| | | | |InplaceDimShuffle
{
x
}
[@181743788] ''
| | | | | |b [@181730156]
| | | | | |b [@181730156]
|InplaceDimShuffle
{
x
}
[@181771788] ''
|InplaceDimShuffle
{
x
}
[@181771788] ''
| |TensorConstant
{
0.5
}
[@181771148]
| |TensorConstant
{
0.5
}
[@181771148]
\end{Verbatim}
\end{Verbatim}
\end{frame}
\end{frame}
...
@@ -1108,13 +1108,13 @@ All pydotprint* requires graphviz and pydot
...
@@ -1108,13 +1108,13 @@ All pydotprint* requires graphviz and pydot
\item
``sum()`` could be computed by scanning the
$
z
+
x
_
i
$
function
\item
``sum()`` could be computed by scanning the
$
z
+
x
_
i
$
function
over a list, given an initial state of ``z=0``.
over a list, given an initial state of ``z=0``.
\item
Often a for-loop can be expressed as a ``scan()`` operation, and
\item
Often a for-loop can be expressed as a ``scan()`` operation, and
``scan`` is the closest that Theano comes to looping.
``scan`` is the closest that Theano comes to looping.
\item
The advantage of using ``scan`` over for loops
\item
The advantage of using ``scan`` over for loops
\begin{itemize}
\begin{itemize}
\item
The number of iterations to be part of the symbolic graph
\item
The number of iterations to be part of the symbolic graph
\item
Minimizes GPU transfers if GPU is involved
\item
Minimizes GPU transfers if GPU is involved
\item
Compute gradients through sequential steps
\item
Compute gradients through sequential steps
\item
Slightly faster then using a for loop in Python with a compiled Theano function
\item
Slightly faster then using a for loop in Python with a compiled Theano function
\item
Can lower the overall memory usage by detecting the actual
\\
amount of memory needed
\item
Can lower the overall memory usage by detecting the actual
\\
amount of memory needed
\end{itemize}
\end{itemize}
\end{itemize}
\end{itemize}
...
@@ -1123,7 +1123,7 @@ All pydotprint* requires graphviz and pydot
...
@@ -1123,7 +1123,7 @@ All pydotprint* requires graphviz and pydot
\begin{frame}
[fragile]
\begin{frame}
[fragile]
\frametitle
{
Scan Example: Computing pow(A,k)
}
\frametitle
{
Scan Example: Computing pow(A,k)
}
\begin{Verbatim}
\begin{Verbatim}
k =
T.iscalar("k"); A = T
.vector("A")
k =
tt.iscalar("k"); A = tt
.vector("A")
def inner
_
fct(prior
_
result, A): return prior
_
result * A
def inner
_
fct(prior
_
result, A): return prior
_
result * A
# Symbolic description of the result
# Symbolic description of the result
...
@@ -1147,11 +1147,11 @@ print power(range(10),2)
...
@@ -1147,11 +1147,11 @@ print power(range(10),2)
\frametitle
{
Scan Example: Calculating a Polynomial
}
\frametitle
{
Scan Example: Calculating a Polynomial
}
\begin{Verbatim}
\begin{Verbatim}
coefficients = theano.tensor.vector("coefficients")
coefficients = theano.tensor.vector("coefficients")
x =
T
.scalar("x"); max
_
coefficients
_
supported = 10000
x =
tt
.scalar("x"); max
_
coefficients
_
supported = 10000
# Generate the components of the polynomial
# Generate the components of the polynomial
full
_
range=theano.tensor.arange(max
_
coefficients
_
supported)
full
_
range=theano.tensor.arange(max
_
coefficients
_
supported)
components, updates = theano.scan(fn=lambda coeff, power, free
_
var:
components, updates = theano.scan(fn=lambda coeff, power, free
_
var:
coeff * (free
_
var ** power),
coeff * (free
_
var ** power),
outputs
_
info=None,
outputs
_
info=None,
sequences=[coefficients, full
_
range],
sequences=[coefficients, full
_
range],
...
@@ -1187,7 +1187,7 @@ print calculate_polynomial(test_coeff, 3)
...
@@ -1187,7 +1187,7 @@ print calculate_polynomial(test_coeff, 3)
\item
Needs a certain number of operations to be useful
\item
Needs a certain number of operations to be useful
\item
We have started working on this in a branch
\item
We have started working on this in a branch
\end{itemize}
\end{itemize}
\item
Compilation time superlinear in the size of the graph.
\item
Compilation time superlinear in the size of the graph.
\begin{itemize}
\begin{itemize}
\item
A few hundreds nodes is fine
\item
A few hundreds nodes is fine
\item
Disabling a few optimizations can speed up compilation
\item
Disabling a few optimizations can speed up compilation
...
@@ -1581,7 +1581,7 @@ print numpy.asarray(f(xv))
...
@@ -1581,7 +1581,7 @@ print numpy.asarray(f(xv))
\item
I presented a tool that tries to be the holy grail in computing:
{
\bf
easy to code
}
and
{
\bf
fast to execute
}
!
\item
I presented a tool that tries to be the holy grail in computing:
{
\bf
easy to code
}
and
{
\bf
fast to execute
}
!
\item
Generates fast, custom CPU code
\textit
{
and
}
GPU code
\item
Generates fast, custom CPU code
\textit
{
and
}
GPU code
\item
You can easily wrap existing CPU/GPU code with Theano
\item
You can easily wrap existing CPU/GPU code with Theano
\item
It
{
\bf
works
}
and is
{
\bf
used in the real world
}
by academic researchers
\textit
{
and
}
industry
\item
It
{
\bf
works
}
and is
{
\bf
used in the real world
}
by academic researchers
\textit
{
and
}
industry
\end{itemize}
\end{itemize}
}
}
...
...
doc/hpcs2011_tutorial/scan_poly.py
浏览文件 @
33667eb7
import
numpy
as
np
import
numpy
as
np
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
tt
coefficients
=
theano
.
tensor
.
vector
(
"coefficients"
)
coefficients
=
theano
.
tensor
.
vector
(
"coefficients"
)
x
=
T
.
scalar
(
"x"
);
max_coefficients_supported
=
10000
x
=
tt
.
scalar
(
"x"
)
max_coefficients_supported
=
10000
# Generate the components of the polynomial
# Generate the components of the polynomial
full_range
=
theano
.
tensor
.
arange
(
max_coefficients_supported
)
full_range
=
theano
.
tensor
.
arange
(
max_coefficients_supported
)
components
,
updates
=
theano
.
scan
(
fn
=
lambda
coeff
,
power
,
free_var
:
components
,
updates
=
theano
.
scan
(
coeff
*
(
free_var
**
power
),
fn
=
lambda
coeff
,
power
,
free_var
:
coeff
*
(
free_var
**
power
),
outputs_info
=
None
,
outputs_info
=
None
,
sequences
=
[
coefficients
,
full_range
],
sequences
=
[
coefficients
,
full_range
],
non_sequences
=
x
)
non_sequences
=
x
,
)
polynomial
=
components
.
sum
()
polynomial
=
components
.
sum
()
calculate_polynomial
=
theano
.
function
(
inputs
=
[
coefficients
,
x
],
calculate_polynomial
=
theano
.
function
(
inputs
=
[
coefficients
,
x
],
outputs
=
polynomial
)
outputs
=
polynomial
)
test_coeff
=
np
.
asarray
([
1
,
0
,
2
],
dtype
=
np
.
float32
)
test_coeff
=
np
.
asarray
([
1
,
0
,
2
],
dtype
=
np
.
float32
)
print
(
calculate_polynomial
(
test_coeff
,
3
))
print
(
calculate_polynomial
(
test_coeff
,
3
))
...
...
doc/hpcs2011_tutorial/scan_pow.py
浏览文件 @
33667eb7
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
tt
k
=
tt
.
iscalar
(
"k"
)
A
=
tt
.
vector
(
"A"
)
def
inner_fct
(
prior_result
,
A
):
return
prior_result
*
A
k
=
T
.
iscalar
(
"k"
);
A
=
T
.
vector
(
"A"
)
def
inner_fct
(
prior_result
,
A
):
return
prior_result
*
A
# Symbolic description of the result
# Symbolic description of the result
result
,
updates
=
theano
.
scan
(
fn
=
inner_fct
,
result
,
updates
=
theano
.
scan
(
outputs_info
=
T
.
ones_like
(
A
),
fn
=
inner_fct
,
outputs_info
=
tt
.
ones_like
(
A
),
non_sequences
=
A
,
n_steps
=
k
non_sequences
=
A
,
n_steps
=
k
)
)
# Scan has provided us with A**1 through A**k. Keep only the last
# Scan has provided us with A**1 through A**k. Keep only the last
# value. Scan notices this and does not waste memory saving them.
# value. Scan notices this and does not waste memory saving them.
final_result
=
result
[
-
1
]
final_result
=
result
[
-
1
]
power
=
theano
.
function
(
inputs
=
[
A
,
k
],
outputs
=
final_result
,
power
=
theano
.
function
(
inputs
=
[
A
,
k
],
outputs
=
final_result
,
updates
=
updates
)
updates
=
updates
)
print
(
power
(
list
(
range
(
10
)),
2
))
print
(
power
(
list
(
range
(
10
)),
2
))
doc/library/compile/io.txt
浏览文件 @
33667eb7
...
@@ -80,10 +80,10 @@ A non-None `value` argument makes an In() instance an optional parameter
...
@@ -80,10 +80,10 @@ A non-None `value` argument makes an In() instance an optional parameter
of the compiled function. For example, in the following code we are
of the compiled function. For example, in the following code we are
defining an arity-2 function ``inc``.
defining an arity-2 function ``inc``.
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
>>> from theano import function
>>> from theano import function
>>> from theano.compile.io import In
>>> from theano.compile.io import In
>>> u, x, s =
T
.scalars('u', 'x', 's')
>>> u, x, s =
tt
.scalars('u', 'x', 's')
>>> inc = function([u, In(x, value=3), In(s, update=(s+x*u), value=10.0)], [])
>>> inc = function([u, In(x, value=3), In(s, update=(s+x*u), value=10.0)], [])
Since we provided a ``value`` for ``s`` and ``x``, we can call it with just a value for ``u`` like this:
Since we provided a ``value`` for ``s`` and ``x``, we can call it with just a value for ``u`` like this:
...
@@ -183,8 +183,8 @@ method to access values by indexing a Function directly by typing
...
@@ -183,8 +183,8 @@ method to access values by indexing a Function directly by typing
To show some examples of these access methods...
To show some examples of these access methods...
>>> from theano import tensor as
T
, function
>>> from theano import tensor as
tt
, function
>>> a, b, c =
T
.scalars('xys') # set the internal names of graph nodes
>>> a, b, c =
tt
.scalars('xys') # set the internal names of graph nodes
>>> # Note that the name of c is 's', not 'c'!
>>> # Note that the name of c is 's', not 'c'!
>>> fn = function([a, b, ((c, c+a+b), 10.0)], [])
>>> fn = function([a, b, ((c, c+a+b), 10.0)], [])
...
@@ -236,12 +236,12 @@ Every element of the inputs list will be upgraded to an In instance if necessary
...
@@ -236,12 +236,12 @@ Every element of the inputs list will be upgraded to an In instance if necessary
Example:
Example:
>>> import theano
>>> import theano
>>> from theano import tensor as
T
>>> from theano import tensor as
tt
>>> from theano.compile.io import In
>>> from theano.compile.io import In
>>> x =
T
.scalar()
>>> x =
tt
.scalar()
>>> y =
T
.scalar('y')
>>> y =
tt
.scalar('y')
>>> z =
T
.scalar('z')
>>> z =
tt
.scalar('z')
>>> w =
T
.scalar('w')
>>> w =
tt
.scalar('w')
>>> fn = theano.function(inputs=[x, y, In(z, value=42), ((w, w+x), 0)],
>>> fn = theano.function(inputs=[x, y, In(z, value=42), ((w, w+x), 0)],
... outputs=x + y + z)
... outputs=x + y + z)
...
@@ -308,7 +308,7 @@ If a list of ``Variable`` or ``Out`` instances is given as argument, then the co
...
@@ -308,7 +308,7 @@ If a list of ``Variable`` or ``Out`` instances is given as argument, then the co
>>> import numpy
>>> import numpy
>>> from theano.compile.io import Out
>>> from theano.compile.io import Out
>>> x, y, s =
T
.matrices('xys')
>>> x, y, s =
tt
.matrices('xys')
>>> # print a list of 2 ndarrays
>>> # print a list of 2 ndarrays
>>> fn1 = theano.function([x], [x+x, Out((x+x).T, borrow=True)])
>>> fn1 = theano.function([x], [x+x, Out((x+x).T, borrow=True)])
...
...
doc/library/compile/nanguardmode.txt
浏览文件 @
33667eb7
...
@@ -25,12 +25,12 @@ NanGuardMode can be used as follows:
...
@@ -25,12 +25,12 @@ NanGuardMode can be used as follows:
import numpy
import numpy
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
from theano.compile.nanguardmode import NanGuardMode
from theano.compile.nanguardmode import NanGuardMode
x =
T
.matrix()
x =
tt
.matrix()
w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX))
w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX))
y =
T
.dot(x, w)
y =
tt
.dot(x, w)
fun = theano.function(
fun = theano.function(
[x], y,
[x], y,
mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
...
...
doc/library/d3viz/index.ipynb
浏览文件 @
33667eb7
...
@@ -72,7 +72,7 @@
...
@@ -72,7 +72,7 @@
"outputs": [],
"outputs": [],
"source": [
"source": [
"import theano as th\n",
"import theano as th\n",
"import theano.tensor as
T
\n",
"import theano.tensor as
tt
\n",
"import numpy as np"
"import numpy as np"
]
]
},
},
...
@@ -97,14 +97,14 @@
...
@@ -97,14 +97,14 @@
"nhiddens = 50\n",
"nhiddens = 50\n",
"\n",
"\n",
"rng = np.random.RandomState(0)\n",
"rng = np.random.RandomState(0)\n",
"x =
T
.dmatrix('x')\n",
"x =
tt
.dmatrix('x')\n",
"wh = th.shared(rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True)\n",
"wh = th.shared(rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True)\n",
"bh = th.shared(np.zeros(nhiddens), borrow=True)\n",
"bh = th.shared(np.zeros(nhiddens), borrow=True)\n",
"h =
T.nnet.sigmoid(T
.dot(x, wh) + bh)\n",
"h =
tt.nnet.sigmoid(tt
.dot(x, wh) + bh)\n",
"\n",
"\n",
"wy = th.shared(rng.normal(0, 1, (nhiddens, noutputs)))\n",
"wy = th.shared(rng.normal(0, 1, (nhiddens, noutputs)))\n",
"by = th.shared(np.zeros(noutputs), borrow=True)\n",
"by = th.shared(np.zeros(noutputs), borrow=True)\n",
"y =
T.nnet.softmax(T
.dot(h, wy) + by)\n",
"y =
tt.nnet.softmax(tt
.dot(h, wy) + by)\n",
"\n",
"\n",
"predict = th.function([x], y)"
"predict = th.function([x], y)"
]
]
...
@@ -389,8 +389,8 @@
...
@@ -389,8 +389,8 @@
},
},
"outputs": [],
"outputs": [],
"source": [
"source": [
"x, y, z =
T
.scalars('xyz')\n",
"x, y, z =
tt
.scalars('xyz')\n",
"e =
T
.nnet.sigmoid((x + y + z)**2)\n",
"e =
tt
.nnet.sigmoid((x + y + z)**2)\n",
"op = th.OpFromGraph([x, y, z], [e])\n",
"op = th.OpFromGraph([x, y, z], [e])\n",
"\n",
"\n",
"e2 = op(x, y, z) + op(z, y, x)\n",
"e2 = op(x, y, z) + op(z, y, x)\n",
...
@@ -434,7 +434,7 @@
...
@@ -434,7 +434,7 @@
},
},
"outputs": [],
"outputs": [],
"source": [
"source": [
"x, y, z =
T
.scalars('xyz')\n",
"x, y, z =
tt
.scalars('xyz')\n",
"e = x * y\n",
"e = x * y\n",
"op = th.OpFromGraph([x, y], [e])\n",
"op = th.OpFromGraph([x, y], [e])\n",
"e2 = op(x, y) + z\n",
"e2 = op(x, y) + z\n",
...
...
doc/library/d3viz/index.txt
浏览文件 @
33667eb7
...
@@ -44,7 +44,7 @@ web-browsers. ``d3viz`` allows
...
@@ -44,7 +44,7 @@ web-browsers. ``d3viz`` allows
- to explore nested graphs such as OpFromGraph nodes.
- to explore nested graphs such as OpFromGraph nodes.
.. note::
.. note::
This userguide is also available as
This userguide is also available as
:download:`IPython notebook <index.ipynb>`.
:download:`IPython notebook <index.ipynb>`.
...
@@ -54,24 +54,24 @@ hidden layer and a softmax output layer.
...
@@ -54,24 +54,24 @@ hidden layer and a softmax output layer.
.. code:: python
.. code:: python
import theano as th
import theano as th
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
ninputs = 1000
ninputs = 1000
nfeatures = 100
nfeatures = 100
noutputs = 10
noutputs = 10
nhiddens = 50
nhiddens = 50
rng = np.random.RandomState(0)
rng = np.random.RandomState(0)
x =
T
.dmatrix('x')
x =
tt
.dmatrix('x')
wh = th.shared(rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True)
wh = th.shared(rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True)
bh = th.shared(np.zeros(nhiddens), borrow=True)
bh = th.shared(np.zeros(nhiddens), borrow=True)
h =
T.nnet.sigmoid(T
.dot(x, wh) + bh)
h =
tt.nnet.sigmoid(tt
.dot(x, wh) + bh)
wy = th.shared(rng.normal(0, 1, (nhiddens, noutputs)))
wy = th.shared(rng.normal(0, 1, (nhiddens, noutputs)))
by = th.shared(np.zeros(noutputs), borrow=True)
by = th.shared(np.zeros(noutputs), borrow=True)
y =
T.nnet.softmax(T
.dot(h, wy) + by)
y =
tt.nnet.softmax(tt
.dot(h, wy) + by)
predict = th.function([x], y)
predict = th.function([x], y)
The function ``predict`` outputs the probability of 10 classes. You can
The function ``predict`` outputs the probability of 10 classes. You can
...
@@ -81,7 +81,7 @@ visualize it with :py:func:`theano.printing.pydotprint` as follows:
...
@@ -81,7 +81,7 @@ visualize it with :py:func:`theano.printing.pydotprint` as follows:
from theano.printing import pydotprint
from theano.printing import pydotprint
import os
import os
if not os.path.exists('examples'):
if not os.path.exists('examples'):
os.makedirs('examples')
os.makedirs('examples')
pydotprint(predict, 'examples/mlp.png')
pydotprint(predict, 'examples/mlp.png')
...
@@ -158,7 +158,7 @@ random data:
...
@@ -158,7 +158,7 @@ random data:
.. code:: python
.. code:: python
predict_profiled = th.function([x], y, profile=True)
predict_profiled = th.function([x], y, profile=True)
x_val = rng.normal(0, 1, (ninputs, nfeatures))
x_val = rng.normal(0, 1, (ninputs, nfeatures))
y_val = predict_profiled(x_val)
y_val = predict_profiled(x_val)
...
@@ -188,7 +188,7 @@ export graphs to different formats.
...
@@ -188,7 +188,7 @@ export graphs to different formats.
formatter = d3v.formatting.PyDotFormatter()
formatter = d3v.formatting.PyDotFormatter()
pydot_graph = formatter(predict_profiled)
pydot_graph = formatter(predict_profiled)
pydot_graph.write_png('examples/mlp2.png');
pydot_graph.write_png('examples/mlp2.png');
pydot_graph.write_png('examples/mlp2.pdf');
pydot_graph.write_png('examples/mlp2.pdf');
...
@@ -218,10 +218,10 @@ defines a nested graph, which will be visualized accordingly by
...
@@ -218,10 +218,10 @@ defines a nested graph, which will be visualized accordingly by
.. code:: python
.. code:: python
x, y, z =
T
.scalars('xyz')
x, y, z =
tt
.scalars('xyz')
e =
T
.nnet.sigmoid((x + y + z)**2)
e =
tt
.nnet.sigmoid((x + y + z)**2)
op = th.OpFromGraph([x, y, z], [e])
op = th.OpFromGraph([x, y, z], [e])
e2 = op(x, y, z) + op(z, y, x)
e2 = op(x, y, z) + op(z, y, x)
f = th.function([x, y, z], e2)
f = th.function([x, y, z], e2)
...
@@ -247,7 +247,7 @@ the following example.
...
@@ -247,7 +247,7 @@ the following example.
.. code:: python
.. code:: python
x, y, z =
T
.scalars('xyz')
x, y, z =
tt
.scalars('xyz')
e = x * y
e = x * y
op = th.OpFromGraph([x, y], [e])
op = th.OpFromGraph([x, y], [e])
e2 = op(x, y) + z
e2 = op(x, y) + z
...
...
doc/library/gpuarray/fft.txt
浏览文件 @
33667eb7
...
@@ -29,10 +29,10 @@ shifted to the middle of the array. The Theano flag ``device=cuda{0,1...}`` must
...
@@ -29,10 +29,10 @@ shifted to the middle of the array. The Theano flag ``device=cuda{0,1...}`` must
import numpy as np
import numpy as np
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
from theano.gpuarray import fft
from theano.gpuarray import fft
x =
T
.matrix('x', dtype='float32')
x =
tt
.matrix('x', dtype='float32')
rfft = fft.curfft(x, norm='ortho')
rfft = fft.curfft(x, norm='ortho')
f_rfft = theano.function([x], rfft)
f_rfft = theano.function([x], rfft)
...
...
doc/library/printing.txt
浏览文件 @
33667eb7
...
@@ -23,8 +23,8 @@ Intermediate values in a computation cannot be printed in
...
@@ -23,8 +23,8 @@ Intermediate values in a computation cannot be printed in
the normal python way with the print statement, because Theano has no *statements*.
the normal python way with the print statement, because Theano has no *statements*.
Instead there is the :class:`Print` Op.
Instead there is the :class:`Print` Op.
>>> from theano import tensor as
T
, function, printing
>>> from theano import tensor as
tt
, function, printing
>>> x =
T
.dvector()
>>> x =
tt
.dvector()
>>> hello_world_op = printing.Print('hello world')
>>> hello_world_op = printing.Print('hello world')
>>> printed_x = hello_world_op(x)
>>> printed_x = hello_world_op(x)
>>> f = function([x], printed_x)
>>> f = function([x], printed_x)
...
@@ -34,8 +34,8 @@ hello world __str__ = [ 1. 2. 3.]
...
@@ -34,8 +34,8 @@ hello world __str__ = [ 1. 2. 3.]
If you print more than one thing in a function like `f`, they will not
If you print more than one thing in a function like `f`, they will not
necessarily be printed in the order that you think. The order might even depend
necessarily be printed in the order that you think. The order might even depend
on which graph optimizations are applied. Strictly speaking, the order of
on which graph optimizations are applied. Strictly speaking, the order of
printing is not completely defined by the interface --
printing is not completely defined by the interface --
the only hard rule is that if the input of some print output `a` is
the only hard rule is that if the input of some print output `a` is
ultimately used as an input to some other print input `b` (so that `b` depends on `a`),
ultimately used as an input to some other print input `b` (so that `b` depends on `a`),
then `a` will print before `b`.
then `a` will print before `b`.
...
@@ -51,29 +51,29 @@ Theano also provides :func:`theano.printing.pydotprint` that creates a png image
...
@@ -51,29 +51,29 @@ Theano also provides :func:`theano.printing.pydotprint` that creates a png image
1) The first is :func:`theano.pp`.
1) The first is :func:`theano.pp`.
>>> from theano import pp, tensor as
T
>>> from theano import pp, tensor as
tt
>>> x =
T.dscalar('x')
>>> x =
tt.dscalar('x')
>>> y = x ** 2
>>> y = x ** 2
>>> gy =
T
.grad(y, x)
>>> gy =
tt
.grad(y, x)
>>> pp(gy) # print out the gradient prior to optimization
>>> pp(gy) # print out the gradient prior to optimization
'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
>>> f = function([x], gy)
>>> f = function([x], gy)
>>> pp(f.maker.fgraph.outputs[0])
>>> pp(f.maker.fgraph.outputs[0])
'(TensorConstant{2.0} * x)'
'(TensorConstant{2.0} * x)'
The parameter in
T.dscalar('x') in the first line is the name of this variable
The parameter in
tt.dscalar('x') in the first line is the name of this variable
in the graph. This name is used when printing the graph to make it more readable.
in the graph. This name is used when printing the graph to make it more readable.
If no name is provided the variable x is printed as its type as returned by
If no name is provided the variable x is printed as its type as returned by
x.type(). In this example - <TensorType(float64, scalar)>.
x.type(). In this example - <TensorType(float64, scalar)>.
The name parameter can be any string. There are no naming restrictions:
The name parameter can be any string. There are no naming restrictions:
in particular, you can have many variables with the same name.
in particular, you can have many variables with the same name.
As a convention, we generally give variables a string name that is similar to the name of the variable in local scope, but
As a convention, we generally give variables a string name that is similar to the name of the variable in local scope, but
you might want to break this convention to include an object instance, or an
you might want to break this convention to include an object instance, or an
iteration number or other kinds of information in the name.
iteration number or other kinds of information in the name.
.. note::
.. note::
To make graphs legible, :func:`pp` hides some Ops that are actually in the graph. For example,
To make graphs legible, :func:`pp` hides some Ops that are actually in the graph. For example,
automatic DimShuffles are not shown.
automatic DimShuffles are not shown.
...
@@ -136,7 +136,7 @@ shown.
...
@@ -136,7 +136,7 @@ shown.
In the image, Apply nodes (the applications of ops) are shown as ellipses and variables are shown as boxes.
In the image, Apply nodes (the applications of ops) are shown as ellipses and variables are shown as boxes.
The number at the end of each label indicates graph position.
The number at the end of each label indicates graph position.
Boxes and ovals have their own set of positions, so you can have apply #1 and also a
Boxes and ovals have their own set of positions, so you can have apply #1 and also a
variable #1.
variable #1.
The numbers in the boxes (Apply nodes) are actually their position in the
The numbers in the boxes (Apply nodes) are actually their position in the
...
@@ -148,7 +148,7 @@ variables will appear as inputs. Future versions of the :func:`pydotprint`
...
@@ -148,7 +148,7 @@ variables will appear as inputs. Future versions of the :func:`pydotprint`
may distinguish these implicit inputs from explicit inputs.
may distinguish these implicit inputs from explicit inputs.
If you give updates arguments when creating your function, these are added as
If you give updates arguments when creating your function, these are added as
extra inputs and outputs to the graph.
extra inputs and outputs to the graph.
Future versions of :func:`pydotprint` may distinguish these
Future versions of :func:`pydotprint` may distinguish these
implicit inputs and outputs from explicit inputs and outputs.
implicit inputs and outputs from explicit inputs and outputs.
...
@@ -160,9 +160,9 @@ Reference
...
@@ -160,9 +160,9 @@ Reference
.. class:: Print(Op)
.. class:: Print(Op)
This identity-like Op has the side effect of printing a message followed by its inputs
This identity-like Op has the side effect of printing a message followed by its inputs
when it runs. Default behaviour is to print the __str__ representation. Optionally, one
when it runs. Default behaviour is to print the __str__ representation. Optionally, one
can pass a list of the input member functions to execute, or attributes to print.
can pass a list of the input member functions to execute, or attributes to print.
.. method:: __init__(message="", attrs=("__str__",)
.. method:: __init__(message="", attrs=("__str__",)
...
@@ -192,4 +192,3 @@ Reference
...
@@ -192,4 +192,3 @@ Reference
.. autofunction:: theano.printing.pp(*args)
.. autofunction:: theano.printing.pp(*args)
.. autofunction:: theano.printing.pydotprint
.. autofunction:: theano.printing.pydotprint
doc/library/scan.txt
浏览文件 @
33667eb7
...
@@ -38,10 +38,10 @@ The equivalent Theano code would be:
...
@@ -38,10 +38,10 @@ The equivalent Theano code would be:
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
k =
T
.iscalar("k")
k =
tt
.iscalar("k")
A =
T
.vector("A")
A =
tt
.vector("A")
# Symbolic description of the result
# Symbolic description of the result
result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A,
result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A,
...
@@ -103,7 +103,7 @@ from a list of its coefficients:
...
@@ -103,7 +103,7 @@ from a list of its coefficients:
import numpy
import numpy
coefficients = theano.tensor.vector("coefficients")
coefficients = theano.tensor.vector("coefficients")
x =
T
.scalar("x")
x =
tt
.scalar("x")
max_coefficients_supported = 10000
max_coefficients_supported = 10000
...
@@ -164,21 +164,21 @@ downcast** of the latter.
...
@@ -164,21 +164,21 @@ downcast** of the latter.
import numpy as np
import numpy as np
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
up_to =
T
.iscalar("up_to")
up_to =
tt
.iscalar("up_to")
# define a named function, rather than using lambda
# define a named function, rather than using lambda
def accumulate_by_adding(arange_val, sum_to_date):
def accumulate_by_adding(arange_val, sum_to_date):
return sum_to_date + arange_val
return sum_to_date + arange_val
seq =
T
.arange(up_to)
seq =
tt
.arange(up_to)
# An unauthorized implicit downcast from the dtype of 'seq', to that of
# An unauthorized implicit downcast from the dtype of 'seq', to that of
# 'T.as_tensor_variable(0)' which is of dtype 'int8' by default would occur
# 'T.as_tensor_variable(0)' which is of dtype 'int8' by default would occur
# if this instruction were to be used instead of the next one:
# if this instruction were to be used instead of the next one:
# outputs_info =
T
.as_tensor_variable(0)
# outputs_info =
tt
.as_tensor_variable(0)
outputs_info =
T
.as_tensor_variable(np.asarray(0, seq.dtype))
outputs_info =
tt
.as_tensor_variable(np.asarray(0, seq.dtype))
scan_result, scan_updates = theano.scan(fn=accumulate_by_adding,
scan_result, scan_updates = theano.scan(fn=accumulate_by_adding,
outputs_info=outputs_info,
outputs_info=outputs_info,
sequences=seq)
sequences=seq)
...
@@ -206,14 +206,14 @@ with all values set to zero except at the provided array indices.
...
@@ -206,14 +206,14 @@ with all values set to zero except at the provided array indices.
.. testcode::
.. testcode::
location =
T
.imatrix("location")
location =
tt
.imatrix("location")
values =
T
.vector("values")
values =
tt
.vector("values")
output_model =
T
.matrix("output_model")
output_model =
tt
.matrix("output_model")
def set_value_at_position(a_location, a_value, output_model):
def set_value_at_position(a_location, a_value, output_model):
zeros =
T
.zeros_like(output_model)
zeros =
tt
.zeros_like(output_model)
zeros_subtensor = zeros[a_location[0], a_location[1]]
zeros_subtensor = zeros[a_location[0], a_location[1]]
return
T
.set_subtensor(zeros_subtensor, a_value)
return
tt
.set_subtensor(zeros_subtensor, a_value)
result, updates = theano.scan(fn=set_value_at_position,
result, updates = theano.scan(fn=set_value_at_position,
outputs_info=None,
outputs_info=None,
...
@@ -265,7 +265,7 @@ the following:
...
@@ -265,7 +265,7 @@ the following:
.. testcode:: scan1
.. testcode:: scan1
import theano
import theano
from theano import tensor as
T
from theano import tensor as
tt
W = theano.shared(W_values) # we assume that ``W_values`` contains the
W = theano.shared(W_values) # we assume that ``W_values`` contains the
# initial values of your weight matrix
# initial values of your weight matrix
...
@@ -273,12 +273,12 @@ the following:
...
@@ -273,12 +273,12 @@ the following:
bvis = theano.shared(bvis_values)
bvis = theano.shared(bvis_values)
bhid = theano.shared(bhid_values)
bhid = theano.shared(bhid_values)
trng =
T
.shared_randomstreams.RandomStreams(1234)
trng =
tt
.shared_randomstreams.RandomStreams(1234)
def OneStep(vsample) :
def OneStep(vsample) :
hmean =
T
.nnet.sigmoid(theano.dot(vsample, W) + bhid)
hmean =
tt
.nnet.sigmoid(theano.dot(vsample, W) + bhid)
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
vmean =
T
.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
vmean =
tt
.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
return trng.binomial(size=vsample.shape, n=1, p=vmean,
return trng.binomial(size=vsample.shape, n=1, p=vmean,
dtype=theano.config.floatX)
dtype=theano.config.floatX)
...
@@ -354,13 +354,13 @@ updated:
...
@@ -354,13 +354,13 @@ updated:
bvis = theano.shared(bvis_values)
bvis = theano.shared(bvis_values)
bhid = theano.shared(bhid_values)
bhid = theano.shared(bhid_values)
trng =
T
.shared_randomstreams.RandomStreams(1234)
trng =
tt
.shared_randomstreams.RandomStreams(1234)
# OneStep, with explicit use of the shared variables (W, bvis, bhid)
# OneStep, with explicit use of the shared variables (W, bvis, bhid)
def OneStep(vsample, W, bvis, bhid):
def OneStep(vsample, W, bvis, bhid):
hmean =
T
.nnet.sigmoid(theano.dot(vsample, W) + bhid)
hmean =
tt
.nnet.sigmoid(theano.dot(vsample, W) + bhid)
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
vmean =
T
.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
vmean =
tt
.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
return trng.binomial(size=vsample.shape, n=1, p=vmean,
return trng.binomial(size=vsample.shape, n=1, p=vmean,
dtype=theano.config.floatX)
dtype=theano.config.floatX)
...
@@ -394,9 +394,9 @@ Using the original Gibbs sampling example, with ``strict=True`` added to the
...
@@ -394,9 +394,9 @@ Using the original Gibbs sampling example, with ``strict=True`` added to the
# Same OneStep as in original example.
# Same OneStep as in original example.
def OneStep(vsample) :
def OneStep(vsample) :
hmean =
T
.nnet.sigmoid(theano.dot(vsample, W) + bhid)
hmean =
tt
.nnet.sigmoid(theano.dot(vsample, W) + bhid)
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
vmean =
T
.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
vmean =
tt
.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
return trng.binomial(size=vsample.shape, n=1, p=vmean,
return trng.binomial(size=vsample.shape, n=1, p=vmean,
dtype=theano.config.floatX)
dtype=theano.config.floatX)
...
@@ -417,15 +417,15 @@ Using the original Gibbs sampling example, with ``strict=True`` added to the
...
@@ -417,15 +417,15 @@ Using the original Gibbs sampling example, with ``strict=True`` added to the
The error indicates that ``OneStep`` relies on variables that are not passed
The error indicates that ``OneStep`` relies on variables that are not passed
as arguments explicitly. Here is the correct version, with the shared
as arguments explicitly. Here is the correct version, with the shared
variables passed explicitly to ``OneStep`` and to scan:
variables passed explicitly to ``OneStep`` and to scan:
.. testcode:: scan1
.. testcode:: scan1
# OneStep, with explicit use of the shared variables (W, bvis, bhid)
# OneStep, with explicit use of the shared variables (W, bvis, bhid)
def OneStep(vsample, W, bvis, bhid) :
def OneStep(vsample, W, bvis, bhid) :
hmean =
T
.nnet.sigmoid(theano.dot(vsample, W) + bhid)
hmean =
tt
.nnet.sigmoid(theano.dot(vsample, W) + bhid)
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
vmean =
T
.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
vmean =
tt
.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
return trng.binomial(size=vsample.shape, n=1, p=vmean,
return trng.binomial(size=vsample.shape, n=1, p=vmean,
dtype=theano.config.floatX)
dtype=theano.config.floatX)
...
@@ -465,13 +465,13 @@ construct a function that computes one iteration step :
...
@@ -465,13 +465,13 @@ construct a function that computes one iteration step :
.. testsetup:: scan3
.. testsetup:: scan3
import theano
import theano
from theano import tensor as
T
from theano import tensor as
tt
.. testcode:: scan3
.. testcode:: scan3
def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback, W_out):
def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback, W_out):
x_t =
T
.tanh(theano.dot(x_tm1, W) + \
x_t =
tt
.tanh(theano.dot(x_tm1, W) + \
theano.dot(u_t, W_in_1) + \
theano.dot(u_t, W_in_1) + \
theano.dot(u_tm4, W_in_2) + \
theano.dot(u_tm4, W_in_2) + \
theano.dot(y_tm1, W_feedback))
theano.dot(y_tm1, W_feedback))
...
@@ -492,16 +492,16 @@ the Theano variables needed we construct our RNN as follows :
...
@@ -492,16 +492,16 @@ the Theano variables needed we construct our RNN as follows :
.. testcode:: scan3
.. testcode:: scan3
W =
T
.matrix()
W =
tt
.matrix()
W_in_1 =
T
.matrix()
W_in_1 =
tt
.matrix()
W_in_2 =
T
.matrix()
W_in_2 =
tt
.matrix()
W_feedback =
T
.matrix()
W_feedback =
tt
.matrix()
W_out =
T
.matrix()
W_out =
tt
.matrix()
u =
T
.matrix() # it is a sequence of vectors
u =
tt
.matrix() # it is a sequence of vectors
x0 =
T
.matrix() # initial state of x has to be a matrix, since
x0 =
tt
.matrix() # initial state of x has to be a matrix, since
# it has to cover x[-3]
# it has to cover x[-3]
y0 =
T
.vector() # y0 is just a vector since scan has only to provide
y0 =
tt
.vector() # y0 is just a vector since scan has only to provide
# y[-1]
# y[-1]
...
@@ -541,9 +541,9 @@ value ``max_value``.
...
@@ -541,9 +541,9 @@ value ``max_value``.
def power_of_2(previous_power, max_value):
def power_of_2(previous_power, max_value):
return previous_power*2, theano.scan_module.until(previous_power*2 > max_value)
return previous_power*2, theano.scan_module.until(previous_power*2 > max_value)
max_value =
T
.scalar()
max_value =
tt
.scalar()
values, _ = theano.scan(power_of_2,
values, _ = theano.scan(power_of_2,
outputs_info =
T
.constant(1.),
outputs_info =
tt
.constant(1.),
non_sequences = max_value,
non_sequences = max_value,
n_steps = 1024)
n_steps = 1024)
...
...
doc/library/tensor/basic.txt
浏览文件 @
33667eb7
...
@@ -9,7 +9,7 @@ Basic Tensor Functionality
...
@@ -9,7 +9,7 @@ Basic Tensor Functionality
.. testsetup::
.. testsetup::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
from theano.tensor import scalar, iscalar, TensorType, dmatrix, ivector
from theano.tensor import scalar, iscalar, TensorType, dmatrix, ivector
from theano.tensor import set_subtensor, inc_subtensor, batched_dot
from theano.tensor import set_subtensor, inc_subtensor, batched_dot
from theano import shared
from theano import shared
...
@@ -19,12 +19,12 @@ Basic Tensor Functionality
...
@@ -19,12 +19,12 @@ Basic Tensor Functionality
Theano supports any kind of Python object, but its focus is support for
Theano supports any kind of Python object, but its focus is support for
symbolic matrix expressions. When you type,
symbolic matrix expressions. When you type,
>>> x =
T
.fmatrix()
>>> x =
tt
.fmatrix()
the ``x`` is a :class:`TensorVariable` instance.
the ``x`` is a :class:`TensorVariable` instance.
The ``
T
.fmatrix`` object itself is an instance of :class:`TensorType`.
The ``
tt
.fmatrix`` object itself is an instance of :class:`TensorType`.
Theano knows what type of variable ``x`` is because ``x.type``
Theano knows what type of variable ``x`` is because ``x.type``
points back to ``
T
.fmatrix``.
points back to ``
tt
.fmatrix``.
This chapter explains the various ways of creating tensor variables,
This chapter explains the various ways of creating tensor variables,
the attributes and methods of :class:`TensorVariable` and :class:`TensorType`,
the attributes and methods of :class:`TensorVariable` and :class:`TensorType`,
...
@@ -531,7 +531,7 @@ TensorVariable
...
@@ -531,7 +531,7 @@ TensorVariable
Transpose of this tensor.
Transpose of this tensor.
>>> x =
T
.zmatrix()
>>> x =
tt
.zmatrix()
>>> y = 3+.2j * x.T
>>> y = 3+.2j * x.T
.. note::
.. note::
...
@@ -707,17 +707,17 @@ Creating Tensor
...
@@ -707,17 +707,17 @@ Creating Tensor
By default, it will be x.dtype.
By default, it will be x.dtype.
Returns a tensor the shape of x filled with zeros of the type of dtype.
Returns a tensor the shape of x filled with zeros of the type of dtype.
.. function:: ones_like(x)
.. function:: ones_like(x)
:param x: tensor that has the same shape as output
:param x: tensor that has the same shape as output
:param dtype: data-type, optional
:param dtype: data-type, optional
By default, it will be x.dtype.
By default, it will be x.dtype.
Returns a tensor the shape of x filled with ones of the type of dtype.
Returns a tensor the shape of x filled with ones of the type of dtype.
.. function:: zeros(shape, dtype=None)
.. function:: zeros(shape, dtype=None)
...
@@ -766,7 +766,7 @@ Creating Tensor
...
@@ -766,7 +766,7 @@ Creating Tensor
.. function:: stack(tensors, axis=0)
.. function:: stack(tensors, axis=0)
Stack tensors in sequence on given axis (default is 0).
Stack tensors in sequence on given axis (default is 0).
Take a sequence of tensors and stack them on given axis to make a single
Take a sequence of tensors and stack them on given axis to make a single
tensor. The size in dimension `axis` of the result will be equal to the number
tensor. The size in dimension `axis` of the result will be equal to the number
...
@@ -824,10 +824,10 @@ Creating Tensor
...
@@ -824,10 +824,10 @@ Creating Tensor
:param tensors: one or more tensors of the same rank
:param tensors: one or more tensors of the same rank
:returns: A tensor such that rval[0] == tensors[0], rval[1] == tensors[1], etc.
:returns: A tensor such that rval[0] == tensors[0], rval[1] == tensors[1], etc.
>>> x0 =
T
.scalar()
>>> x0 =
tt
.scalar()
>>> x1 =
T
.scalar()
>>> x1 =
tt
.scalar()
>>> x2 =
T
.scalar()
>>> x2 =
tt
.scalar()
>>> x =
T
.stack(x0, x1, x2)
>>> x =
tt
.stack(x0, x1, x2)
>>> x.ndim # x is a vector of length 3.
>>> x.ndim # x is a vector of length 3.
1
1
...
@@ -840,10 +840,10 @@ Creating Tensor
...
@@ -840,10 +840,10 @@ Creating Tensor
:param axis: Tensors will be joined along this axis, so they may have different
:param axis: Tensors will be joined along this axis, so they may have different
``shape[axis]``
``shape[axis]``
>>> x0 =
T
.fmatrix()
>>> x0 =
tt
.fmatrix()
>>> x1 =
T
.ftensor3()
>>> x1 =
tt
.ftensor3()
>>> x2 =
T
.fvector()
>>> x2 =
tt
.fvector()
>>> x =
T.concatenate([x0, x1[0], T
.shape_padright(x2)], axis=1)
>>> x =
tt.concatenate([x0, x1[0], tt
.shape_padright(x2)], axis=1)
>>> x.ndim
>>> x.ndim
2
2
...
@@ -1131,7 +1131,7 @@ Indexing
...
@@ -1131,7 +1131,7 @@ Indexing
Like NumPy, Theano distinguishes between *basic* and *advanced* indexing.
Like NumPy, Theano distinguishes between *basic* and *advanced* indexing.
Theano fully supports basic indexing
Theano fully supports basic indexing
(see `NumPy's indexing <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html>`_)
(see `NumPy's indexing <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html>`_)
and `integer advanced indexing
and `integer advanced indexing
<http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#integer>`_.
<http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#integer>`_.
Since version 0.10.0 Theano also supports boolean indexing with boolean
Since version 0.10.0 Theano also supports boolean indexing with boolean
...
@@ -1151,7 +1151,7 @@ Operator Support
...
@@ -1151,7 +1151,7 @@ Operator Support
Many Python operators are supported.
Many Python operators are supported.
>>> a, b =
T.itensor3(), T
.itensor3() # example inputs
>>> a, b =
tt.itensor3(), tt
.itensor3() # example inputs
Arithmetic
Arithmetic
--------------
--------------
...
@@ -1159,13 +1159,13 @@ Arithmetic
...
@@ -1159,13 +1159,13 @@ Arithmetic
.. doctest::
.. doctest::
:options: +SKIP
:options: +SKIP
>>> a + 3 #
T
.add(a, 3) -> itensor3
>>> a + 3 #
tt
.add(a, 3) -> itensor3
>>> 3 - a #
T
.sub(3, a)
>>> 3 - a #
tt
.sub(3, a)
>>> a * 3.5 #
T
.mul(a, 3.5) -> ftensor3 or dtensor3 (depending on casting)
>>> a * 3.5 #
tt
.mul(a, 3.5) -> ftensor3 or dtensor3 (depending on casting)
>>> 2.2 / a #
T
.truediv(2.2, a)
>>> 2.2 / a #
tt
.truediv(2.2, a)
>>> 2.2 // a #
T
.intdiv(2.2, a)
>>> 2.2 // a #
tt
.intdiv(2.2, a)
>>> 2.2**a #
T
.pow(2.2, a)
>>> 2.2**a #
tt
.pow(2.2, a)
>>> b % a #
T
.mod(b, a)
>>> b % a #
tt
.mod(b, a)
Bitwise
Bitwise
-------------
-------------
...
@@ -1173,10 +1173,10 @@ Bitwise
...
@@ -1173,10 +1173,10 @@ Bitwise
.. doctest::
.. doctest::
:options: +SKIP
:options: +SKIP
>>> a & b #
T.and_(a,b) bitwise and (alias T
.bitwise_and)
>>> a & b #
tt.and_(a,b) bitwise and (alias tt
.bitwise_and)
>>> a ^ 1 #
T.xor(a,1) bitwise xor (alias T
.bitwise_xor)
>>> a ^ 1 #
tt.xor(a,1) bitwise xor (alias tt
.bitwise_xor)
>>> a | b #
T.or_(a,b) bitwise or (alias T
.bitwise_or)
>>> a | b #
tt.or_(a,b) bitwise or (alias tt
.bitwise_or)
>>> ~a #
T.invert(a) bitwise invert (alias T
.bitwise_not)
>>> ~a #
tt.invert(a) bitwise invert (alias tt
.bitwise_not)
Inplace
Inplace
-------------
-------------
...
@@ -1205,9 +1205,9 @@ Casting
...
@@ -1205,9 +1205,9 @@ Casting
.. testcode:: cast
.. testcode:: cast
import theano.tensor as
T
import theano.tensor as
tt
x =
T
.matrix()
x =
tt
.matrix()
x_as_int =
T
.cast(x, 'int32')
x_as_int =
tt
.cast(x, 'int32')
Attempting to cast a complex value to a real value is ambiguous and
Attempting to cast a complex value to a real value is ambiguous and
will raise an exception. Use `real()`, `imag()`, `abs()`, or `angle()`.
will raise an exception. Use `real()`, `imag()`, `abs()`, or `angle()`.
...
@@ -1241,9 +1241,9 @@ The six usual equality and inequality operators share the same interface.
...
@@ -1241,9 +1241,9 @@ The six usual equality and inequality operators share the same interface.
.. testcode:: oper
.. testcode:: oper
import theano.tensor as
T
import theano.tensor as
tt
x,y =
T
.dmatrices('x','y')
x,y =
tt
.dmatrices('x','y')
z =
T
.le(x,y)
z =
tt
.le(x,y)
.. function:: lt(a, b)
.. function:: lt(a, b)
...
@@ -1334,10 +1334,10 @@ Condition
...
@@ -1334,10 +1334,10 @@ Condition
.. testcode:: switch
.. testcode:: switch
import theano.tensor as
T
import theano.tensor as
tt
a,b =
T
.dmatrices('a','b')
a,b =
tt
.dmatrices('a','b')
x,y =
T
.dmatrices('x','y')
x,y =
tt
.dmatrices('x','y')
z =
T.switch(T
.lt(a,b), x, y)
z =
tt.switch(tt
.lt(a,b), x, y)
.. function:: where(cond, ift, iff)
.. function:: where(cond, ift, iff)
...
@@ -1405,8 +1405,8 @@ Here is an example using the bit-wise ``and_`` via the ``&`` operator:
...
@@ -1405,8 +1405,8 @@ Here is an example using the bit-wise ``and_`` via the ``&`` operator:
.. testcode:: bitwise
.. testcode:: bitwise
import theano.tensor as
T
import theano.tensor as
tt
x,y =
T
.imatrices('x','y')
x,y =
tt
.imatrices('x','y')
z = x & y
z = x & y
...
@@ -1655,8 +1655,8 @@ Linear Algebra
...
@@ -1655,8 +1655,8 @@ Linear Algebra
Returns a tensor of size e.g. if it is 3D: (dim1, dim3, dim4)
Returns a tensor of size e.g. if it is 3D: (dim1, dim3, dim4)
Example:
Example:
>>> first =
T
.tensor3('first')
>>> first =
tt
.tensor3('first')
>>> second =
T
.tensor3('second')
>>> second =
tt
.tensor3('second')
>>> result = batched_dot(first, second)
>>> result = batched_dot(first, second)
:note: This is a subset of numpy.einsum, but we do not provide it for now.
:note: This is a subset of numpy.einsum, but we do not provide it for now.
...
@@ -1707,15 +1707,15 @@ Linear Algebra
...
@@ -1707,15 +1707,15 @@ Linear Algebra
.. function:: mgrid
.. function:: mgrid
:returns: an instance which returns a dense (or fleshed out) mesh-grid
:returns: an instance which returns a dense (or fleshed out) mesh-grid
when indexed, so that each returned argument has the same shape.
when indexed, so that each returned argument has the same shape.
The dimensions and number of the output arrays are equal to the
The dimensions and number of the output arrays are equal to the
number of indexing dimensions. If the step length is not a complex
number of indexing dimensions. If the step length is not a complex
number, then the stop is not inclusive.
number, then the stop is not inclusive.
Example:
Example:
>>> a =
T
.mgrid[0:5, 0:3]
>>> a =
tt
.mgrid[0:5, 0:3]
>>> a[0].eval()
>>> a[0].eval()
array([[0, 0, 0],
array([[0, 0, 0],
[1, 1, 1],
[1, 1, 1],
...
@@ -1731,15 +1731,15 @@ Linear Algebra
...
@@ -1731,15 +1731,15 @@ Linear Algebra
.. function:: ogrid
.. function:: ogrid
:returns: an instance which returns an open (i.e. not fleshed out) mesh-grid
:returns: an instance which returns an open (i.e. not fleshed out) mesh-grid
when indexed, so that only one dimension of each returned array is
when indexed, so that only one dimension of each returned array is
greater than 1. The dimension and number of the output arrays are
greater than 1. The dimension and number of the output arrays are
equal to the number of indexing dimensions. If the step length is
equal to the number of indexing dimensions. If the step length is
not a complex number, then the stop is not inclusive.
not a complex number, then the stop is not inclusive.
Example:
Example:
>>> b =
T
.ogrid[0:5, 0:3]
>>> b =
tt
.ogrid[0:5, 0:3]
>>> b[0].eval()
>>> b[0].eval()
array([[0],
array([[0],
[1],
[1],
...
@@ -1749,7 +1749,7 @@ Linear Algebra
...
@@ -1749,7 +1749,7 @@ Linear Algebra
>>> b[1].eval()
>>> b[1].eval()
array([[0, 1, 2]])
array([[0, 1, 2]])
Gradient / Differentiation
Gradient / Differentiation
==========================
==========================
...
...
doc/library/tensor/fft.txt
浏览文件 @
33667eb7
...
@@ -24,10 +24,10 @@ oscillates due to the box function being shifted to the middle of the array.
...
@@ -24,10 +24,10 @@ oscillates due to the box function being shifted to the middle of the array.
import numpy as np
import numpy as np
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
from theano.tensor import fft
from theano.tensor import fft
x =
T
.matrix('x', dtype='float64')
x =
tt
.matrix('x', dtype='float64')
rfft = fft.rfft(x, norm='ortho')
rfft = fft.rfft(x, norm='ortho')
f_rfft = theano.function([x], rfft)
f_rfft = theano.function([x], rfft)
...
...
doc/library/tensor/nnet/nnet.txt
浏览文件 @
33667eb7
...
@@ -50,11 +50,11 @@
...
@@ -50,11 +50,11 @@
.. testcode::
.. testcode::
import theano.tensor as
T
import theano.tensor as
tt
x, y, b =
T
.dvectors('x', 'y', 'b')
x, y, b =
tt
.dvectors('x', 'y', 'b')
W =
T
.dmatrix('W')
W =
tt
.dmatrix('W')
y =
T.nnet.sigmoid(T
.dot(W, x) + b)
y =
tt.nnet.sigmoid(tt
.dot(W, x) + b)
.. note:: The underlying code will return an exact 0 or 1 if an
.. note:: The underlying code will return an exact 0 or 1 if an
element of x is too small or too big.
element of x is too small or too big.
...
@@ -112,9 +112,9 @@
...
@@ -112,9 +112,9 @@
.. testcode::
.. testcode::
x,y,b =
T
.dvectors('x','y','b')
x,y,b =
tt
.dvectors('x','y','b')
W =
T
.dmatrix('W')
W =
tt
.dmatrix('W')
y =
T.nnet.softplus(T
.dot(W,x) + b)
y =
tt.nnet.softplus(tt
.dot(W,x) + b)
.. function:: softsign(x)
.. function:: softsign(x)
...
@@ -143,9 +143,9 @@
...
@@ -143,9 +143,9 @@
.. testcode::
.. testcode::
x,y,b =
T
.dvectors('x','y','b')
x,y,b =
tt
.dvectors('x','y','b')
W =
T
.dmatrix('W')
W =
tt
.dmatrix('W')
y =
T.nnet.softmax(T
.dot(W,x) + b)
y =
tt.nnet.softmax(tt
.dot(W,x) + b)
.. autofunction:: theano.tensor.nnet.relu
.. autofunction:: theano.tensor.nnet.relu
...
@@ -171,12 +171,12 @@
...
@@ -171,12 +171,12 @@
.. testcode::
.. testcode::
x, y, b, c =
T
.dvectors('x', 'y', 'b', 'c')
x, y, b, c =
tt
.dvectors('x', 'y', 'b', 'c')
W =
T
.dmatrix('W')
W =
tt
.dmatrix('W')
V =
T
.dmatrix('V')
V =
tt
.dmatrix('V')
h =
T.nnet.sigmoid(T
.dot(W, x) + b)
h =
tt.nnet.sigmoid(tt
.dot(W, x) + b)
x_recons =
T.nnet.sigmoid(T
.dot(V, h) + c)
x_recons =
tt.nnet.sigmoid(tt
.dot(V, h) + c)
recon_cost =
T
.nnet.binary_crossentropy(x_recons, x).mean()
recon_cost =
tt
.nnet.binary_crossentropy(x_recons, x).mean()
.. function:: sigmoid_binary_crossentropy(output,target)
.. function:: sigmoid_binary_crossentropy(output,target)
...
@@ -200,14 +200,14 @@
...
@@ -200,14 +200,14 @@
.. testcode::
.. testcode::
x, y, b, c =
T
.dvectors('x', 'y', 'b', 'c')
x, y, b, c =
tt
.dvectors('x', 'y', 'b', 'c')
W =
T
.dmatrix('W')
W =
tt
.dmatrix('W')
V =
T
.dmatrix('V')
V =
tt
.dmatrix('V')
h =
T.nnet.sigmoid(T
.dot(W, x) + b)
h =
tt.nnet.sigmoid(tt
.dot(W, x) + b)
x_precons =
T
.dot(V, h) + c
x_precons =
tt
.dot(V, h) + c
# final reconstructions are given by sigmoid(x_precons), but we leave
# final reconstructions are given by sigmoid(x_precons), but we leave
# them unnormalized as sigmoid_binary_crossentropy applies sigmoid
# them unnormalized as sigmoid_binary_crossentropy applies sigmoid
recon_cost =
T
.nnet.sigmoid_binary_crossentropy(x_precons, x).mean()
recon_cost =
tt
.nnet.sigmoid_binary_crossentropy(x_precons, x).mean()
.. function:: categorical_crossentropy(coding_dist,true_dist)
.. function:: categorical_crossentropy(coding_dist,true_dist)
...
@@ -244,8 +244,8 @@
...
@@ -244,8 +244,8 @@
.. testcode::
.. testcode::
y =
T.nnet.softmax(T
.dot(W, x) + b)
y =
tt.nnet.softmax(tt
.dot(W, x) + b)
cost =
T
.nnet.categorical_crossentropy(y, o)
cost =
tt
.nnet.categorical_crossentropy(y, o)
# o is either the above-mentioned 1-of-N vector or 2D tensor
# o is either the above-mentioned 1-of-N vector or 2D tensor
...
...
doc/nextml2015/presentation.tex
浏览文件 @
33667eb7
...
@@ -271,15 +271,15 @@ Some example of scalar operations:
...
@@ -271,15 +271,15 @@ Some example of scalar operations:
}
}
\begin{lstlisting}
\begin{lstlisting}
import theano
import theano
from theano import tensor as
T
from theano import tensor as
tt
x =
T
.scalar()
x =
tt
.scalar()
y =
T
.scalar()
y =
tt
.scalar()
z = x+y
z = x+y
w = z*x
w = z*x
a =
T
.sqrt(w)
a =
tt
.sqrt(w)
b =
T
.exp(a)
b =
tt
.exp(a)
c = a ** b
c = a ** b
d =
T
.log(c)
d =
tt
.log(c)
\end{lstlisting}
\end{lstlisting}
\end{frame}
\end{frame}
...
@@ -291,13 +291,13 @@ d = T.log(c)
...
@@ -291,13 +291,13 @@ d = T.log(c)
stringstyle=
\color
{
violet
}
,
stringstyle=
\color
{
violet
}
,
}
}
\begin{lstlisting}
\begin{lstlisting}
from theano import tensor as
T
from theano import tensor as
tt
x =
T
.vector()
x =
tt
.vector()
y =
T
.vector()
y =
tt
.vector()
# Scalar math applied elementwise
# Scalar math applied elementwise
a = x * y
a = x * y
# Vector dot product
# Vector dot product
b =
T
.dot(x, y)
b =
tt
.dot(x, y)
# Broadcasting (as NumPy, very powerful)
# Broadcasting (as NumPy, very powerful)
c = a + b
c = a + b
\end{lstlisting}
\end{lstlisting}
...
@@ -311,14 +311,14 @@ c = a + b
...
@@ -311,14 +311,14 @@ c = a + b
stringstyle=
\color
{
violet
}
,
stringstyle=
\color
{
violet
}
,
}
}
\begin{lstlisting}
\begin{lstlisting}
from theano import tensor as
T
from theano import tensor as
tt
x =
T
.matrix()
x =
tt
.matrix()
y =
T
.matrix()
y =
tt
.matrix()
a =
T
.vector()
a =
tt
.vector()
# Matrix-matrix product
# Matrix-matrix product
b =
T
.dot(x, y)
b =
tt
.dot(x, y)
# Matrix-vector product
# Matrix-vector product
c =
T
.dot(x, a)
c =
tt
.dot(x, a)
\end{lstlisting}
\end{lstlisting}
\end{frame}
\end{frame}
...
@@ -336,11 +336,11 @@ c = T.dot(x, a)
...
@@ -336,11 +336,11 @@ c = T.dot(x, a)
stringstyle=
\color
{
violet
}
,
stringstyle=
\color
{
violet
}
,
}
}
\begin{lstlisting}
\begin{lstlisting}
from theano import tensor as
T
from theano import tensor as
tt
tensor3 =
T
.TensorType(
tensor3 =
tt
.TensorType(
broadcastable=(False, False, False),
broadcastable=(False, False, False),
dtype='float32')
dtype='float32')
x =
T
.tensor3()
x =
tt
.tensor3()
\end{lstlisting}
\end{lstlisting}
\end{frame}
\end{frame}
...
@@ -351,8 +351,8 @@ x = T.tensor3()
...
@@ -351,8 +351,8 @@ x = T.tensor3()
stringstyle=
\color
{
violet
}
,
stringstyle=
\color
{
violet
}
,
}
}
\begin{lstlisting}
\begin{lstlisting}
from theano import tensor as
T
from theano import tensor as
tt
tensor3 =
T
.TensorType(
tensor3 =
tt
.TensorType(
broadcastable=(False, False, False),
broadcastable=(False, False, False),
dtype='float32')
dtype='float32')
x = tensor3()
x = tensor3()
...
@@ -370,13 +370,13 @@ mx = x.max(axis=1)
...
@@ -370,13 +370,13 @@ mx = x.max(axis=1)
stringstyle=
\color
{
violet
}
,
stringstyle=
\color
{
violet
}
,
}
}
\begin{lstlisting}
\begin{lstlisting}
from theano import tensor as
T
from theano import tensor as
tt
tensor3 =
T
.TensorType(
tensor3 =
tt
.TensorType(
broadcastable=(False, False, False))
broadcastable=(False, False, False))
x = tensor3()
x = tensor3()
y = x.dimshuffle((2, 1, 0))
y = x.dimshuffle((2, 1, 0))
a =
T
.matrix()
a =
tt
.matrix()
b = a.
T
b = a.
T
# Same as b
# Same as b
c = a.dimshuffle((0, 1))
c = a.dimshuffle((0, 1))
# Adding to larger tensor
# Adding to larger tensor
...
@@ -427,9 +427,9 @@ a_tensor[an_index_tensor, ...]
...
@@ -427,9 +427,9 @@ a_tensor[an_index_tensor, ...]
stringstyle=
\color
{
violet
}
,
stringstyle=
\color
{
violet
}
,
}
}
\begin{lstlisting}
\begin{lstlisting}
>>> from theano import tensor as
T
>>> from theano import tensor as
tt
>>> x =
T
.scalar()
>>> x =
tt
.scalar()
>>> y =
T
.scalar()
>>> y =
tt
.scalar()
>>> from theano import function
>>> from theano import function
>>> # first arg is list of SYMBOLIC inputs
>>> # first arg is list of SYMBOLIC inputs
>>> # second arg is SYMBOLIC output
>>> # second arg is SYMBOLIC output
...
@@ -518,8 +518,8 @@ modes regard as fine.
...
@@ -518,8 +518,8 @@ modes regard as fine.
\item
Theano's current back-end only supports 32 bit on GPU
\item
Theano's current back-end only supports 32 bit on GPU
\item
libgpuarray (the new backend) supports all dtypes
\item
libgpuarray (the new backend) supports all dtypes
\item
CUDA supports 64 bit, but is slow on gamer GPUs
\item
CUDA supports 64 bit, but is slow on gamer GPUs
\item
T.fscalar, T.fvector, T
.fmatrix are all 32 bit
\item
tt.fscalar, tt.fvector, tt
.fmatrix are all 32 bit
\item
T.scalar, T.vector, T
.matrix resolve to 32 bit or 64 bit depending on theano’s floatX flag
\item
tt.scalar, tt.vector, tt
.matrix resolve to 32 bit or 64 bit depending on theano’s floatX flag
\item
floatX is float64 by default, set it to float32
\item
floatX is float64 by default, set it to float32
\item
Set device flag to gpu (or a specific gpu, like gpu0)
\item
Set device flag to gpu (or a specific gpu, like gpu0)
\item
Flag: warn
\_
float64=
{
'ignore', 'warn', 'raise', 'pdb'
}
\item
Flag: warn
\_
float64=
{
'ignore', 'warn', 'raise', 'pdb'
}
...
@@ -547,9 +547,9 @@ modes regard as fine.
...
@@ -547,9 +547,9 @@ modes regard as fine.
stringstyle=
\color
{
violet
}
,
stringstyle=
\color
{
violet
}
,
}
}
\begin{lstlisting}
\begin{lstlisting}
>>> x =
T
.scalar('x')
>>> x =
tt
.scalar('x')
>>> y = 2. * x
>>> y = 2. * x
>>> g =
T
.grad(y, x)
>>> g =
tt
.grad(y, x)
# Print the not optimized graph
# Print the not optimized graph
>>> theano.printing.pydotprint(g)
>>> theano.printing.pydotprint(g)
\end{lstlisting}
\end{lstlisting}
...
@@ -559,7 +559,7 @@ modes regard as fine.
...
@@ -559,7 +559,7 @@ modes regard as fine.
%% \begin{frame}{Theano Variables}
%% \begin{frame}{Theano Variables}
%% \begin{itemize}
%% \begin{itemize}
%% \item A Variable is a theano expression
%% \item A Variable is a theano expression
%% \item Can come from
T.scalar, T
.matrix, etc.
%% \item Can come from
tt.scalar, tt
.matrix, etc.
%% \item Can come from doing operations on other Variables
%% \item Can come from doing operations on other Variables
%% \item Every Variable has a type field, identifying its Type \newline
%% \item Every Variable has a type field, identifying its Type \newline
%% e.g. TensorType((True, False), ‘float32’)
%% e.g. TensorType((True, False), ‘float32’)
...
@@ -623,9 +623,9 @@ modes regard as fine.
...
@@ -623,9 +623,9 @@ modes regard as fine.
\begin{lstlisting}
\begin{lstlisting}
import numpy as np
import numpy as np
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
x =
T
.vector()
x =
tt
.vector()
y =
T
.vector()
y =
tt
.vector()
z = x + x
z = x + x
z = z + y
z = z + y
f = theano.function([x, y], z)
f = theano.function([x, y], z)
...
@@ -857,16 +857,16 @@ Elemwise{mul,no_inplace} [@A] ''
...
@@ -857,16 +857,16 @@ Elemwise{mul,no_inplace} [@A] ''
}
}
\begin{lstlisting}
\begin{lstlisting}
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
# define tensor variables
# define tensor variables
W =
T
.matrix("W")
W =
tt
.matrix("W")
X =
T
.matrix("X")
X =
tt
.matrix("X")
b
_
sym =
T
.vector("b
_
sym")
b
_
sym =
tt
.vector("b
_
sym")
# define shared random stream
# define shared random stream
trng =
T
.shared
_
randomstreams.RandomStreams(1234)
trng =
tt
.shared
_
randomstreams.RandomStreams(1234)
d=trng.binomial(size=W[1].shape)
d=trng.binomial(size=W[1].shape)
\end{lstlisting}
\end{lstlisting}
\end{frame}
\end{frame}
...
@@ -881,7 +881,7 @@ d=trng.binomial(size=W[1].shape)
...
@@ -881,7 +881,7 @@ d=trng.binomial(size=W[1].shape)
}
}
\begin{lstlisting}
\begin{lstlisting}
results, updates = theano.scan(
results, updates = theano.scan(
lambda v:
T.tanh(T
.dot(v, W) + b
_
sym) * d,
lambda v:
tt.tanh(tt
.dot(v, W) + b
_
sym) * d,
sequences=X)
sequences=X)
f = theano.function(inputs=[X, W, b
_
sym],
f = theano.function(inputs=[X, W, b
_
sym],
outputs=[results],
outputs=[results],
...
@@ -903,11 +903,11 @@ print f(x, w, b)
...
@@ -903,11 +903,11 @@ print f(x, w, b)
}
}
\begin{lstlisting}
\begin{lstlisting}
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
theano.config.warn.subtensor
_
merge
_
bug = False
theano.config.warn.subtensor
_
merge
_
bug = False
k =
T
.iscalar("k")
k =
tt
.iscalar("k")
A =
T
.vector("A")
A =
tt
.vector("A")
def inner
_
fct(prior
_
result, B):
def inner
_
fct(prior
_
result, B):
return prior
_
result * B
return prior
_
result * B
...
@@ -949,7 +949,7 @@ print power(range(10), 2)
...
@@ -949,7 +949,7 @@ print power(range(10), 2)
result, updates = theano.scan(
result, updates = theano.scan(
fn=inner
_
fct,
fn=inner
_
fct,
sequences=[],
sequences=[],
outputs
_
info=[
T
.ones
_
like(A)],
outputs
_
info=[
tt
.ones
_
like(A)],
non
_
sequences=A,
non
_
sequences=A,
n
_
steps=k)
n
_
steps=k)
\end{lstlisting}
\end{lstlisting}
...
...
doc/sandbox/logistic_regression_example.txt
浏览文件 @
33667eb7
...
@@ -13,25 +13,25 @@ BUT, YOU GOTTA RUN THIS CODE AND MAKE SURE IT STILL WORKS NICELY, HEY?
...
@@ -13,25 +13,25 @@ BUT, YOU GOTTA RUN THIS CODE AND MAKE SURE IT STILL WORKS NICELY, HEY?
def build_logistic_regression_model(n_in, n_out, l2_coef=30.0):
def build_logistic_regression_model(n_in, n_out, l2_coef=30.0):
# DECLARE SOME VARIABLES
# DECLARE SOME VARIABLES
import tensor as
T
import tensor as
tt
x =
T
.matrix() #our points, one point per row
x =
tt
.matrix() #our points, one point per row
y =
T
.matrix() #store our labels as place codes (label 3 of 5 is vector [00100])
y =
tt
.matrix() #store our labels as place codes (label 3 of 5 is vector [00100])
w =
T
.matrix() #the linear transform to apply to our input points
w =
tt
.matrix() #the linear transform to apply to our input points
b =
T
.vector() #a vector of biases, which make our transform affine instead of linear
b =
tt
.vector() #a vector of biases, which make our transform affine instead of linear
stepsize =
T
.scalar('stepsize') # a stepsize for gradient descent
stepsize =
tt
.scalar('stepsize') # a stepsize for gradient descent
# REGRESSION MODEL AND COSTS TO MINIMIZE
# REGRESSION MODEL AND COSTS TO MINIMIZE
prediction =
T.softmax(T
.dot(x, w) + b)
prediction =
tt.softmax(tt
.dot(x, w) + b)
cross_entropy =
T.sum(y * T
.log(prediction), axis=1)
cross_entropy =
tt.sum(y * tt
.log(prediction), axis=1)
cost =
T.sum(cross_entropy) + l2_coef * T.sum(T
.sum(w*w))
cost =
tt.sum(cross_entropy) + l2_coef * tt.sum(tt
.sum(w*w))
# GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
# GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
grad_w, grad_b =
T
.grad(cost, [w, b])
grad_w, grad_b =
tt
.grad(cost, [w, b])
#
#
# GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
# GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
...
...
doc/tutorial/adding.txt
浏览文件 @
33667eb7
...
@@ -12,10 +12,10 @@ let's make a simple function: add two numbers together. Here is how you do
...
@@ -12,10 +12,10 @@ let's make a simple function: add two numbers together. Here is how you do
it:
it:
>>> import numpy
>>> import numpy
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
>>> from theano import function
>>> from theano import function
>>> x =
T
.dscalar('x')
>>> x =
tt
.dscalar('x')
>>> y =
T
.dscalar('y')
>>> y =
tt
.dscalar('y')
>>> z = x + y
>>> z = x + y
>>> f = function([x, y], z)
>>> f = function([x, y], z)
...
@@ -55,10 +55,10 @@ instruction. Behind the scene, *f* was being compiled into C code.
...
@@ -55,10 +55,10 @@ instruction. Behind the scene, *f* was being compiled into C code.
**Step 1**
**Step 1**
>>> x =
T
.dscalar('x')
>>> x =
tt
.dscalar('x')
>>> y =
T
.dscalar('y')
>>> y =
tt
.dscalar('y')
In Theano, all symbols must be typed. In particular, ``
T
.dscalar``
In Theano, all symbols must be typed. In particular, ``
tt
.dscalar``
is the type we assign to "0-dimensional arrays (`scalar`) of doubles
is the type we assign to "0-dimensional arrays (`scalar`) of doubles
(`d`)". It is a Theano :ref:`type`.
(`d`)". It is a Theano :ref:`type`.
...
@@ -72,12 +72,12 @@ field, as you can see here:
...
@@ -72,12 +72,12 @@ field, as you can see here:
<class 'theano.tensor.var.TensorVariable'>
<class 'theano.tensor.var.TensorVariable'>
>>> x.type
>>> x.type
TensorType(float64, scalar)
TensorType(float64, scalar)
>>>
T
.dscalar
>>>
tt
.dscalar
TensorType(float64, scalar)
TensorType(float64, scalar)
>>> x.type is
T
.dscalar
>>> x.type is
tt
.dscalar
True
True
By calling ``
T
.dscalar`` with a string argument, you create a
By calling ``
tt
.dscalar`` with a string argument, you create a
*Variable* representing a floating-point scalar quantity with the
*Variable* representing a floating-point scalar quantity with the
given name. If you provide no argument, the symbol will be unnamed. Names
given name. If you provide no argument, the symbol will be unnamed. Names
are not required, but they can help debugging.
are not required, but they can help debugging.
...
@@ -124,9 +124,9 @@ then be used like a normal Python function.
...
@@ -124,9 +124,9 @@ then be used like a normal Python function.
you to import :func:`function` . Here is how :func:`eval` works:
you to import :func:`function` . Here is how :func:`eval` works:
>>> import numpy
>>> import numpy
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
>>> x =
T
.dscalar('x')
>>> x =
tt
.dscalar('x')
>>> y =
T
.dscalar('y')
>>> y =
tt
.dscalar('y')
>>> z = x + y
>>> z = x + y
>>> numpy.allclose(z.eval({x : 16.3, y : 12.1}), 28.4)
>>> numpy.allclose(z.eval({x : 16.3, y : 12.1}), 28.4)
True
True
...
@@ -149,8 +149,8 @@ You might already have guessed how to do this. Indeed, the only change
...
@@ -149,8 +149,8 @@ You might already have guessed how to do this. Indeed, the only change
from the previous example is that you need to instantiate *x* and
from the previous example is that you need to instantiate *x* and
*y* using the matrix Types:
*y* using the matrix Types:
>>> x =
T
.dmatrix('x')
>>> x =
tt
.dmatrix('x')
>>> y =
T
.dmatrix('y')
>>> y =
tt
.dmatrix('y')
>>> z = x + y
>>> z = x + y
>>> f = function([x, y], z)
>>> f = function([x, y], z)
...
...
doc/tutorial/broadcasting.txt
浏览文件 @
33667eb7
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
import numpy as np
import numpy as np
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
.. _tutbroadcasting:
.. _tutbroadcasting:
...
@@ -39,10 +39,10 @@ information is given in the :ref:`type` of a *Variable*.
...
@@ -39,10 +39,10 @@ information is given in the :ref:`type` of a *Variable*.
The following code illustrates how rows and columns are broadcasted in order to perform an addition operation with a matrix:
The following code illustrates how rows and columns are broadcasted in order to perform an addition operation with a matrix:
>>> r =
T
.row()
>>> r =
tt
.row()
>>> r.broadcastable
>>> r.broadcastable
(True, False)
(True, False)
>>> mtr =
T
.matrix()
>>> mtr =
tt
.matrix()
>>> mtr.broadcastable
>>> mtr.broadcastable
(False, False)
(False, False)
>>> f_row = theano.function([r, mtr], [r + mtr])
>>> f_row = theano.function([r, mtr], [r + mtr])
...
@@ -58,7 +58,7 @@ array([[0, 1, 2],
...
@@ -58,7 +58,7 @@ array([[0, 1, 2],
[array([[ 0., 2., 4.],
[array([[ 0., 2., 4.],
[ 3., 5., 7.],
[ 3., 5., 7.],
[ 6., 8., 10.]])]
[ 6., 8., 10.]])]
>>> c =
T
.col()
>>> c =
tt
.col()
>>> c.broadcastable
>>> c.broadcastable
(False, True)
(False, True)
>>> f_col = theano.function([c, mtr], [c + mtr])
>>> f_col = theano.function([c, mtr], [c + mtr])
...
@@ -80,4 +80,3 @@ See also:
...
@@ -80,4 +80,3 @@ See also:
* `SciPy documentation about numpy's broadcasting <http://www.scipy.org/EricsBroadcastingDoc>`_
* `SciPy documentation about numpy's broadcasting <http://www.scipy.org/EricsBroadcastingDoc>`_
* `OnLamp article about numpy's broadcasting <http://www.onlamp.com/pub/a/python/2000/09/27/numerically.html>`_
* `OnLamp article about numpy's broadcasting <http://www.onlamp.com/pub/a/python/2000/09/27/numerically.html>`_
doc/tutorial/conditions.txt
浏览文件 @
33667eb7
...
@@ -20,15 +20,15 @@ IfElse vs Switch
...
@@ -20,15 +20,15 @@ IfElse vs Switch
.. testcode::
.. testcode::
from theano import tensor as
T
from theano import tensor as
tt
from theano.ifelse import ifelse
from theano.ifelse import ifelse
import theano, time, numpy
import theano, time, numpy
a,b =
T
.scalars('a', 'b')
a,b =
tt
.scalars('a', 'b')
x,y =
T
.matrices('x', 'y')
x,y =
tt
.matrices('x', 'y')
z_switch =
T.switch(T.lt(a, b), T.mean(x), T
.mean(y))
z_switch =
tt.switch(tt.lt(a, b), tt.mean(x), tt
.mean(y))
z_lazy = ifelse(
T.lt(a, b), T.mean(x), T
.mean(y))
z_lazy = ifelse(
tt.lt(a, b), tt.mean(x), tt
.mean(y))
f_switch = theano.function([a, b, x, y], z_switch,
f_switch = theano.function([a, b, x, y], z_switch,
mode=theano.Mode(linker='vm'))
mode=theano.Mode(linker='vm'))
...
...
doc/tutorial/debug_faq.txt
浏览文件 @
33667eb7
...
@@ -27,10 +27,10 @@ messages. Consider the following faulty code.
...
@@ -27,10 +27,10 @@ messages. Consider the following faulty code.
import numpy as np
import numpy as np
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
x =
T
.vector()
x =
tt
.vector()
y =
T
.vector()
y =
tt
.vector()
z = x + x
z = x + x
z = z + y
z = z + y
f = theano.function([x, y], z)
f = theano.function([x, y], z)
...
@@ -103,7 +103,7 @@ following example. Here, we use ``exception_verbosity=high`` and
...
@@ -103,7 +103,7 @@ following example. Here, we use ``exception_verbosity=high`` and
import numpy
import numpy
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
# compute_test_value is 'off' by default, meaning this feature is inactive
# compute_test_value is 'off' by default, meaning this feature is inactive
theano.config.compute_test_value = 'off' # Use 'warn' to activate this feature
theano.config.compute_test_value = 'off' # Use 'warn' to activate this feature
...
@@ -115,7 +115,7 @@ following example. Here, we use ``exception_verbosity=high`` and
...
@@ -115,7 +115,7 @@ following example. Here, we use ``exception_verbosity=high`` and
W2 = theano.shared(W2val, 'W2')
W2 = theano.shared(W2val, 'W2')
# input which will be of shape (5,10)
# input which will be of shape (5,10)
x =
T
.matrix('x')
x =
tt
.matrix('x')
# provide Theano with a default test-value
# provide Theano with a default test-value
#x.tag.test_value = numpy.random.rand(5, 10)
#x.tag.test_value = numpy.random.rand(5, 10)
...
@@ -124,10 +124,10 @@ following example. Here, we use ``exception_verbosity=high`` and
...
@@ -124,10 +124,10 @@ following example. Here, we use ``exception_verbosity=high`` and
func_of_W1 = W1.dimshuffle(2, 0, 1).flatten(2).T
func_of_W1 = W1.dimshuffle(2, 0, 1).flatten(2).T
# source of error: dot product of 5x10 with 20x10
# source of error: dot product of 5x10 with 20x10
h1 =
T
.dot(x, func_of_W1)
h1 =
tt
.dot(x, func_of_W1)
# do more stuff
# do more stuff
h2 =
T
.dot(h1, W2.T)
h2 =
tt
.dot(h1, W2.T)
# compile and call the actual function
# compile and call the actual function
f = theano.function([x], h2)
f = theano.function([x], h2)
...
@@ -172,7 +172,7 @@ so slightly, we can get Theano to reveal the exact source of the error.
...
@@ -172,7 +172,7 @@ so slightly, we can get Theano to reveal the exact source of the error.
...
...
# input which will be of shape (5, 10)
# input which will be of shape (5, 10)
x =
T
.matrix('x')
x =
tt
.matrix('x')
# provide Theano with a default test-value
# provide Theano with a default test-value
x.tag.test_value = numpy.random.rand(5, 10)
x.tag.test_value = numpy.random.rand(5, 10)
...
@@ -187,7 +187,7 @@ following error message, which properly identifies *line 24* as the culprit.
...
@@ -187,7 +187,7 @@ following error message, which properly identifies *line 24* as the culprit.
Traceback (most recent call last):
Traceback (most recent call last):
File "test2.py", line 24, in <module>
File "test2.py", line 24, in <module>
h1 =
T
.dot(x, func_of_W1)
h1 =
tt
.dot(x, func_of_W1)
File "PATH_TO_THEANO/theano/tensor/basic.py", line 4734, in dot
File "PATH_TO_THEANO/theano/tensor/basic.py", line 4734, in dot
return _dot(a, b)
return _dot(a, b)
File "PATH_TO_THEANO/theano/gof/op.py", line 545, in __call__
File "PATH_TO_THEANO/theano/gof/op.py", line 545, in __call__
...
@@ -225,12 +225,12 @@ It is also possible to override variables ``__repr__`` method to have them retur
...
@@ -225,12 +225,12 @@ It is also possible to override variables ``__repr__`` method to have them retur
.. testsetup:: printtestvalue
.. testsetup:: printtestvalue
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
.. testcode:: printtestvalue
.. testcode:: printtestvalue
x =
T
.scalar('x')
x =
tt
.scalar('x')
# Assigning test value
# Assigning test value
x.tag.test_value = 42
x.tag.test_value = 42
...
@@ -485,10 +485,10 @@ Consider this example script ("ex.py"):
...
@@ -485,10 +485,10 @@ Consider this example script ("ex.py"):
import theano
import theano
import numpy
import numpy
import theano.tensor as
T
import theano.tensor as
tt
a =
T
.dmatrix('a')
a =
tt
.dmatrix('a')
b =
T
.dmatrix('b')
b =
tt
.dmatrix('b')
f = theano.function([a, b], [a * b])
f = theano.function([a, b], [a * b])
...
...
doc/tutorial/examples.txt
浏览文件 @
33667eb7
...
@@ -41,9 +41,9 @@ Well, what you do is this:
...
@@ -41,9 +41,9 @@ Well, what you do is this:
.. tests/test_tutorial.py:T_examples.test_examples_1
.. tests/test_tutorial.py:T_examples.test_examples_1
>>> import theano
>>> import theano
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
>>> x =
T
.dmatrix('x')
>>> x =
tt
.dmatrix('x')
>>> s = 1 / (1 +
T
.exp(-x))
>>> s = 1 / (1 +
tt
.exp(-x))
>>> logistic = theano.function([x], s)
>>> logistic = theano.function([x], s)
>>> logistic([[0, 1], [-1, -2]])
>>> logistic([[0, 1], [-1, -2]])
array([[ 0.5 , 0.73105858],
array([[ 0.5 , 0.73105858],
...
@@ -64,7 +64,7 @@ We can verify that this alternate form produces the same values:
...
@@ -64,7 +64,7 @@ We can verify that this alternate form produces the same values:
.. If you modify this code, also change :
.. If you modify this code, also change :
.. tests/test_tutorial.py:T_examples.test_examples_2
.. tests/test_tutorial.py:T_examples.test_examples_2
>>> s2 = (1 +
T
.tanh(x / 2)) / 2
>>> s2 = (1 +
tt
.tanh(x / 2)) / 2
>>> logistic2 = theano.function([x], s2)
>>> logistic2 = theano.function([x], s2)
>>> logistic2([[0, 1], [-1, -2]])
>>> logistic2([[0, 1], [-1, -2]])
array([[ 0.5 , 0.73105858],
array([[ 0.5 , 0.73105858],
...
@@ -81,7 +81,7 @@ squared difference between two matrices *a* and *b* at the same time:
...
@@ -81,7 +81,7 @@ squared difference between two matrices *a* and *b* at the same time:
.. If you modify this code, also change :
.. If you modify this code, also change :
.. tests/test_tutorial.py:T_examples.test_examples_3
.. tests/test_tutorial.py:T_examples.test_examples_3
>>> a, b =
T
.dmatrices('a', 'b')
>>> a, b =
tt
.dmatrices('a', 'b')
>>> diff = a - b
>>> diff = a - b
>>> abs_diff = abs(diff)
>>> abs_diff = abs(diff)
>>> diff_squared = diff**2
>>> diff_squared = diff**2
...
@@ -114,7 +114,7 @@ one. You can do it like this:
...
@@ -114,7 +114,7 @@ one. You can do it like this:
>>> from theano import In
>>> from theano import In
>>> from theano import function
>>> from theano import function
>>> x, y =
T
.dscalars('x', 'y')
>>> x, y =
tt
.dscalars('x', 'y')
>>> z = x + y
>>> z = x + y
>>> f = function([x, In(y, value=1)], z)
>>> f = function([x, In(y, value=1)], z)
>>> f(33)
>>> f(33)
...
@@ -135,7 +135,7 @@ be set positionally or by name, as in standard Python:
...
@@ -135,7 +135,7 @@ be set positionally or by name, as in standard Python:
.. If you modify this code, also change :
.. If you modify this code, also change :
.. tests/test_tutorial.py:T_examples.test_examples_7
.. tests/test_tutorial.py:T_examples.test_examples_7
>>> x, y, w =
T
.dscalars('x', 'y', 'w')
>>> x, y, w =
tt
.dscalars('x', 'y', 'w')
>>> z = (x + y) * w
>>> z = (x + y) * w
>>> f = function([x, In(y, value=1), In(w, value=2, name='w_by_name')], z)
>>> f = function([x, In(y, value=1), In(w, value=2, name='w_by_name')], z)
>>> f(33)
>>> f(33)
...
@@ -180,7 +180,7 @@ internal state, and returns the old state value.
...
@@ -180,7 +180,7 @@ internal state, and returns the old state value.
>>> from theano import shared
>>> from theano import shared
>>> state = shared(0)
>>> state = shared(0)
>>> inc =
T
.iscalar('inc')
>>> inc =
tt
.iscalar('inc')
>>> accumulator = function([inc], state, updates=[(state, state+inc)])
>>> accumulator = function([inc], state, updates=[(state, state+inc)])
This code introduces a few new concepts. The ``shared`` function constructs
This code introduces a few new concepts. The ``shared`` function constructs
...
@@ -257,7 +257,7 @@ for the purpose of one particular function.
...
@@ -257,7 +257,7 @@ for the purpose of one particular function.
>>> fn_of_state = state * 2 + inc
>>> fn_of_state = state * 2 + inc
>>> # The type of foo must match the shared variable we are replacing
>>> # The type of foo must match the shared variable we are replacing
>>> # with the ``givens``
>>> # with the ``givens``
>>> foo =
T
.scalar(dtype=state.dtype)
>>> foo =
tt
.scalar(dtype=state.dtype)
>>> skip_shared = function([inc, foo], fn_of_state, givens=[(state, foo)])
>>> skip_shared = function([inc, foo], fn_of_state, givens=[(state, foo)])
>>> skip_shared(1, 3) # we're using 3 for the state, not state.value
>>> skip_shared(1, 3) # we're using 3 for the state, not state.value
array(7)
array(7)
...
@@ -292,9 +292,9 @@ so compilation only needs to be performed once.
...
@@ -292,9 +292,9 @@ so compilation only needs to be performed once.
Let's start from the accumulator defined above:
Let's start from the accumulator defined above:
>>> import theano
>>> import theano
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
>>> state = theano.shared(0)
>>> state = theano.shared(0)
>>> inc =
T
.iscalar('inc')
>>> inc =
tt
.iscalar('inc')
>>> accumulator = theano.function([inc], state, updates=[(state, state+inc)])
>>> accumulator = theano.function([inc], state, updates=[(state, state+inc)])
We can use it to increment the state as usual:
We can use it to increment the state as usual:
...
@@ -463,7 +463,7 @@ to another is shown below.
...
@@ -463,7 +463,7 @@ to another is shown below.
>>> from __future__ import print_function
>>> from __future__ import print_function
>>> import theano
>>> import theano
>>> import numpy
>>> import numpy
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
>>> from theano.sandbox.rng_mrg import MRG_RandomStreams
>>> from theano.sandbox.rng_mrg import MRG_RandomStreams
>>> from theano.tensor.shared_randomstreams import RandomStreams
>>> from theano.tensor.shared_randomstreams import RandomStreams
...
@@ -533,7 +533,7 @@ It will be used repeatedly.
...
@@ -533,7 +533,7 @@ It will be used repeatedly.
import numpy
import numpy
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
rng = numpy.random
rng = numpy.random
N = 400 # training sample size
N = 400 # training sample size
...
@@ -544,8 +544,8 @@ It will be used repeatedly.
...
@@ -544,8 +544,8 @@ It will be used repeatedly.
training_steps = 10000
training_steps = 10000
# Declare Theano symbolic variables
# Declare Theano symbolic variables
x =
T
.dmatrix("x")
x =
tt
.dmatrix("x")
y =
T
.dvector("y")
y =
tt
.dvector("y")
# initialize the weight vector w randomly
# initialize the weight vector w randomly
#
#
...
@@ -562,15 +562,16 @@ It will be used repeatedly.
...
@@ -562,15 +562,16 @@ It will be used repeatedly.
print(b.get_value())
print(b.get_value())
# Construct Theano expression graph
# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b)) # Probability that target = 1
p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b)) # Probability that target = 1
prediction = p_1 > 0.5 # The prediction thresholded
prediction = p_1 > 0.5 # The prediction thresholded
xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function
xent = -y * tt.log(p_1) - (1-y) * tt.log(1-p_1) # Cross-entropy loss function
cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize
cost = xent.mean() + 0.01 * (w ** 2).sum() # The cost to minimize
gw, gb = T.grad(cost, [w, b]) # Compute the gradient of the cost
gw, gb = tt.grad(cost, [w, b]) # Compute the gradient of the cost
# w.r.t weight vector w and
# w.r.t weight vector w and
# bias term b
# bias term b (we shall
# (we shall return to this in a
# return to this in a
# following section of this tutorial)
# following section of this
# tutorial)
# Compile
# Compile
train = theano.function(
train = theano.function(
...
...
doc/tutorial/gradients.txt
浏览文件 @
33667eb7
...
@@ -11,7 +11,7 @@ Computing Gradients
...
@@ -11,7 +11,7 @@ Computing Gradients
Now let's use Theano for a slightly more sophisticated task: create a
Now let's use Theano for a slightly more sophisticated task: create a
function which computes the derivative of some expression *y* with
function which computes the derivative of some expression *y* with
respect to its parameter *x*. To do this we will use the macro ``
T
.grad``.
respect to its parameter *x*. To do this we will use the macro ``
tt
.grad``.
For instance, we can compute the
For instance, we can compute the
gradient of :math:`x^2` with respect to :math:`x`. Note that:
gradient of :math:`x^2` with respect to :math:`x`. Note that:
:math:`d(x^2)/dx = 2 \cdot x`.
:math:`d(x^2)/dx = 2 \cdot x`.
...
@@ -25,11 +25,11 @@ Here is the code to compute this gradient:
...
@@ -25,11 +25,11 @@ Here is the code to compute this gradient:
>>> import numpy
>>> import numpy
>>> import theano
>>> import theano
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
>>> from theano import pp
>>> from theano import pp
>>> x =
T
.dscalar('x')
>>> x =
tt
.dscalar('x')
>>> y = x ** 2
>>> y = x ** 2
>>> gy =
T
.grad(y, x)
>>> gy =
tt
.grad(y, x)
>>> pp(gy) # print out the gradient prior to optimization
>>> pp(gy) # print out the gradient prior to optimization
'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
>>> f = theano.function([x], gy)
>>> f = theano.function([x], gy)
...
@@ -68,30 +68,30 @@ logistic is: :math:`ds(x)/dx = s(x) \cdot (1 - s(x))`.
...
@@ -68,30 +68,30 @@ logistic is: :math:`ds(x)/dx = s(x) \cdot (1 - s(x))`.
.. If you modify this code, also change :
.. If you modify this code, also change :
.. tests/test_tutorial.py:T_examples.test_examples_5
.. tests/test_tutorial.py:T_examples.test_examples_5
>>> x =
T
.dmatrix('x')
>>> x =
tt
.dmatrix('x')
>>> s =
T.sum(1 / (1 + T
.exp(-x)))
>>> s =
tt.sum(1 / (1 + tt
.exp(-x)))
>>> gs =
T
.grad(s, x)
>>> gs =
tt
.grad(s, x)
>>> dlogistic = theano.function([x], gs)
>>> dlogistic = theano.function([x], gs)
>>> dlogistic([[0, 1], [-1, -2]])
>>> dlogistic([[0, 1], [-1, -2]])
array([[ 0.25 , 0.19661193],
array([[ 0.25 , 0.19661193],
[ 0.19661193, 0.10499359]])
[ 0.19661193, 0.10499359]])
In general, for any **scalar** expression *s*, ``
T
.grad(s, w)`` provides
In general, for any **scalar** expression *s*, ``
tt
.grad(s, w)`` provides
the Theano expression for computing :math:`\frac{\partial s}{\partial w}`. In
the Theano expression for computing :math:`\frac{\partial s}{\partial w}`. In
this way Theano can be used for doing **efficient** symbolic differentiation
this way Theano can be used for doing **efficient** symbolic differentiation
(as the expression returned by ``
T
.grad`` will be optimized during compilation), even for
(as the expression returned by ``
tt
.grad`` will be optimized during compilation), even for
function with many inputs. (see `automatic differentiation <http://en.wikipedia.org/wiki/Automatic_differentiation>`_ for a description
function with many inputs. (see `automatic differentiation <http://en.wikipedia.org/wiki/Automatic_differentiation>`_ for a description
of symbolic differentiation).
of symbolic differentiation).
.. note::
.. note::
The second argument of ``
T
.grad`` can be a list, in which case the
The second argument of ``
tt
.grad`` can be a list, in which case the
output is also a list. The order in both lists is important: element
output is also a list. The order in both lists is important: element
*i* of the output list is the gradient of the first argument of
*i* of the output list is the gradient of the first argument of
``
T
.grad`` with respect to the *i*-th element of the list given as second argument.
``
tt
.grad`` with respect to the *i*-th element of the list given as second argument.
The first argument of ``
T
.grad`` has to be a scalar (a tensor
The first argument of ``
tt
.grad`` has to be a scalar (a tensor
of size 1). For more information on the semantics of the arguments of
of size 1). For more information on the semantics of the arguments of
``
T
.grad`` and details about the implementation, see
``
tt
.grad`` and details about the implementation, see
:ref:`this<libdoc_gradient>` section of the library.
:ref:`this<libdoc_gradient>` section of the library.
Additional information on the inner workings of differentiation may also be
Additional information on the inner workings of differentiation may also be
...
@@ -121,25 +121,25 @@ do is to loop over the entries in *y* and compute the gradient of
...
@@ -121,25 +121,25 @@ do is to loop over the entries in *y* and compute the gradient of
shall return to :ref:`scan<tutloop>` later in this tutorial.
shall return to :ref:`scan<tutloop>` later in this tutorial.
>>> import theano
>>> import theano
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
>>> x =
T
.dvector('x')
>>> x =
tt
.dvector('x')
>>> y = x ** 2
>>> y = x ** 2
>>> J, updates = theano.scan(lambda i, y, x :
T
.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y, x])
>>> J, updates = theano.scan(lambda i, y, x :
tt
.grad(y[i], x), sequences=tt.arange(y.shape[0]), non_sequences=[y, x])
>>> f = theano.function([x], J, updates=updates)
>>> f = theano.function([x], J, updates=updates)
>>> f([4, 4])
>>> f([4, 4])
array([[ 8., 0.],
array([[ 8., 0.],
[ 0., 8.]])
[ 0., 8.]])
What we do in this code is to generate a sequence of *ints* from *0* to
What we do in this code is to generate a sequence of *ints* from *0* to
``y.shape[0]`` using ``
T
.arange``. Then we loop through this sequence, and
``y.shape[0]`` using ``
tt
.arange``. Then we loop through this sequence, and
at each step, we compute the gradient of element *y[i]* with respect to
at each step, we compute the gradient of element *y[i]* with respect to
*x*. ``scan`` automatically concatenates all these rows, generating a
*x*. ``scan`` automatically concatenates all these rows, generating a
matrix which corresponds to the Jacobian.
matrix which corresponds to the Jacobian.
.. note::
.. note::
There are some pitfalls to be aware of regarding ``
T
.grad``. One of them is that you
There are some pitfalls to be aware of regarding ``
tt
.grad``. One of them is that you
cannot re-write the above expression of the Jacobian as
cannot re-write the above expression of the Jacobian as
``theano.scan(lambda y_i,x:
T
.grad(y_i,x), sequences=y,
``theano.scan(lambda y_i,x:
tt
.grad(y_i,x), sequences=y,
non_sequences=x)``, even though from the documentation of scan this
non_sequences=x)``, even though from the documentation of scan this
seems possible. The reason is that *y_i* will not be a function of
seems possible. The reason is that *y_i* will not be a function of
*x* anymore, while *y[i]* still is.
*x* anymore, while *y[i]* still is.
...
@@ -156,14 +156,14 @@ to do it manually.
...
@@ -156,14 +156,14 @@ to do it manually.
You can compute the Hessian manually similarly to the Jacobian. The only
You can compute the Hessian manually similarly to the Jacobian. The only
difference is that now, instead of computing the Jacobian of some expression
difference is that now, instead of computing the Jacobian of some expression
*y*, we compute the Jacobian of ``
T
.grad(cost,x)``, where *cost* is some
*y*, we compute the Jacobian of ``
tt
.grad(cost,x)``, where *cost* is some
scalar.
scalar.
>>> x =
T
.dvector('x')
>>> x =
tt
.dvector('x')
>>> y = x ** 2
>>> y = x ** 2
>>> cost = y.sum()
>>> cost = y.sum()
>>> gy =
T
.grad(cost, x)
>>> gy =
tt
.grad(cost, x)
>>> H, updates = theano.scan(lambda i, gy,x :
T
.grad(gy[i], x), sequences=T.arange(gy.shape[0]), non_sequences=[gy, x])
>>> H, updates = theano.scan(lambda i, gy,x :
tt
.grad(gy[i], x), sequences=tt.arange(gy.shape[0]), non_sequences=[gy, x])
>>> f = theano.function([x], H, updates=updates)
>>> f = theano.function([x], H, updates=updates)
>>> f([4, 4])
>>> f([4, 4])
array([[ 2., 0.],
array([[ 2., 0.],
...
@@ -200,11 +200,11 @@ form of the operation. In order to evaluate the *R-operation* of
...
@@ -200,11 +200,11 @@ form of the operation. In order to evaluate the *R-operation* of
expression *y*, with respect to *x*, multiplying the Jacobian with *v*
expression *y*, with respect to *x*, multiplying the Jacobian with *v*
you need to do something similar to this:
you need to do something similar to this:
>>> W =
T
.dmatrix('W')
>>> W =
tt
.dmatrix('W')
>>> V =
T
.dmatrix('V')
>>> V =
tt
.dmatrix('V')
>>> x =
T
.dvector('x')
>>> x =
tt
.dvector('x')
>>> y =
T
.dot(x, W)
>>> y =
tt
.dot(x, W)
>>> JV =
T
.Rop(y, W, V)
>>> JV =
tt
.Rop(y, W, V)
>>> f = theano.function([W, V, x], JV)
>>> f = theano.function([W, V, x], JV)
>>> f([[1, 1], [1, 1]], [[2, 2], [2, 2]], [0,1])
>>> f([[1, 1], [1, 1]], [[2, 2], [2, 2]], [0,1])
array([ 2., 2.])
array([ 2., 2.])
...
@@ -219,11 +219,11 @@ the Jacobian. The mathematical formula would be :math:`v \frac{\partial
...
@@ -219,11 +219,11 @@ the Jacobian. The mathematical formula would be :math:`v \frac{\partial
f(x)}{\partial x}`. The *L-operator* is also supported for generic tensors
f(x)}{\partial x}`. The *L-operator* is also supported for generic tensors
(not only for vectors). Similarly, it can be implemented as follows:
(not only for vectors). Similarly, it can be implemented as follows:
>>> W =
T
.dmatrix('W')
>>> W =
tt
.dmatrix('W')
>>> v =
T
.dvector('v')
>>> v =
tt
.dvector('v')
>>> x =
T
.dvector('x')
>>> x =
tt
.dvector('x')
>>> y =
T
.dot(x, W)
>>> y =
tt
.dot(x, W)
>>> VJ =
T
.Lop(y, W, v)
>>> VJ =
tt
.Lop(y, W, v)
>>> f = theano.function([v,x], VJ)
>>> f = theano.function([v,x], VJ)
>>> f([2, 2], [0, 1])
>>> f([2, 2], [0, 1])
array([[ 0., 0.],
array([[ 0., 0.],
...
@@ -251,11 +251,11 @@ Hessian matrix, you have two options that will
...
@@ -251,11 +251,11 @@ Hessian matrix, you have two options that will
give you the same result, though these options might exhibit differing performances.
give you the same result, though these options might exhibit differing performances.
Hence, we suggest profiling the methods before using either one of the two:
Hence, we suggest profiling the methods before using either one of the two:
>>> x =
T
.dvector('x')
>>> x =
tt
.dvector('x')
>>> v =
T
.dvector('v')
>>> v =
tt
.dvector('v')
>>> y =
T
.sum(x ** 2)
>>> y =
tt
.sum(x ** 2)
>>> gy =
T
.grad(y, x)
>>> gy =
tt
.grad(y, x)
>>> vH =
T.grad(T
.sum(gy * v), x)
>>> vH =
tt.grad(tt
.sum(gy * v), x)
>>> f = theano.function([x, v], vH)
>>> f = theano.function([x, v], vH)
>>> f([4, 4], [2, 2])
>>> f([4, 4], [2, 2])
array([ 4., 4.])
array([ 4., 4.])
...
@@ -263,11 +263,11 @@ array([ 4., 4.])
...
@@ -263,11 +263,11 @@ array([ 4., 4.])
or, making use of the *R-operator*:
or, making use of the *R-operator*:
>>> x =
T
.dvector('x')
>>> x =
tt
.dvector('x')
>>> v =
T
.dvector('v')
>>> v =
tt
.dvector('v')
>>> y =
T
.sum(x ** 2)
>>> y =
tt
.sum(x ** 2)
>>> gy =
T
.grad(y, x)
>>> gy =
tt
.grad(y, x)
>>> Hv =
T
.Rop(gy, x, v)
>>> Hv =
tt
.Rop(gy, x, v)
>>> f = theano.function([x, v], Hv)
>>> f = theano.function([x, v], Hv)
>>> f([4, 4], [2, 2])
>>> f([4, 4], [2, 2])
array([ 4., 4.])
array([ 4., 4.])
...
...
doc/tutorial/index.txt
浏览文件 @
33667eb7
...
@@ -11,9 +11,9 @@ Let us start an interactive session (e.g. with ``python`` or ``ipython``) and im
...
@@ -11,9 +11,9 @@ Let us start an interactive session (e.g. with ``python`` or ``ipython``) and im
Several of the symbols you will need to use are in the ``tensor`` subpackage
Several of the symbols you will need to use are in the ``tensor`` subpackage
of Theano. Let us import that subpackage under a handy name like
of Theano. Let us import that subpackage under a handy name like
``
T
`` (the tutorials will frequently use this convention).
``
tt
`` (the tutorials will frequently use this convention).
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
If that succeeded you are ready for the tutorial, otherwise check your
If that succeeded you are ready for the tutorial, otherwise check your
installation (see :ref:`install`).
installation (see :ref:`install`).
...
@@ -68,7 +68,7 @@ Further readings
...
@@ -68,7 +68,7 @@ Further readings
.. toctree::
.. toctree::
../extending/graphstructures
../extending/graphstructures
loading_and_saving
loading_and_saving
aliasing
aliasing
multi_cores
multi_cores
...
...
doc/tutorial/loop.txt
浏览文件 @
33667eb7
...
@@ -32,15 +32,15 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
...
@@ -32,15 +32,15 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
# defining the tensor variables
# defining the tensor variables
X =
T
.matrix("X")
X =
tt
.matrix("X")
W =
T
.matrix("W")
W =
tt
.matrix("W")
b_sym =
T
.vector("b_sym")
b_sym =
tt
.vector("b_sym")
results, updates = theano.scan(lambda v:
T.tanh(T
.dot(v, W) + b_sym), sequences=X)
results, updates = theano.scan(lambda v:
tt.tanh(tt
.dot(v, W) + b_sym), sequences=X)
compute_elementwise = theano.function(inputs=[X, W, b_sym], outputs=results)
compute_elementwise = theano.function(inputs=[X, W, b_sym], outputs=results)
# test values
# test values
...
@@ -66,19 +66,19 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
...
@@ -66,19 +66,19 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
# define tensor variables
# define tensor variables
X =
T
.vector("X")
X =
tt
.vector("X")
W =
T
.matrix("W")
W =
tt
.matrix("W")
b_sym =
T
.vector("b_sym")
b_sym =
tt
.vector("b_sym")
U =
T
.matrix("U")
U =
tt
.matrix("U")
Y =
T
.matrix("Y")
Y =
tt
.matrix("Y")
V =
T
.matrix("V")
V =
tt
.matrix("V")
P =
T
.matrix("P")
P =
tt
.matrix("P")
results, updates = theano.scan(lambda y, p, x_tm1:
T.tanh(T.dot(x_tm1, W) + T.dot(y, U) + T
.dot(p, V)),
results, updates = theano.scan(lambda y, p, x_tm1:
tt.tanh(tt.dot(x_tm1, W) + tt.dot(y, U) + tt
.dot(p, V)),
sequences=[Y, P[::-1]], outputs_info=[X])
sequences=[Y, P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs=[X, W, Y, U, P, V], outputs=results)
compute_seq = theano.function(inputs=[X, W, Y, U, P, V], outputs=results)
...
@@ -120,12 +120,12 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
...
@@ -120,12 +120,12 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
# define tensor variable
# define tensor variable
X =
T
.matrix("X")
X =
tt
.matrix("X")
results, updates = theano.scan(lambda x_i:
T
.sqrt((x_i ** 2).sum()), sequences=[X])
results, updates = theano.scan(lambda x_i:
tt
.sqrt((x_i ** 2).sum()), sequences=[X])
compute_norm_lines = theano.function(inputs=[X], outputs=results)
compute_norm_lines = theano.function(inputs=[X], outputs=results)
# test value
# test value
...
@@ -145,12 +145,12 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
...
@@ -145,12 +145,12 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
# define tensor variable
# define tensor variable
X =
T
.matrix("X")
X =
tt
.matrix("X")
results, updates = theano.scan(lambda x_i:
T
.sqrt((x_i ** 2).sum()), sequences=[X.T])
results, updates = theano.scan(lambda x_i:
tt
.sqrt((x_i ** 2).sum()), sequences=[X.T])
compute_norm_cols = theano.function(inputs=[X], outputs=results)
compute_norm_cols = theano.function(inputs=[X], outputs=results)
# test value
# test value
...
@@ -170,14 +170,14 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
...
@@ -170,14 +170,14 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
floatX = "float32"
floatX = "float32"
# define tensor variable
# define tensor variable
X =
T
.matrix("X")
X =
tt
.matrix("X")
results, updates = theano.scan(lambda i, j, t_f:
T
.cast(X[i, j] + t_f, floatX),
results, updates = theano.scan(lambda i, j, t_f:
tt
.cast(X[i, j] + t_f, floatX),
sequences=[
T.arange(X.shape[0]), T
.arange(X.shape[1])],
sequences=[
tt.arange(X.shape[0]), tt
.arange(X.shape[1])],
outputs_info=np.asarray(0., dtype=floatX))
outputs_info=np.asarray(0., dtype=floatX))
result = results[-1]
result = results[-1]
compute_trace = theano.function(inputs=[X], outputs=result)
compute_trace = theano.function(inputs=[X], outputs=result)
...
@@ -201,18 +201,18 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
...
@@ -201,18 +201,18 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
# define tensor variables
# define tensor variables
X =
T
.matrix("X")
X =
tt
.matrix("X")
W =
T
.matrix("W")
W =
tt
.matrix("W")
b_sym =
T
.vector("b_sym")
b_sym =
tt
.vector("b_sym")
U =
T
.matrix("U")
U =
tt
.matrix("U")
V =
T
.matrix("V")
V =
tt
.matrix("V")
n_sym =
T
.iscalar("n_sym")
n_sym =
tt
.iscalar("n_sym")
results, updates = theano.scan(lambda x_tm2, x_tm1:
T.dot(x_tm2, U) + T.dot(x_tm1, V) + T.tanh(T
.dot(x_tm1, W) + b_sym),
results, updates = theano.scan(lambda x_tm2, x_tm1:
tt.dot(x_tm2, U) + tt.dot(x_tm1, V) + tt.tanh(tt
.dot(x_tm1, W) + b_sym),
n_steps=n_sym, outputs_info=[dict(initial=X, taps=[-2, -1])])
n_steps=n_sym, outputs_info=[dict(initial=X, taps=[-2, -1])])
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=results)
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=results)
...
@@ -266,14 +266,14 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
...
@@ -266,14 +266,14 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
# define tensor variables
# define tensor variables
v =
T
.vector()
v =
tt
.vector()
A =
T
.matrix()
A =
tt
.matrix()
y =
T.tanh(T
.dot(v, A))
y =
tt.tanh(tt
.dot(v, A))
results, updates = theano.scan(lambda i:
T.grad(y[i], v), sequences=[T
.arange(y.shape[0])])
results, updates = theano.scan(lambda i:
tt.grad(y[i], v), sequences=[tt
.arange(y.shape[0])])
compute_jac_t = theano.function([A, v], results, allow_input_downcast=True) # shape (d_out, d_in)
compute_jac_t = theano.function([A, v], results, allow_input_downcast=True) # shape (d_out, d_in)
# test values
# test values
...
@@ -301,12 +301,12 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
...
@@ -301,12 +301,12 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
# define shared variables
# define shared variables
k = theano.shared(0)
k = theano.shared(0)
n_sym =
T
.iscalar("n_sym")
n_sym =
tt
.iscalar("n_sym")
results, updates = theano.scan(lambda:{k:(k + 1)}, n_steps=n_sym)
results, updates = theano.scan(lambda:{k:(k + 1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast=True)
accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast=True)
...
@@ -320,19 +320,19 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
...
@@ -320,19 +320,19 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
# define tensor variables
# define tensor variables
X =
T
.matrix("X")
X =
tt
.matrix("X")
W =
T
.matrix("W")
W =
tt
.matrix("W")
b_sym =
T
.vector("b_sym")
b_sym =
tt
.vector("b_sym")
# define shared random stream
# define shared random stream
trng =
T
.shared_randomstreams.RandomStreams(1234)
trng =
tt
.shared_randomstreams.RandomStreams(1234)
d=trng.binomial(size=W[1].shape)
d=trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v:
T.tanh(T
.dot(v, W) + b_sym) * d, sequences=X)
results, updates = theano.scan(lambda v:
tt.tanh(tt
.dot(v, W) + b_sym) * d, sequences=X)
compute_with_bnoise = theano.function(inputs=[X, W, b_sym], outputs=results,
compute_with_bnoise = theano.function(inputs=[X, W, b_sym], outputs=results,
updates=updates, allow_input_downcast=True)
updates=updates, allow_input_downcast=True)
x = np.eye(10, 2, dtype=theano.config.floatX)
x = np.eye(10, 2, dtype=theano.config.floatX)
...
@@ -361,18 +361,18 @@ Note that if you want to use a random variable ``d`` that will not be updated th
...
@@ -361,18 +361,18 @@ Note that if you want to use a random variable ``d`` that will not be updated th
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
theano.config.warn.subtensor_merge_bug = False
theano.config.warn.subtensor_merge_bug = False
k =
T
.iscalar("k")
k =
tt
.iscalar("k")
A =
T
.vector("A")
A =
tt
.vector("A")
def inner_fct(prior_result, B):
def inner_fct(prior_result, B):
return prior_result * B
return prior_result * B
# Symbolic description of the result
# Symbolic description of the result
result, updates = theano.scan(fn=inner_fct,
result, updates = theano.scan(fn=inner_fct,
outputs_info=
T
.ones_like(A),
outputs_info=
tt
.ones_like(A),
non_sequences=A, n_steps=k)
non_sequences=A, n_steps=k)
# Scan has provided us with A ** 1 through A ** k. Keep only the last
# Scan has provided us with A ** 1 through A ** k. Keep only the last
...
@@ -395,11 +395,11 @@ Note that if you want to use a random variable ``d`` that will not be updated th
...
@@ -395,11 +395,11 @@ Note that if you want to use a random variable ``d`` that will not be updated th
import numpy
import numpy
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
theano.config.warn.subtensor_merge_bug = False
theano.config.warn.subtensor_merge_bug = False
coefficients = theano.tensor.vector("coefficients")
coefficients = theano.tensor.vector("coefficients")
x =
T
.scalar("x")
x =
tt
.scalar("x")
max_coefficients_supported = 10000
max_coefficients_supported = 10000
# Generate the components of the polynomial
# Generate the components of the polynomial
...
...
doc/tutorial/modes.txt
浏览文件 @
33667eb7
...
@@ -47,7 +47,7 @@ Consider the logistic regression:
...
@@ -47,7 +47,7 @@ Consider the logistic regression:
import numpy
import numpy
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
rng = numpy.random
rng = numpy.random
N = 400
N = 400
...
@@ -57,19 +57,19 @@ Consider the logistic regression:
...
@@ -57,19 +57,19 @@ Consider the logistic regression:
training_steps = 10000
training_steps = 10000
# Declare Theano symbolic variables
# Declare Theano symbolic variables
x =
T
.matrix("x")
x =
tt
.matrix("x")
y =
T
.vector("y")
y =
tt
.vector("y")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0]
x.tag.test_value = D[0]
y.tag.test_value = D[1]
y.tag.test_value = D[1]
# Construct Theano expression graph
# Construct Theano expression graph
p_1 = 1 / (1 +
T.exp(-T
.dot(x, w)-b)) # Probability of having a one
p_1 = 1 / (1 +
tt.exp(-tt
.dot(x, w)-b)) # Probability of having a one
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
xent = -y*
T.log(p_1) - (1-y)*T
.log(1-p_1) # Cross-entropy
xent = -y*
tt.log(p_1) - (1-y)*tt
.log(1-p_1) # Cross-entropy
cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
gw,gb =
T
.grad(cost, [w,b])
gw,gb =
tt
.grad(cost, [w,b])
# Compile expressions to functions
# Compile expressions to functions
train = theano.function(
train = theano.function(
...
@@ -252,7 +252,7 @@ DebugMode is used as follows:
...
@@ -252,7 +252,7 @@ DebugMode is used as follows:
.. testcode::
.. testcode::
x =
T
.dvector('x')
x =
tt
.dvector('x')
f = theano.function([x], 10 * x, mode='DebugMode')
f = theano.function([x], 10 * x, mode='DebugMode')
...
...
doc/tutorial/printing_drawing.txt
浏览文件 @
33667eb7
...
@@ -27,7 +27,7 @@ Consider again the logistic regression example:
...
@@ -27,7 +27,7 @@ Consider again the logistic regression example:
>>> import numpy
>>> import numpy
>>> import theano
>>> import theano
>>> import theano.tensor as
T
>>> import theano.tensor as
tt
>>> rng = numpy.random
>>> rng = numpy.random
>>> # Training data
>>> # Training data
>>> N = 400
>>> N = 400
...
@@ -35,19 +35,19 @@ Consider again the logistic regression example:
...
@@ -35,19 +35,19 @@ Consider again the logistic regression example:
>>> D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
>>> D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
>>> training_steps = 10000
>>> training_steps = 10000
>>> # Declare Theano symbolic variables
>>> # Declare Theano symbolic variables
>>> x =
T
.matrix("x")
>>> x =
tt
.matrix("x")
>>> y =
T
.vector("y")
>>> y =
tt
.vector("y")
>>> w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
>>> w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
>>> b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
>>> b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
>>> x.tag.test_value = D[0]
>>> x.tag.test_value = D[0]
>>> y.tag.test_value = D[1]
>>> y.tag.test_value = D[1]
>>> # Construct Theano expression graph
>>> # Construct Theano expression graph
>>> p_1 = 1 / (1 +
T.exp(-T
.dot(x, w)-b)) # Probability of having a one
>>> p_1 = 1 / (1 +
tt.exp(-tt
.dot(x, w)-b)) # Probability of having a one
>>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
>>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
>>> # Compute gradients
>>> # Compute gradients
>>> xent = -y*
T.log(p_1) - (1-y)*T
.log(1-p_1) # Cross-entropy
>>> xent = -y*
tt.log(p_1) - (1-y)*tt
.log(1-p_1) # Cross-entropy
>>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
>>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
>>> gw,gb =
T
.grad(cost, [w,b])
>>> gw,gb =
tt
.grad(cost, [w,b])
>>> # Training and prediction function
>>> # Training and prediction function
>>> train = theano.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train")
>>> train = theano.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train")
>>> predict = theano.function(inputs=[x], outputs=prediction, name = "predict")
>>> predict = theano.function(inputs=[x], outputs=prediction, name = "predict")
...
...
doc/tutorial/using_gpu.txt
浏览文件 @
33667eb7
...
@@ -46,7 +46,7 @@ GpuArray Backend
...
@@ -46,7 +46,7 @@ GpuArray Backend
If you have not done so already, you will need to install libgpuarray
If you have not done so already, you will need to install libgpuarray
as well as at least one computing toolkit (CUDA or OpenCL). Detailed
as well as at least one computing toolkit (CUDA or OpenCL). Detailed
instructions to accomplish that are provided at
instructions to accomplish that are provided at
`libgpuarray <http://deeplearning.net/software/libgpuarray/installation.html>`_.
`libgpuarray <http://deeplearning.net/software/libgpuarray/installation.html>`_.
To install Nvidia's GPU-programming toolchain (CUDA) and configure
To install Nvidia's GPU-programming toolchain (CUDA) and configure
...
@@ -313,7 +313,7 @@ Consider again the logistic regression:
...
@@ -313,7 +313,7 @@ Consider again the logistic regression:
import numpy
import numpy
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
rng = numpy.random
rng = numpy.random
N = 400
N = 400
...
@@ -323,19 +323,19 @@ Consider again the logistic regression:
...
@@ -323,19 +323,19 @@ Consider again the logistic regression:
training_steps = 10000
training_steps = 10000
# Declare Theano symbolic variables
# Declare Theano symbolic variables
x =
T
.matrix("x")
x =
tt
.matrix("x")
y =
T
.vector("y")
y =
tt
.vector("y")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0]
x.tag.test_value = D[0]
y.tag.test_value = D[1]
y.tag.test_value = D[1]
# Construct Theano expression graph
# Construct Theano expression graph
p_1 = 1 / (1 +
T.exp(-T
.dot(x, w)-b)) # Probability of having a one
p_1 = 1 / (1 +
tt.exp(-tt
.dot(x, w)-b)) # Probability of having a one
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
xent = -y*
T.log(p_1) - (1-y)*T
.log(1-p_1) # Cross-entropy
xent = -y*
tt.log(p_1) - (1-y)*tt
.log(1-p_1) # Cross-entropy
cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
gw,gb =
T
.grad(cost, [w,b])
gw,gb =
tt
.grad(cost, [w,b])
# Compile expressions to functions
# Compile expressions to functions
train = theano.function(
train = theano.function(
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论