testgroup / pytensor · Commit 33667eb7
Authored October 18, 2020 by Brandon T. Willard
Replace theano.tensor alias T with tt in documentation
Parent: 1e6bbdef
Showing 34 changed files with 518 additions and 513 deletions.
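The change is mechanical: every documentation snippet that imported `theano.tensor` under the alias `T` now imports it as `tt`. A minimal sketch of the convention before and after this commit (illustrative, not a literal excerpt of any one hunk):

# Old documentation convention (before this commit):
import theano.tensor as T
x = T.matrix("x")

# New documentation convention (after this commit):
import theano.tensor as tt
x = tt.matrix("x")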
Changed files:

doc/cifarSC2011/advanced_theano.txt          +16 -16
doc/cifarSC2011/theano.txt                   +27 -27
doc/crei2013/advanced_theano.txt             +5  -5
doc/extending/extending_theano_c.txt         +2  -1
doc/extending/fibby.txt                      +3  -3
doc/extending/graphstructures.txt            +3  -3
doc/extending/tips.txt                       +2  -4
doc/hpcs2011_tutorial/logreg_example.py      +37 -33
doc/hpcs2011_tutorial/presentation.tex       +30 -30
doc/hpcs2011_tutorial/scan_poly.py           +9  -9
doc/hpcs2011_tutorial/scan_pow.py            +12 -9
doc/library/compile/io.txt                   +10 -10
doc/library/compile/nanguardmode.txt         +3  -3
doc/library/d3viz/index.ipynb                +7  -7
doc/library/d3viz/index.txt                  +7  -7
doc/library/gpuarray/fft.txt                 +2  -2
doc/library/printing.txt                     +6  -7
doc/library/scan.txt                         +37 -37
doc/library/tensor/basic.txt                 +41 -41
doc/library/tensor/fft.txt                   +2  -2
doc/library/tensor/nnet/nnet.txt             +24 -24
doc/nextml2015/presentation.tex              +46 -46
doc/sandbox/logistic_regression_example.txt  +10 -10
doc/tutorial/adding.txt                      +14 -14
doc/tutorial/broadcasting.txt                +4  -5
doc/tutorial/conditions.txt                  +5  -5
doc/tutorial/debug_faq.txt                   +14 -14
doc/tutorial/examples.txt                    +23 -22
doc/tutorial/gradients.txt                   +43 -43
doc/tutorial/index.txt                       +2  -2
doc/tutorial/loop.txt                        +53 -53
doc/tutorial/modes.txt                       +7  -7
doc/tutorial/printing_drawing.txt            +6  -6
doc/tutorial/using_gpu.txt                   +6  -6
doc/cifarSC2011/advanced_theano.txt

@@ -18,15 +18,15 @@ Conditions
 .. testcode::
-    from theano import tensor as T
+    from theano import tensor as tt
     from theano.ifelse import ifelse
     import theano, time, numpy
-    a,b = T.scalars('a','b')
-    x,y = T.matrices('x','y')
+    a,b = tt.scalars('a','b')
+    x,y = tt.matrices('x','y')
-    z_switch = T.switch(T.lt(a,b), T.mean(x), T.mean(y))
-    z_lazy = ifelse(T.lt(a,b), T.mean(x), T.mean(y))
+    z_switch = tt.switch(tt.lt(a,b), tt.mean(x), tt.mean(y))
+    z_lazy = ifelse(tt.lt(a,b), tt.mean(x), tt.mean(y))
     f_switch = theano.function([a,b,x,y], z_switch,
                                mode=theano.Mode(linker='vm'))

@@ -98,14 +98,14 @@ Loops
 .. code-block:: python
     import theano
-    import theano.tensor as T
+    import theano.tensor as tt
-    k = T.iscalar("k"); A = T.vector("A")
+    k = tt.iscalar("k"); A = tt.vector("A")
     def inner_fct(prior_result, A): return prior_result * A
     # Symbolic description of the result
     result, updates = theano.scan(fn=inner_fct,
-                                  outputs_info=T.ones_like(A),
+                                  outputs_info=tt.ones_like(A),
                                   non_sequences=A, n_steps=k)
     # Scan has provided us with A**1 through A**k. Keep only the last

@@ -125,10 +125,10 @@ Loops
     import numpy
     import theano
-    import theano.tensor as T
+    import theano.tensor as tt
     coefficients = theano.tensor.vector("coefficients")
-    x = T.scalar("x"); max_coefficients_supported = 10000
+    x = tt.scalar("x"); max_coefficients_supported = 10000
     # Generate the components of the polynomial
     full_range=theano.tensor.arange(max_coefficients_supported)

@@ -384,7 +384,7 @@ Consider the following logistic regression model:
 >>> import numpy
 >>> import theano
->>> import theano.tensor as T
+>>> import theano.tensor as tt
 >>> rng = numpy.random
 >>> # Training data
 >>> N = 400

@@ -392,19 +392,19 @@ Consider the following logistic regression model:
 >>> D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
 >>> training_steps = 10000
 >>> # Declare Theano symbolic variables
->>> x = T.matrix("x")
->>> y = T.vector("y")
+>>> x = tt.matrix("x")
+>>> y = tt.vector("y")
 >>> w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
 >>> b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
 >>> x.tag.test_value = D[0]
 >>> y.tag.test_value = D[1]
 >>> # Construct Theano expression graph
->>> p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one
+>>> p_1 = 1 / (1 + tt.exp(-tt.dot(x, w)-b)) # Probability of having a one
 >>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
 >>> # Compute gradients
->>> xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) # Cross-entropy
+>>> xent = -y*tt.log(p_1) - (1-y)*tt.log(1-p_1) # Cross-entropy
 >>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
->>> gw,gb = T.grad(cost, [w,b])
+>>> gw,gb = tt.grad(cost, [w,b])
 >>> # Training and prediction function
 >>> train = theano.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train")
 >>> predict = theano.function(inputs=[x], outputs=prediction, name = "predict")
doc/cifarSC2011/theano.txt

@@ -99,7 +99,7 @@ Real example
     import numpy
     import theano
-    import theano.tensor as T
+    import theano.tensor as tt
    rng = numpy.random
     N = 400

@@ -108,19 +108,19 @@ Real example
     training_steps = 10000
     # Declare Theano symbolic variables
-    x = T.matrix("x")
-    y = T.vector("y")
+    x = tt.matrix("x")
+    y = tt.vector("y")
     w = theano.shared(rng.randn(feats), name="w")
     b = theano.shared(0., name="b")
     print "Initial model:"
     print w.get_value(), b.get_value()
     # Construct Theano expression graph
-    p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability that target = 1
+    p_1 = 1 / (1 + tt.exp(-tt.dot(x, w)-b)) # Probability that target = 1
     prediction = p_1 > 0.5 # The prediction thresholded
-    xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) # Cross-entropy loss function
+    xent = -y*tt.log(p_1) - (1-y)*tt.log(1-p_1) # Cross-entropy loss function
     cost = xent.mean() + 0.01*(w**2).sum() # The cost to minimize
-    gw,gb = T.grad(cost, [w,b])
+    gw,gb = tt.grad(cost, [w,b])
     # Compile
     train = theano.function(

@@ -145,7 +145,7 @@ Where are those optimization applied?
 * ``log(1+exp(x))``
-* ``1 / (1 + T.exp(var))`` (sigmoid)
+* ``1 / (1 + tt.exp(var))`` (sigmoid)
 * ``log(1-sigmoid(var))`` (softplus, stabilisation)

@@ -156,13 +156,13 @@ Where are those optimization applied?
 .. code-block:: python
-    p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b))
-    # 1 / (1 + T.exp(var)) -> sigmoid(var)
-    xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1)
+    p_1 = 1 / (1 + tt.exp(-tt.dot(x, w)-b))
+    # 1 / (1 + tt.exp(var)) -> sigmoid(var)
+    xent = -y*tt.log(p_1) - (1-y)*tt.log(1-p_1)
     # Log(1-sigmoid(var)) -> -sigmoid(var)
     prediction = p_1 > 0.5
     cost = xent.mean() + 0.01*(w**2).sum()
-    gw,gb = T.grad(cost, [w,b])
+    gw,gb = tt.grad(cost, [w,b])
     train = theano.function(
         inputs=[x,y],

@@ -188,7 +188,7 @@ Exercise 2
     import numpy
     import theano
-    import theano.tensor as T
+    import theano.tensor as tt
     rng = numpy.random
     N = 400

@@ -198,8 +198,8 @@ Exercise 2
     training_steps = 10000
     # Declare Theano symbolic variables
-    x = T.matrix("x")
-    y = T.vector("y")
+    x = tt.matrix("x")
+    y = tt.vector("y")
     w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
     b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
     x.tag.test_value = D[0]

@@ -209,11 +209,11 @@ Exercise 2
     # Construct Theano expression graph
-    p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one
+    p_1 = 1 / (1 + tt.exp(-tt.dot(x, w)-b)) # Probability of having a one
     prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
-    xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) # Cross-entropy
+    xent = -y*tt.log(p_1) - (1-y)*tt.log(1-p_1) # Cross-entropy
     cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
-    gw,gb = T.grad(cost, [w,b])
+    gw,gb = tt.grad(cost, [w,b])
     # Compile expressions to functions
     train = theano.function(

@@ -296,19 +296,19 @@ Symbolic variables
 * # Dimensions
-* T.scalar, T.vector, T.matrix, T.tensor3, T.tensor4
+* tt.scalar, tt.vector, tt.matrix, tt.tensor3, tt.tensor4
 * Dtype
-* T.[fdczbwil]vector (float32, float64, complex64, complex128, int8, int16, int32, int64)
-* T.vector to floatX dtype
+* tt.[fdczbwil]vector (float32, float64, complex64, complex128, int8, int16, int32, int64)
+* tt.vector to floatX dtype
 * floatX: configurable dtype that can be float32 or float64.
 * Custom variable
-* All are shortcuts to: ``T.tensor(dtype, broadcastable=[False]*nd)``
+* All are shortcuts to: ``tt.tensor(dtype, broadcastable=[False]*nd)``
 * Other dtype: uint[8,16,32,64], floatX

@@ -325,21 +325,21 @@ Details regarding symbolic broadcasting...
 * Broadcastability must be specified when creating the variable
-* The only shorcut with broadcastable dimensions are: **T.row** and **T.col**
-* For all others: ``T.tensor(dtype, broadcastable=([False or True])*nd)``
+* The only shorcut with broadcastable dimensions are: **tt.row** and **tt.col**
+* For all others: ``tt.tensor(dtype, broadcastable=([False or True])*nd)``
 Differentiation details
 -----------------------
->>> gw,gb = T.grad(cost, [w,b]) # doctest: +SKIP
+>>> gw,gb = tt.grad(cost, [w,b]) # doctest: +SKIP
-* T.grad works symbolically: takes and returns a Theano variable
-* T.grad can be compared to a macro: it can be applied multiple times
-* T.grad takes scalar costs only
+* tt.grad works symbolically: takes and returns a Theano variable
+* tt.grad can be compared to a macro: it can be applied multiple times
+* tt.grad takes scalar costs only
 * Simple recipe allows to compute efficiently vector x Jacobian and vector x Hessian
doc/crei2013/advanced_theano.txt

@@ -116,7 +116,7 @@ Consider the following logistic regression model:
 >>> import numpy
 >>> import theano
->>> import theano.tensor as T
+>>> import theano.tensor as tt
 >>> rng = numpy.random
 >>> # Training data
 >>> N = 400

@@ -124,19 +124,19 @@ Consider the following logistic regression model:
 >>> D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
 >>> training_steps = 10000
 >>> # Declare Theano symbolic variables
->>> x = T.matrix("x")
->>> y = T.vector("y")
+>>> x = tt.matrix("x")
+>>> y = tt.vector("y")
 >>> w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
 >>> b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
 >>> x.tag.test_value = D[0]
 >>> y.tag.test_value = D[1]
 >>> # Construct Theano expression graph
->>> p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one
+>>> p_1 = 1 / (1 + tt.exp(-T.dot(x, w)-b)) # Probability of having a one
 >>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
 >>> # Compute gradients
 >>> xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) # Cross-entropy
 >>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
->>> gw,gb = T.grad(cost, [w,b])
+>>> gw,gb = tt.grad(cost, [w,b])
 >>> # Training and prediction function
 >>> train = theano.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train")
 >>> predict = theano.function(inputs=[x], outputs=prediction, name = "predict")
doc/extending/extending_theano_c.txt

@@ -474,8 +474,9 @@ storage with the right shape and number of dimensions.
     import numpy
     import theano
     from theano import gof
+    import theano.tensor as T
     class VectorTimesScalar(gof.Op):
         __props__ = ()
doc/extending/fibby.txt

@@ -137,12 +137,12 @@ Here is some code to test that the optimization is applied only when needed.
 .. testcode::
     import numpy
-    import theano.tensor as T
+    import theano.tensor as tt
     from theano import function
     from theano import tensor
     # Test it does not apply when not needed
-    x = T.dvector()
+    x = tt.dvector()
     f = function([x], fibby(x))
     # We call the function to make sure it runs.

@@ -153,7 +153,7 @@ Here is some code to test that the optimization is applied only when needed.
     assert isinstance(topo[0].op, Fibby)
     # Test that the optimization gets applied.
-    f_zero = function([], fibby(T.zeros([5])))
+    f_zero = function([], fibby(tt.zeros([5])))
     # If you run in DebugMode, it will compare the output before
     # and after the optimization.
...
doc/extending/graphstructures.txt
浏览文件 @
33667eb7
...
@@ -32,10 +32,10 @@ This should help you understand how these pieces fit together:
...
@@ -32,10 +32,10 @@ This should help you understand how these pieces fit together:
.. testcode::
.. testcode::
import theano.tensor as
T
import theano.tensor as
tt
x =
T
.dmatrix('x')
x =
tt
.dmatrix('x')
y =
T
.dmatrix('y')
y =
tt
.dmatrix('y')
z = x + y
z = x + y
**Diagram**
**Diagram**
...
...
doc/extending/tips.txt
浏览文件 @
33667eb7
...
@@ -21,10 +21,10 @@ simple function:
...
@@ -21,10 +21,10 @@ simple function:
.. testcode::
.. testcode::
from theano import tensor as
T
from theano import tensor as
tt
def sum_square_difference(a, b):
def sum_square_difference(a, b):
return
T
.sum((a - b)**2)
return
tt
.sum((a - b)**2)
Even without taking Theano's optimizations into account, it is likely
Even without taking Theano's optimizations into account, it is likely
to work just as well as a custom implementation. It also supports all
to work just as well as a custom implementation. It also supports all
...
@@ -53,5 +53,3 @@ defining a new Op. It might not be exhaustive but it covers a lot of
...
@@ -53,5 +53,3 @@ defining a new Op. It might not be exhaustive but it covers a lot of
common mistakes.
common mistakes.
WRITEME
WRITEME
doc/hpcs2011_tutorial/logreg_example.py

 import numpy as np
 import theano
-import theano.tensor as T
+import theano.tensor as tt
 rng = np.random
 N = 400
 feats = 784
-D = (rng.randn(N, feats).astype(theano.config.floatX),
-     rng.randint(size=N, low=0, high=2).astype(theano.config.floatX))
+D = (
+    rng.randn(N, feats).astype(theano.config.floatX),
+    rng.randint(size=N, low=0, high=2).astype(theano.config.floatX),
+)
 training_steps = 10000
 # Declare Theano symbolic variables
-x = T.matrix("x")
-y = T.vector("y")
+x = tt.matrix("x")
+y = tt.vector("y")
 w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
-b = theano.shared(np.asarray(0., dtype=theano.config.floatX), name="b")
+b = theano.shared(np.asarray(0.0, dtype=theano.config.floatX), name="b")
 x.tag.test_value = D[0]
 y.tag.test_value = D[1]
-#print "Initial model:"
-#print w.get_value(), b.get_value()
+# print "Initial model:"
+# print w.get_value(), b.get_value()
 # Construct Theano expression graph
-p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))  # Probability of having a one
+p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b))  # Probability of having a one
 prediction = p_1 > 0.5  # The prediction that is done: 0 or 1
-xent = -y * T.log(p_1) - (1 - y) * T.log(1 - p_1)  # Cross-entropy
+xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1)  # Cross-entropy
 cost = xent.mean() + 0.01 * (w ** 2).sum()  # The cost to optimize
-gw, gb = T.grad(cost, [w, b])
+gw, gb = tt.grad(cost, [w, b])
 # Compile expressions to functions
 train = theano.function(
     inputs=[x, y],
     outputs=[prediction, xent],
     updates={w: w - 0.01 * gw, b: b - 0.01 * gb},
-    name="train")
-predict = theano.function(inputs=[x], outputs=prediction,
-                          name="predict")
+    name="train",
+)
+predict = theano.function(inputs=[x], outputs=prediction, name="predict")
-if any([x.op.__class__.__name__ == 'Gemv' for x in train.maker.fgraph.toposort()]):
-    print('Used the cpu')
-elif any([x.op.__class__.__name__ == 'GpuGemm' for x in train.maker.fgraph.toposort()]):
-    print('Used the gpu')
+if any([x.op.__class__.__name__ == "Gemv" for x in train.maker.fgraph.toposort()]):
+    print("Used the cpu")
+elif any([x.op.__class__.__name__ == "GpuGemm" for x in train.maker.fgraph.toposort()]):
+    print("Used the gpu")
 else:
-    print('ERROR, not able to tell if theano used the cpu or the gpu')
+    print("ERROR, not able to tell if theano used the cpu or the gpu")
     print(train.maker.fgraph.toposort())
 for i in range(training_steps):
     pred, err = train(D[0], D[1])
-#print "Final model:"
-#print w.get_value(), b.get_value()
+# print "Final model:"
+# print w.get_value(), b.get_value()
 print("target values for D")
 print(D[1])

@@ -58,12 +60,14 @@ print("prediction on D")
 print(predict(D[0]))
 # Print the graph used in the slides
-theano.printing.pydotprint(predict,
-                           outfile="pics/logreg_pydotprint_predic.png",
-                           var_with_name_simple=True)
-theano.printing.pydotprint(prediction,
-                           outfile="pics/logreg_pydotprint_prediction.png",
-                           var_with_name_simple=True)
-theano.printing.pydotprint(train,
-                           outfile="pics/logreg_pydotprint_train.png",
-                           var_with_name_simple=True)
+theano.printing.pydotprint(
+    predict, outfile="pics/logreg_pydotprint_predic.png", var_with_name_simple=True
+)
+theano.printing.pydotprint(
+    prediction,
+    outfile="pics/logreg_pydotprint_prediction.png",
+    var_with_name_simple=True,
+)
+theano.printing.pydotprint(
+    train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True
+)
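For readers skimming the diff: the script above builds plain logistic regression symbolically. A NumPy-only sketch of the same forward pass and cost, with names matching the script (the sketch itself is illustrative, not part of the commit):

import numpy as np

rng = np.random.default_rng(0)
N, feats = 400, 784
x = rng.standard_normal((N, feats))           # inputs
y = rng.integers(0, 2, size=N).astype(float)  # binary targets
w = rng.standard_normal(feats)
b = 0.0

p_1 = 1 / (1 + np.exp(-x.dot(w) - b))                 # probability of a one
xent = -y * np.log(p_1) - (1 - y) * np.log(1 - p_1)   # cross-entropy per sample
cost = xent.mean() + 0.01 * (w ** 2).sum()            # L2-regularized cost
print(cost)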
doc/hpcs2011_tutorial/presentation.tex

@@ -169,7 +169,7 @@ HPCS 2011, Montr\'eal
 % gpu for exercices
 % Exercises 1 and how to download the files
 \item Real example
-% More info on T.grad
+% More info on tt.grad
 % Where are the optimization in the example?
 % Exercises 2: logreg\_example.py
 \item Theano Flags

@@ -518,7 +518,7 @@ Modify and execute the example to do this expression: a**2 + b**2 + 2*a*b
 \begin{Verbatim}[commandchars=\\\{\}]
 import numpy
 import theano
-import theano.tensor as T
+import theano.tensor as tt
 rng = numpy.random
 N = 400

@@ -532,8 +532,8 @@ training_steps = 10000
 \frametitle{A Real Example: Logistic Regression}
 \begin{Verbatim}[commandchars=\\\{\}]
 {\color{gray}# Declare Theano symbolic variables}
-x = T.matrix("x")
-y = T.vector("y")
+x = tt.matrix("x")
+y = tt.vector("y")
 \codeHighlight{w = theano.shared(rng.randn(100), name="w")}
 \codeHighlight{b = theano.shared(0., name="b")}
 print "Initial model:"

@@ -545,32 +545,32 @@ print w.get_value(), b.get_value()
 \frametitle{A Real Example: Logistic Regression}
 \begin{Verbatim}[commandchars=\\\{\}]
 {\color{gray}# Declare Theano symbolic variables}
-{\color{gray}x = T.matrix("x")}
-{\color{gray}y = T.vector("y")}
+{\color{gray}x = tt.matrix("x")}
+{\color{gray}y = tt.vector("y")}
 {\color{gray}w = theano.shared(rng.randn(100), name="w")}
 {\color{gray}b = theano.shared(0., name="b")}
 {\color{gray}# Construct Theano expression graph}
-p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) {\color{gray}# Probability that target = 1}
+p_1 = 1 / (1 + tt.exp(-T.dot(x, w)-b)) {\color{gray}# Probability that target = 1}
 prediction = p_1 > 0.5 {\color{gray}# The prediction thresholded}
 xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) {\color{gray}# Cross-entropy loss function}
 cost = xent.mean() + 0.01*(w**2).sum() {\color{gray}# The cost to minimize}
-\codeHighlight{gw,gb = T.grad(cost, [w,b])}
+\codeHighlight{gw,gb = tt.grad(cost, [w,b])}
 \end{Verbatim}
 \end{frame}
 \begin{frame}[fragile]
 \frametitle{A Real Example: Logistic Regression}
 \begin{Verbatim}[commandchars=\\\{\}]
-{\color{gray}x = T.matrix("x")}
-{\color{gray}y = T.vector("y")}
+{\color{gray}x = tt.matrix("x")}
+{\color{gray}y = tt.vector("y")}
 {\color{gray}w = theano.shared(rng.randn(100), name="w")}
 {\color{gray}b = theano.shared(0., name="b")}
-{\color{gray}p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b))}
+{\color{gray}p_1 = 1 / (1 + tt.exp(-T.dot(x, w)-b))}
 {\color{gray}prediction = p_1 > 0.5}
 {\color{gray}xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1)}
 {\color{gray}cost = xent.mean() + 0.01*(w**2).sum()}
-{\color{gray}gw,gb = T.grad(cost, [w,b])}
+{\color{gray}gw,gb = tt.grad(cost, [w,b])}
 {\color{gray}# Compile}
 train = theano.function(

@@ -598,11 +598,11 @@ print "prediction on D:", predict(D[0])
 \begin{frame}[fragile]
 \frametitle{A Real Example: optimization}
 \begin{Verbatim}[commandchars=\\\{\}]
-p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b))
+p_1 = 1 / (1 + tt.exp(-T.dot(x, w)-b))
 xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1)
 prediction = p_1 > 0.5
 cost = xent.mean() + 0.01*(w**2).sum()
-gw,gb = T.grad(cost, [w,b])
+gw,gb = tt.grad(cost, [w,b])
 train = theano.function(
     inputs=[x,y],

@@ -612,7 +612,7 @@ train = theano.function(
 Where are those optimization applied?
 \begin{itemize}
 \item Log(1+exp(x))
-\item 1 / (1 + T.exp(var)) (sigmoid)
+\item 1 / (1 + tt.exp(var)) (sigmoid)
 \item Log(1-sigmoid(var)) (softplus, stabilisation)
 \item GEMV (matrix-vector multiply from BLAS)
 \item Loop fusion

@@ -622,14 +622,14 @@ Where are those optimization applied?
 \begin{frame}[fragile]
 \frametitle{A Real Example: optimization!}
 \begin{Verbatim}[commandchars=\\\{\}]
-p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b))
-\codeHighlight{# 1 / (1 + T.exp(var)) -> sigmoid(var)}
+p_1 = 1 / (1 + tt.exp(-T.dot(x, w)-b))
+\codeHighlight{# 1 / (1 + tt.exp(var)) -> sigmoid(var)}
 xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1)
 \codeHighlight{# Log(1-sigmoid(var)) -> -sigmoid(var)}
 prediction = p_1 > 0.5
 cost = xent.mean() + 0.01*(w**2).sum()
-gw,gb = T.grad(cost, [w,b])
+gw,gb = tt.grad(cost, [w,b])
 train = theano.function(
     inputs=[x,y],

@@ -727,18 +727,18 @@ Computers in the class
 \begin{itemize}
 \item \# Dimensions
 \begin{itemize}
-\item T.scalar, T.vector, T.matrix, T.tensor3, T.tensor4
+\item tt.scalar, tt.vector, tt.matrix, tt.tensor3, tt.tensor4
 \end{itemize}
 \item Dtype
 \begin{itemize}
-\item T.[fdczbwil]vector (float32, float64, complex64, complex128, int8, int16, int32, int64)
-\item T.vector $\to$ floatX dtype
+\item tt.[fdczbwil]vector (float32, float64, complex64, complex128, int8, int16, int32, int64)
+\item tt.vector $\to$ floatX dtype
 \item floatX: configurable dtype that can be float32 or float64.
 \end{itemize}
 \item Custom variable
 \begin{itemize}
-\item All are shortcuts to: T.tensor(dtype, broadcastable=[False]*nd)
+\item All are shortcuts to: tt.tensor(dtype, broadcastable=[False]*nd)
 \item Other dtype: uint[8,16,32,64], floatX
 \end{itemize}
 \end{itemize}

@@ -754,8 +754,8 @@ Computers in the class
 \vfill
 \begin{itemize}
 \item Broadcastability must be specified when creating the variable
-\item The only shorcut with broadcastable dimensions are: {\bf T.row} and {\bf T.col}
-\item For all others: T.tensor(dtype, broadcastable={\bf ([False or True])*nd})
+\item The only shorcut with broadcastable dimensions are: {\bf tt.row} and {\bf tt.col}
+\item For all others: tt.tensor(dtype, broadcastable={\bf ([False or True])*nd})
 \end{itemize}
 }

@@ -763,12 +763,12 @@ Computers in the class
 \begin{frame}[fragile]
 \frametitle{Differentiation Details}
 \begin{Verbatim}[commandchars=\\\{\}]
-{\color{gray}gw,gb = T.grad(cost, [w,b])}
+{\color{gray}gw,gb = tt.grad(cost, [w,b])}
 \end{Verbatim}
 \begin{itemize}
-\item T.grad works symbolically: takes and returns a Theano variable
-\item T.grad can be compared to a macro: it can be applied multiple times
-\item T.grad takes scalar costs only
+\item tt.grad works symbolically: takes and returns a Theano variable
+\item tt.grad can be compared to a macro: it can be applied multiple times
+\item tt.grad takes scalar costs only
 \item Simple recipe allows to compute efficiently vector $\times$ Jacobian and vector $\times$ Hessian
 \item We are working on the missing optimizations to be able to compute efficently the full Jacobian and Hessian and Jacobian $\times$ vector
 \end{itemize}

@@ -1123,7 +1123,7 @@ All pydotprint* requires graphviz and pydot
 \begin{frame}[fragile]
 \frametitle{Scan Example: Computing pow(A,k)}
 \begin{Verbatim}
-k = T.iscalar("k"); A = T.vector("A")
+k = tt.iscalar("k"); A = tt.vector("A")
 def inner_fct(prior_result, A): return prior_result * A
 # Symbolic description of the result

@@ -1147,7 +1147,7 @@ print power(range(10),2)
 \frametitle{Scan Example: Calculating a Polynomial}
 \begin{Verbatim}
 coefficients = theano.tensor.vector("coefficients")
-x = T.scalar("x"); max_coefficients_supported = 10000
+x = tt.scalar("x"); max_coefficients_supported = 10000
 # Generate the components of the polynomial
 full_range=theano.tensor.arange(max_coefficients_supported)
doc/hpcs2011_tutorial/scan_poly.py

 import numpy as np
 import theano
-import theano.tensor as T
+import theano.tensor as tt
 coefficients = theano.tensor.vector("coefficients")
-x = T.scalar("x"); max_coefficients_supported = 10000
+x = tt.scalar("x")
+max_coefficients_supported = 10000
 # Generate the components of the polynomial
 full_range = theano.tensor.arange(max_coefficients_supported)
-components, updates = theano.scan(fn=lambda coeff, power, free_var:
-                                  coeff * (free_var ** power),
-                                  outputs_info=None,
-                                  sequences=[coefficients, full_range],
-                                  non_sequences=x)
+components, updates = theano.scan(
+    fn=lambda coeff, power, free_var: coeff * (free_var ** power),
+    outputs_info=None,
+    sequences=[coefficients, full_range],
+    non_sequences=x,
+)
 polynomial = components.sum()
-calculate_polynomial = theano.function(inputs=[coefficients, x],
-                                       outputs=polynomial)
+calculate_polynomial = theano.function(inputs=[coefficients, x], outputs=polynomial)
 test_coeff = np.asarray([1, 0, 2], dtype=np.float32)
 print(calculate_polynomial(test_coeff, 3))
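As a sanity check on what scan_poly.py computes: the scan builds the polynomial sum_i coefficients[i] * x**i, so for coefficients [1, 0, 2] at x = 3 the expected output is 1 + 0 + 2*9 = 19. A plain-Python equivalent (illustrative, not part of the commit):

import numpy as np

coefficients = np.asarray([1, 0, 2], dtype=np.float32)
x = 3.0
# Same polynomial the scan evaluates: sum_i coeff[i] * x**i
print(sum(c * x ** i for i, c in enumerate(coefficients)))  # -> 19.0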
doc/hpcs2011_tutorial/scan_pow.py

 import theano
-import theano.tensor as T
+import theano.tensor as tt
-k = T.iscalar("k"); A = T.vector("A")
-def inner_fct(prior_result, A): return prior_result * A
+k = tt.iscalar("k")
+A = tt.vector("A")
+def inner_fct(prior_result, A):
+    return prior_result * A
 # Symbolic description of the result
-result, updates = theano.scan(fn=inner_fct,
-                              outputs_info=T.ones_like(A),
-                              non_sequences=A, n_steps=k)
+result, updates = theano.scan(
+    fn=inner_fct, outputs_info=tt.ones_like(A), non_sequences=A, n_steps=k
+)
 # Scan has provided us with A**1 through A**k. Keep only the last
 # value. Scan notices this and does not waste memory saving them.
 final_result = result[-1]
 power = theano.function(inputs=[A, k], outputs=final_result,
                         updates=updates)
 print(power(list(range(10)), 2))
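Likewise for scan_pow.py: power(A, k) returns the elementwise power A**k, since the scan keeps only the last of A**1 through A**k. A NumPy one-liner that should match the printed result (illustrative):

import numpy as np

A = np.arange(10, dtype="float64")
print(A ** 2)  # should match power(list(range(10)), 2)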
doc/library/compile/io.txt

@@ -80,10 +80,10 @@ A non-None `value` argument makes an In() instance an optional parameter
 of the compiled function. For example, in the following code we are
 defining an arity-2 function ``inc``.
->>> import theano.tensor as T
+>>> import theano.tensor as tt
 >>> from theano import function
 >>> from theano.compile.io import In
->>> u, x, s = T.scalars('u', 'x', 's')
+>>> u, x, s = tt.scalars('u', 'x', 's')
 >>> inc = function([u, In(x, value=3), In(s, update=(s+x*u), value=10.0)], [])
 Since we provided a ``value`` for ``s`` and ``x``, we can call it with just a value for ``u`` like this:

@@ -183,8 +183,8 @@ method to access values by indexing a Function directly by typing
 To show some examples of these access methods...
->>> from theano import tensor as T, function
->>> a, b, c = T.scalars('xys') # set the internal names of graph nodes
+>>> from theano import tensor as tt, function
+>>> a, b, c = tt.scalars('xys') # set the internal names of graph nodes
 >>> # Note that the name of c is 's', not 'c'!
 >>> fn = function([a, b, ((c, c+a+b), 10.0)], [])

@@ -236,12 +236,12 @@ Every element of the inputs list will be upgraded to an In instance if necessary
 Example:
 >>> import theano
->>> from theano import tensor as T
+>>> from theano import tensor as tt
 >>> from theano.compile.io import In
->>> x = T.scalar()
->>> y = T.scalar('y')
->>> z = T.scalar('z')
->>> w = T.scalar('w')
+>>> x = tt.scalar()
+>>> y = tt.scalar('y')
+>>> z = tt.scalar('z')
+>>> w = tt.scalar('w')
 >>> fn = theano.function(inputs=[x, y, In(z, value=42), ((w, w+x), 0)],
 ...                      outputs=x + y + z)

@@ -308,7 +308,7 @@ If a list of ``Variable`` or ``Out`` instances is given as argument, then the co
 >>> import numpy
 >>> from theano.compile.io import Out
->>> x, y, s = T.matrices('xys')
+>>> x, y, s = tt.matrices('xys')
 >>> # print a list of 2 ndarrays
 >>> fn1 = theano.function([x], [x+x, Out((x+x).T, borrow=True)])
doc/library/compile/nanguardmode.txt

@@ -25,12 +25,12 @@ NanGuardMode can be used as follows:
     import numpy
     import theano
-    import theano.tensor as T
+    import theano.tensor as tt
     from theano.compile.nanguardmode import NanGuardMode
-    x = T.matrix()
+    x = tt.matrix()
     w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX))
-    y = T.dot(x, w)
+    y = tt.dot(x, w)
     fun = theano.function(
         [x], y,
         mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
doc/library/d3viz/index.ipynb

@@ -72,7 +72,7 @@
 "outputs": [],
 "source": [
 "import theano as th\n",
-"import theano.tensor as T\n",
+"import theano.tensor as tt\n",
 "import numpy as np"
 ]
 },

@@ -97,14 +97,14 @@
 "nhiddens = 50\n",
 "\n",
 "rng = np.random.RandomState(0)\n",
-"x = T.dmatrix('x')\n",
+"x = tt.dmatrix('x')\n",
 "wh = th.shared(rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True)\n",
 "bh = th.shared(np.zeros(nhiddens), borrow=True)\n",
-"h = T.nnet.sigmoid(T.dot(x, wh) + bh)\n",
+"h = tt.nnet.sigmoid(tt.dot(x, wh) + bh)\n",
 "\n",
 "wy = th.shared(rng.normal(0, 1, (nhiddens, noutputs)))\n",
 "by = th.shared(np.zeros(noutputs), borrow=True)\n",
-"y = T.nnet.softmax(T.dot(h, wy) + by)\n",
+"y = tt.nnet.softmax(tt.dot(h, wy) + by)\n",
 "\n",
 "predict = th.function([x], y)"
 ]

@@ -389,8 +389,8 @@
 },
 "outputs": [],
 "source": [
-"x, y, z = T.scalars('xyz')\n",
-"e = T.nnet.sigmoid((x + y + z)**2)\n",
+"x, y, z = tt.scalars('xyz')\n",
+"e = tt.nnet.sigmoid((x + y + z)**2)\n",
 "op = th.OpFromGraph([x, y, z], [e])\n",
 "\n",
 "e2 = op(x, y, z) + op(z, y, x)\n",

@@ -434,7 +434,7 @@
 },
 "outputs": [],
 "source": [
-"x, y, z = T.scalars('xyz')\n",
+"x, y, z = tt.scalars('xyz')\n",
 "e = x * y\n",
 "op = th.OpFromGraph([x, y], [e])\n",
 "e2 = op(x, y) + z\n",
doc/library/d3viz/index.txt

@@ -54,7 +54,7 @@ hidden layer and a softmax output layer.
 .. code:: python
     import theano as th
-    import theano.tensor as T
+    import theano.tensor as tt
     import numpy as np
     ninputs = 1000

@@ -63,14 +63,14 @@ hidden layer and a softmax output layer.
     nhiddens = 50
     rng = np.random.RandomState(0)
-    x = T.dmatrix('x')
+    x = tt.dmatrix('x')
     wh = th.shared(rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True)
     bh = th.shared(np.zeros(nhiddens), borrow=True)
-    h = T.nnet.sigmoid(T.dot(x, wh) + bh)
+    h = tt.nnet.sigmoid(tt.dot(x, wh) + bh)
     wy = th.shared(rng.normal(0, 1, (nhiddens, noutputs)))
     by = th.shared(np.zeros(noutputs), borrow=True)
-    y = T.nnet.softmax(T.dot(h, wy) + by)
+    y = tt.nnet.softmax(tt.dot(h, wy) + by)
     predict = th.function([x], y)

@@ -218,8 +218,8 @@ defines a nested graph, which will be visualized accordingly by
 .. code:: python
-    x, y, z = T.scalars('xyz')
-    e = T.nnet.sigmoid((x + y + z)**2)
+    x, y, z = tt.scalars('xyz')
+    e = tt.nnet.sigmoid((x + y + z)**2)
     op = th.OpFromGraph([x, y, z], [e])
     e2 = op(x, y, z) + op(z, y, x)

@@ -247,7 +247,7 @@ the following example.
 .. code:: python
-    x, y, z = T.scalars('xyz')
+    x, y, z = tt.scalars('xyz')
     e = x * y
     op = th.OpFromGraph([x, y], [e])
     e2 = op(x, y) + z
doc/library/gpuarray/fft.txt

@@ -29,10 +29,10 @@ shifted to the middle of the array. The Theano flag ``device=cuda{0,1...}`` must
     import numpy as np
     import theano
-    import theano.tensor as T
+    import theano.tensor as tt
     from theano.gpuarray import fft
-    x = T.matrix('x', dtype='float32')
+    x = tt.matrix('x', dtype='float32')
     rfft = fft.curfft(x, norm='ortho')
     f_rfft = theano.function([x], rfft)
doc/library/printing.txt

@@ -23,8 +23,8 @@ Intermediate values in a computation cannot be printed in
 the normal python way with the print statement, because Theano has no *statements*.
 Instead there is the :class:`Print` Op.
->>> from theano import tensor as T, function, printing
->>> x = T.dvector()
+>>> from theano import tensor as tt, function, printing
+>>> x = tt.dvector()
 >>> hello_world_op = printing.Print('hello world')
 >>> printed_x = hello_world_op(x)
 >>> f = function([x], printed_x)

@@ -51,17 +51,17 @@ Theano also provides :func:`theano.printing.pydotprint` that creates a png image
 1) The first is :func:`theano.pp`.
->>> from theano import pp, tensor as T
->>> x = T.dscalar('x')
+>>> from theano import pp, tensor as tt
+>>> x = tt.dscalar('x')
 >>> y = x ** 2
->>> gy = T.grad(y, x)
+>>> gy = tt.grad(y, x)
 >>> pp(gy)  # print out the gradient prior to optimization
 '((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
 >>> f = function([x], gy)
 >>> pp(f.maker.fgraph.outputs[0])
 '(TensorConstant{2.0} * x)'
-The parameter in T.dscalar('x') in the first line is the name of this variable
+The parameter in tt.dscalar('x') in the first line is the name of this variable
 in the graph. This name is used when printing the graph to make it more readable.
 If no name is provided the variable x is printed as its type as returned by
 x.type(). In this example - <TensorType(float64, scalar)>.

@@ -192,4 +192,3 @@ Reference
 .. autofunction:: theano.printing.pp(*args)
 .. autofunction:: theano.printing.pydotprint
doc/library/scan.txt
浏览文件 @
33667eb7
...
@@ -38,10 +38,10 @@ The equivalent Theano code would be:
...
@@ -38,10 +38,10 @@ The equivalent Theano code would be:
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
k =
T
.iscalar("k")
k =
tt
.iscalar("k")
A =
T
.vector("A")
A =
tt
.vector("A")
# Symbolic description of the result
# Symbolic description of the result
result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A,
result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A,
...
@@ -103,7 +103,7 @@ from a list of its coefficients:
...
@@ -103,7 +103,7 @@ from a list of its coefficients:
import numpy
import numpy
coefficients = theano.tensor.vector("coefficients")
coefficients = theano.tensor.vector("coefficients")
x =
T
.scalar("x")
x =
tt
.scalar("x")
max_coefficients_supported = 10000
max_coefficients_supported = 10000
...
@@ -164,21 +164,21 @@ downcast** of the latter.
...
@@ -164,21 +164,21 @@ downcast** of the latter.
import numpy as np
import numpy as np
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
up_to =
T
.iscalar("up_to")
up_to =
tt
.iscalar("up_to")
# define a named function, rather than using lambda
# define a named function, rather than using lambda
def accumulate_by_adding(arange_val, sum_to_date):
def accumulate_by_adding(arange_val, sum_to_date):
return sum_to_date + arange_val
return sum_to_date + arange_val
seq =
T
.arange(up_to)
seq =
tt
.arange(up_to)
# An unauthorized implicit downcast from the dtype of 'seq', to that of
# An unauthorized implicit downcast from the dtype of 'seq', to that of
# 'T.as_tensor_variable(0)' which is of dtype 'int8' by default would occur
# 'T.as_tensor_variable(0)' which is of dtype 'int8' by default would occur
# if this instruction were to be used instead of the next one:
# if this instruction were to be used instead of the next one:
# outputs_info =
T
.as_tensor_variable(0)
# outputs_info =
tt
.as_tensor_variable(0)
outputs_info =
T
.as_tensor_variable(np.asarray(0, seq.dtype))
outputs_info =
tt
.as_tensor_variable(np.asarray(0, seq.dtype))
scan_result, scan_updates = theano.scan(fn=accumulate_by_adding,
scan_result, scan_updates = theano.scan(fn=accumulate_by_adding,
outputs_info=outputs_info,
outputs_info=outputs_info,
sequences=seq)
sequences=seq)
...
@@ -206,14 +206,14 @@ with all values set to zero except at the provided array indices.
...
@@ -206,14 +206,14 @@ with all values set to zero except at the provided array indices.
.. testcode::
.. testcode::
location =
T
.imatrix("location")
location =
tt
.imatrix("location")
values =
T
.vector("values")
values =
tt
.vector("values")
output_model =
T
.matrix("output_model")
output_model =
tt
.matrix("output_model")
def set_value_at_position(a_location, a_value, output_model):
def set_value_at_position(a_location, a_value, output_model):
zeros =
T
.zeros_like(output_model)
zeros =
tt
.zeros_like(output_model)
zeros_subtensor = zeros[a_location[0], a_location[1]]
zeros_subtensor = zeros[a_location[0], a_location[1]]
return
T
.set_subtensor(zeros_subtensor, a_value)
return
tt
.set_subtensor(zeros_subtensor, a_value)
     result, updates = theano.scan(fn=set_value_at_position,
                                   outputs_info=None,

@@ -265,7 +265,7 @@ the following:
 .. testcode:: scan1

     import theano
-    from theano import tensor as T
+    from theano import tensor as tt

     W = theano.shared(W_values) # we assume that ``W_values`` contains the
                                 # initial values of your weight matrix

@@ -273,12 +273,12 @@ the following:
     bvis = theano.shared(bvis_values)
     bhid = theano.shared(bhid_values)

-    trng = T.shared_randomstreams.RandomStreams(1234)
+    trng = tt.shared_randomstreams.RandomStreams(1234)

     def OneStep(vsample) :
-        hmean = T.nnet.sigmoid(theano.dot(vsample, W) + bhid)
+        hmean = tt.nnet.sigmoid(theano.dot(vsample, W) + bhid)
         hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
-        vmean = T.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
+        vmean = tt.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
         return trng.binomial(size=vsample.shape, n=1, p=vmean,
                              dtype=theano.config.floatX)

@@ -354,13 +354,13 @@ updated:
     bvis = theano.shared(bvis_values)
     bhid = theano.shared(bhid_values)

-    trng = T.shared_randomstreams.RandomStreams(1234)
+    trng = tt.shared_randomstreams.RandomStreams(1234)

     # OneStep, with explicit use of the shared variables (W, bvis, bhid)
     def OneStep(vsample, W, bvis, bhid):
-        hmean = T.nnet.sigmoid(theano.dot(vsample, W) + bhid)
+        hmean = tt.nnet.sigmoid(theano.dot(vsample, W) + bhid)
         hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
-        vmean = T.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
+        vmean = tt.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
         return trng.binomial(size=vsample.shape, n=1, p=vmean,
                              dtype=theano.config.floatX)

@@ -394,9 +394,9 @@ Using the original Gibbs sampling example, with ``strict=True`` added to the
     # Same OneStep as in original example.
     def OneStep(vsample) :
-        hmean = T.nnet.sigmoid(theano.dot(vsample, W) + bhid)
+        hmean = tt.nnet.sigmoid(theano.dot(vsample, W) + bhid)
         hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
-        vmean = T.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
+        vmean = tt.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
         return trng.binomial(size=vsample.shape, n=1, p=vmean,
                              dtype=theano.config.floatX)

@@ -423,9 +423,9 @@ variables passed explicitly to ``OneStep`` and to scan:
     # OneStep, with explicit use of the shared variables (W, bvis, bhid)
     def OneStep(vsample, W, bvis, bhid) :
-        hmean = T.nnet.sigmoid(theano.dot(vsample, W) + bhid)
+        hmean = tt.nnet.sigmoid(theano.dot(vsample, W) + bhid)
         hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
-        vmean = T.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
+        vmean = tt.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
         return trng.binomial(size=vsample.shape, n=1, p=vmean,
                              dtype=theano.config.floatX)

@@ -465,13 +465,13 @@ construct a function that computes one iteration step :
 .. testsetup:: scan3

     import theano
-    from theano import tensor as T
+    from theano import tensor as tt

 .. testcode:: scan3

     def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback, W_out):
-        x_t = T.tanh(theano.dot(x_tm1, W) + \
+        x_t = tt.tanh(theano.dot(x_tm1, W) + \
                      theano.dot(u_t, W_in_1) + \
                      theano.dot(u_tm4, W_in_2) + \
                      theano.dot(y_tm1, W_feedback))

@@ -492,16 +492,16 @@ the Theano variables needed we construct our RNN as follows :
 .. testcode:: scan3

-    W = T.matrix()
+    W = tt.matrix()
-    W_in_1 = T.matrix()
+    W_in_1 = tt.matrix()
-    W_in_2 = T.matrix()
+    W_in_2 = tt.matrix()
-    W_feedback = T.matrix()
+    W_feedback = tt.matrix()
-    W_out = T.matrix()
+    W_out = tt.matrix()

-    u = T.matrix() # it is a sequence of vectors
+    u = tt.matrix() # it is a sequence of vectors
-    x0 = T.matrix() # initial state of x has to be a matrix, since
+    x0 = tt.matrix() # initial state of x has to be a matrix, since
                     # it has to cover x[-3]
-    y0 = T.vector() # y0 is just a vector since scan has only to provide
+    y0 = tt.vector() # y0 is just a vector since scan has only to provide
                     # y[-1]

@@ -541,9 +541,9 @@ value ``max_value``.
     def power_of_2(previous_power, max_value):
         return previous_power*2, theano.scan_module.until(previous_power*2 > max_value)

-    max_value = T.scalar()
+    max_value = tt.scalar()
     values, _ = theano.scan(power_of_2,
-                            outputs_info = T.constant(1.),
+                            outputs_info = tt.constant(1.),
                             non_sequences = max_value,
                             n_steps = 1024)
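For quick verification, the last hunk above assembles into this self-contained sketch of the early-termination loop under the renamed alias (the printed values follow the behaviour the surrounding documentation describes for ``scan_module.until``):

    import theano
    import theano.tensor as tt

    def power_of_2(previous_power, max_value):
        # stop the scan as soon as the next power exceeds max_value
        return previous_power * 2, theano.scan_module.until(previous_power * 2 > max_value)

    max_value = tt.scalar()
    values, _ = theano.scan(power_of_2,
                            outputs_info=tt.constant(1.),
                            non_sequences=max_value,
                            n_steps=1024)
    f = theano.function([max_value], values)
    print(f(45))  # expected: [ 2.  4.  8. 16. 32. 64.]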

doc/library/tensor/basic.txt
@@ -9,7 +9,7 @@ Basic Tensor Functionality
 .. testsetup::

     import theano
-    import theano.tensor as T
+    import theano.tensor as tt
     from theano.tensor import scalar, iscalar, TensorType, dmatrix, ivector
     from theano.tensor import set_subtensor, inc_subtensor, batched_dot
     from theano import shared

@@ -19,12 +19,12 @@ Basic Tensor Functionality
 Theano supports any kind of Python object, but its focus is support for
 symbolic matrix expressions. When you type,

->>> x = T.fmatrix()
+>>> x = tt.fmatrix()

 the ``x`` is a :class:`TensorVariable` instance.
-The ``T.fmatrix`` object itself is an instance of :class:`TensorType`.
+The ``tt.fmatrix`` object itself is an instance of :class:`TensorType`.
 Theano knows what type of variable ``x`` is because ``x.type``
-points back to ``T.fmatrix``.
+points back to ``tt.fmatrix``.

 This chapter explains the various ways of creating tensor variables,
 the attributes and methods of :class:`TensorVariable` and :class:`TensorType`,

@@ -531,7 +531,7 @@ TensorVariable
     Transpose of this tensor.

-    >>> x = T.zmatrix()
+    >>> x = tt.zmatrix()
     >>> y = 3+.2j * x.T

     .. note::

@@ -824,10 +824,10 @@ Creating Tensor
     :param tensors: one or more tensors of the same rank
     :returns: A tensor such that rval[0] == tensors[0], rval[1] == tensors[1], etc.

-    >>> x0 = T.scalar()
+    >>> x0 = tt.scalar()
-    >>> x1 = T.scalar()
+    >>> x1 = tt.scalar()
-    >>> x2 = T.scalar()
+    >>> x2 = tt.scalar()
-    >>> x = T.stack(x0, x1, x2)
+    >>> x = tt.stack(x0, x1, x2)
     >>> x.ndim # x is a vector of length 3.
     1

@@ -840,10 +840,10 @@ Creating Tensor
     :param axis: Tensors will be joined along this axis, so they may have different
         ``shape[axis]``

-    >>> x0 = T.fmatrix()
+    >>> x0 = tt.fmatrix()
-    >>> x1 = T.ftensor3()
+    >>> x1 = tt.ftensor3()
-    >>> x2 = T.fvector()
+    >>> x2 = tt.fvector()
-    >>> x = T.concatenate([x0, x1[0], T.shape_padright(x2)], axis=1)
+    >>> x = tt.concatenate([x0, x1[0], tt.shape_padright(x2)], axis=1)
     >>> x.ndim
     2

@@ -1151,7 +1151,7 @@ Operator Support
 Many Python operators are supported.

->>> a, b = T.itensor3(), T.itensor3() # example inputs
+>>> a, b = tt.itensor3(), tt.itensor3() # example inputs

 Arithmetic
 --------------

@@ -1159,13 +1159,13 @@ Arithmetic
 .. doctest::
     :options: +SKIP

-    >>> a + 3      # T.add(a, 3) -> itensor3
+    >>> a + 3      # tt.add(a, 3) -> itensor3
-    >>> 3 - a      # T.sub(3, a)
+    >>> 3 - a      # tt.sub(3, a)
-    >>> a * 3.5    # T.mul(a, 3.5) -> ftensor3 or dtensor3 (depending on casting)
+    >>> a * 3.5    # tt.mul(a, 3.5) -> ftensor3 or dtensor3 (depending on casting)
-    >>> 2.2 / a    # T.truediv(2.2, a)
+    >>> 2.2 / a    # tt.truediv(2.2, a)
-    >>> 2.2 // a   # T.intdiv(2.2, a)
+    >>> 2.2 // a   # tt.intdiv(2.2, a)
-    >>> 2.2**a     # T.pow(2.2, a)
+    >>> 2.2**a     # tt.pow(2.2, a)
-    >>> b % a      # T.mod(b, a)
+    >>> b % a      # tt.mod(b, a)

 Bitwise
 -------------

@@ -1173,10 +1173,10 @@ Bitwise
 .. doctest::
     :options: +SKIP

-    >>> a & b      # T.and_(a,b)    bitwise and (alias T.bitwise_and)
+    >>> a & b      # tt.and_(a,b)   bitwise and (alias tt.bitwise_and)
-    >>> a ^ 1      # T.xor(a,1)     bitwise xor (alias T.bitwise_xor)
+    >>> a ^ 1      # tt.xor(a,1)    bitwise xor (alias tt.bitwise_xor)
-    >>> a | b      # T.or_(a,b)     bitwise or (alias T.bitwise_or)
+    >>> a | b      # tt.or_(a,b)    bitwise or (alias tt.bitwise_or)
-    >>> ~a         # T.invert(a)    bitwise invert (alias T.bitwise_not)
+    >>> ~a         # tt.invert(a)   bitwise invert (alias tt.bitwise_not)

 Inplace
 -------------

@@ -1205,9 +1205,9 @@ Casting
 .. testcode:: cast

-    import theano.tensor as T
+    import theano.tensor as tt

-    x = T.matrix()
+    x = tt.matrix()
-    x_as_int = T.cast(x, 'int32')
+    x_as_int = tt.cast(x, 'int32')

 Attempting to cast a complex value to a real value is ambiguous and
 will raise an exception. Use `real()`, `imag()`, `abs()`, or `angle()`.

@@ -1241,9 +1241,9 @@ The six usual equality and inequality operators share the same interface.
 .. testcode:: oper

-    import theano.tensor as T
+    import theano.tensor as tt

-    x,y = T.dmatrices('x','y')
+    x,y = tt.dmatrices('x','y')
-    z = T.le(x,y)
+    z = tt.le(x,y)

 .. function:: lt(a, b)

@@ -1334,10 +1334,10 @@ Condition
 .. testcode:: switch

-    import theano.tensor as T
+    import theano.tensor as tt

-    a,b = T.dmatrices('a','b')
+    a,b = tt.dmatrices('a','b')
-    x,y = T.dmatrices('x','y')
+    x,y = tt.dmatrices('x','y')
-    z = T.switch(T.lt(a,b), x, y)
+    z = tt.switch(tt.lt(a,b), x, y)

 .. function:: where(cond, ift, iff)

@@ -1405,8 +1405,8 @@ Here is an example using the bit-wise ``and_`` via the ``&`` operator:
 .. testcode:: bitwise

-    import theano.tensor as T
+    import theano.tensor as tt

-    x,y = T.imatrices('x','y')
+    x,y = tt.imatrices('x','y')
     z = x & y

@@ -1655,8 +1655,8 @@ Linear Algebra
     Returns a tensor of size e.g. if it is 3D: (dim1, dim3, dim4)

     Example:

-    >>> first = T.tensor3('first')
+    >>> first = tt.tensor3('first')
-    >>> second = T.tensor3('second')
+    >>> second = tt.tensor3('second')
     >>> result = batched_dot(first, second)

     :note: This is a subset of numpy.einsum, but we do not provide it for now.

@@ -1715,7 +1715,7 @@ Linear Algebra
     Example:

-    >>> a = T.mgrid[0:5, 0:3]
+    >>> a = tt.mgrid[0:5, 0:3]
     >>> a[0].eval()
     array([[0, 0, 0],
            [1, 1, 1],

@@ -1739,7 +1739,7 @@ Linear Algebra
     Example:

-    >>> b = T.ogrid[0:5, 0:3]
+    >>> b = tt.ogrid[0:5, 0:3]
     >>> b[0].eval()
     array([[0],
            [1],
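As a quick check of the renamed snippets above, a minimal runnable sketch combining the comparison and ``switch`` examples (the test matrices are illustrative, not from the original docs):

    import numpy as np
    import theano
    import theano.tensor as tt

    a, b = tt.dmatrices('a', 'b')
    x, y = tt.dmatrices('x', 'y')
    # elementwise: pick x where a < b, else y
    z = tt.switch(tt.lt(a, b), x, y)
    f = theano.function([a, b, x, y], z)

    ones = np.ones((2, 2))
    print(f(0 * ones, 1 * ones, 2 * ones, 3 * ones))  # every element comes from x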

doc/library/tensor/fft.txt
@@ -24,10 +24,10 @@ oscillates due to the box function being shifted to the middle of the array.
     import numpy as np
     import theano
-    import theano.tensor as T
+    import theano.tensor as tt
     from theano.tensor import fft

-    x = T.matrix('x', dtype='float64')
+    x = tt.matrix('x', dtype='float64')
     rfft = fft.rfft(x, norm='ortho')
     f_rfft = theano.function([x], rfft)
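A runnable version of this hunk, fed with the shifted box function the surrounding text describes (the array size ``N`` is an illustrative choice):

    import numpy as np
    import theano
    import theano.tensor as tt
    from theano.tensor import fft

    x = tt.matrix('x', dtype='float64')
    rfft = fft.rfft(x, norm='ortho')
    f_rfft = theano.function([x], rfft)

    N = 1024
    box = np.zeros((1, N), dtype='float64')
    box[:, N // 2 - 10: N // 2 + 10] = 1   # box centered in the array
    out = f_rfft(box)                      # shape (1, N // 2 + 1, 2): real and imaginary parts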

doc/library/tensor/nnet/nnet.txt
@@ -50,11 +50,11 @@
 .. testcode::

-    import theano.tensor as T
+    import theano.tensor as tt

-    x, y, b = T.dvectors('x', 'y', 'b')
+    x, y, b = tt.dvectors('x', 'y', 'b')
-    W = T.dmatrix('W')
+    W = tt.dmatrix('W')
-    y = T.nnet.sigmoid(T.dot(W, x) + b)
+    y = tt.nnet.sigmoid(tt.dot(W, x) + b)

 .. note:: The underlying code will return an exact 0 or 1 if an
     element of x is too small or too big.

@@ -112,9 +112,9 @@
 .. testcode::

-    x,y,b = T.dvectors('x','y','b')
+    x,y,b = tt.dvectors('x','y','b')
-    W = T.dmatrix('W')
+    W = tt.dmatrix('W')
-    y = T.nnet.softplus(T.dot(W,x) + b)
+    y = tt.nnet.softplus(tt.dot(W,x) + b)

 .. function:: softsign(x)

@@ -143,9 +143,9 @@
 .. testcode::

-    x,y,b = T.dvectors('x','y','b')
+    x,y,b = tt.dvectors('x','y','b')
-    W = T.dmatrix('W')
+    W = tt.dmatrix('W')
-    y = T.nnet.softmax(T.dot(W,x) + b)
+    y = tt.nnet.softmax(tt.dot(W,x) + b)

 .. autofunction:: theano.tensor.nnet.relu

@@ -171,12 +171,12 @@
 .. testcode::

-    x, y, b, c = T.dvectors('x', 'y', 'b', 'c')
+    x, y, b, c = tt.dvectors('x', 'y', 'b', 'c')
-    W = T.dmatrix('W')
+    W = tt.dmatrix('W')
-    V = T.dmatrix('V')
+    V = tt.dmatrix('V')
-    h = T.nnet.sigmoid(T.dot(W, x) + b)
+    h = tt.nnet.sigmoid(tt.dot(W, x) + b)
-    x_recons = T.nnet.sigmoid(T.dot(V, h) + c)
+    x_recons = tt.nnet.sigmoid(tt.dot(V, h) + c)
-    recon_cost = T.nnet.binary_crossentropy(x_recons, x).mean()
+    recon_cost = tt.nnet.binary_crossentropy(x_recons, x).mean()

 .. function:: sigmoid_binary_crossentropy(output,target)

@@ -200,14 +200,14 @@
 .. testcode::

-    x, y, b, c = T.dvectors('x', 'y', 'b', 'c')
+    x, y, b, c = tt.dvectors('x', 'y', 'b', 'c')
-    W = T.dmatrix('W')
+    W = tt.dmatrix('W')
-    V = T.dmatrix('V')
+    V = tt.dmatrix('V')
-    h = T.nnet.sigmoid(T.dot(W, x) + b)
+    h = tt.nnet.sigmoid(tt.dot(W, x) + b)
-    x_precons = T.dot(V, h) + c
+    x_precons = tt.dot(V, h) + c
     # final reconstructions are given by sigmoid(x_precons), but we leave
     # them unnormalized as sigmoid_binary_crossentropy applies sigmoid
-    recon_cost = T.nnet.sigmoid_binary_crossentropy(x_precons, x).mean()
+    recon_cost = tt.nnet.sigmoid_binary_crossentropy(x_precons, x).mean()

 .. function:: categorical_crossentropy(coding_dist,true_dist)

@@ -244,8 +244,8 @@
 .. testcode::

-    y = T.nnet.softmax(T.dot(W, x) + b)
+    y = tt.nnet.softmax(tt.dot(W, x) + b)
-    cost = T.nnet.categorical_crossentropy(y, o)
+    cost = tt.nnet.categorical_crossentropy(y, o)
     # o is either the above-mentioned 1-of-N vector or 2D tensor
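For reference, the first sigmoid-layer hunk above assembled into a runnable sketch (the random weights and zero biases are placeholders, not part of the original docs):

    import numpy as np
    import theano
    import theano.tensor as tt

    x, b = tt.dvectors('x', 'b')
    W = tt.dmatrix('W')
    y = tt.nnet.sigmoid(tt.dot(W, x) + b)
    f = theano.function([W, x, b], y)

    print(f(np.random.randn(3, 4), np.random.randn(4), np.zeros(3)))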

doc/nextml2015/presentation.tex
@@ -271,15 +271,15 @@ Some example of scalar operations:
    }
 \begin{lstlisting}
 import theano
-from theano import tensor as T
+from theano import tensor as tt

-x = T.scalar()
+x = tt.scalar()
-y = T.scalar()
+y = tt.scalar()

 z = x+y
 w = z*x
-a = T.sqrt(w)
+a = tt.sqrt(w)
-b = T.exp(a)
+b = tt.exp(a)
 c = a ** b
-d = T.log(c)
+d = tt.log(c)
 \end{lstlisting}
 \end{frame}

@@ -291,13 +291,13 @@ d = T.log(c)
    stringstyle=\color{violet},
    }
 \begin{lstlisting}
-from theano import tensor as T
+from theano import tensor as tt

-x = T.vector()
+x = tt.vector()
-y = T.vector()
+y = tt.vector()
 # Scalar math applied elementwise
 a = x * y
 # Vector dot product
-b = T.dot(x, y)
+b = tt.dot(x, y)
 # Broadcasting (as NumPy, very powerful)
 c = a + b
 \end{lstlisting}

@@ -311,14 +311,14 @@ c = a + b
    stringstyle=\color{violet},
    }
 \begin{lstlisting}
-from theano import tensor as T
+from theano import tensor as tt

-x = T.matrix()
+x = tt.matrix()
-y = T.matrix()
+y = tt.matrix()
-a = T.vector()
+a = tt.vector()
 # Matrix-matrix product
-b = T.dot(x, y)
+b = tt.dot(x, y)
 # Matrix-vector product
-c = T.dot(x, a)
+c = tt.dot(x, a)
 \end{lstlisting}
 \end{frame}

@@ -336,11 +336,11 @@ c = T.dot(x, a)
    stringstyle=\color{violet},
    }
 \begin{lstlisting}
-from theano import tensor as T
+from theano import tensor as tt

-tensor3 = T.TensorType(
+tensor3 = tt.TensorType(
     broadcastable=(False, False, False),
     dtype='float32')
-x = T.tensor3()
+x = tt.tensor3()
 \end{lstlisting}
 \end{frame}

@@ -351,8 +351,8 @@ x = T.tensor3()
    stringstyle=\color{violet},
    }
 \begin{lstlisting}
-from theano import tensor as T
+from theano import tensor as tt

-tensor3 = T.TensorType(
+tensor3 = tt.TensorType(
     broadcastable=(False, False, False),
     dtype='float32')
 x = tensor3()

@@ -370,13 +370,13 @@ mx = x.max(axis=1)
    stringstyle=\color{violet},
    }
 \begin{lstlisting}
-from theano import tensor as T
+from theano import tensor as tt

-tensor3 = T.TensorType(
+tensor3 = tt.TensorType(
     broadcastable=(False, False, False))
 x = tensor3()
 y = x.dimshuffle((2, 1, 0))
-a = T.matrix()
+a = tt.matrix()
 b = a.T
 # Same as b
 c = a.dimshuffle((0, 1))
 # Adding to larger tensor

@@ -427,9 +427,9 @@ a_tensor[an_index_tensor, ...]
    stringstyle=\color{violet},
    }
 \begin{lstlisting}
->>> from theano import tensor as T
+>>> from theano import tensor as tt
->>> x = T.scalar()
+>>> x = tt.scalar()
->>> y = T.scalar()
+>>> y = tt.scalar()
 >>> from theano import function
 >>> # first arg is list of SYMBOLIC inputs
 >>> # second arg is SYMBOLIC output

@@ -518,8 +518,8 @@ modes regard as fine.
 \item Theano's current back-end only supports 32 bit on GPU
 \item libgpuarray (new back-end) supports all dtypes
 \item CUDA supports 64 bit, but is slow on gamer GPUs
-\item T.fscalar, T.fvector, T.fmatrix are all 32 bit
+\item tt.fscalar, tt.fvector, tt.fmatrix are all 32 bit
-\item T.scalar, T.vector, T.matrix resolve to 32 bit or 64 bit depending on theano's floatX flag
+\item tt.scalar, tt.vector, tt.matrix resolve to 32 bit or 64 bit depending on theano's floatX flag
 \item floatX is float64 by default, set it to float32
 \item Set device flag to gpu (or a specific gpu, like gpu0)
 \item Flag: warn\_float64={'ignore', 'warn', 'raise', 'pdb'}

@@ -547,9 +547,9 @@ modes regard as fine.
    stringstyle=\color{violet},
    }
 \begin{lstlisting}
->>> x = T.scalar('x')
+>>> x = tt.scalar('x')
 >>> y = 2. * x
->>> g = T.grad(y, x)
+>>> g = tt.grad(y, x)
 # Print the not optimized graph
 >>> theano.printing.pydotprint(g)
 \end{lstlisting}

@@ -559,7 +559,7 @@ modes regard as fine.
 %% \begin{frame}{Theano Variables}
 %% \begin{itemize}
 %% \item A Variable is a theano expression
-%% \item Can come from T.scalar, T.matrix, etc.
+%% \item Can come from tt.scalar, tt.matrix, etc.
 %% \item Can come from doing operations on other Variables
 %% \item Every Variable has a type field, identifying its Type \newline
 %% e.g. TensorType((True, False), 'float32')

@@ -623,9 +623,9 @@ modes regard as fine.
 \begin{lstlisting}
 import numpy as np
 import theano
-import theano.tensor as T
+import theano.tensor as tt

-x = T.vector()
+x = tt.vector()
-y = T.vector()
+y = tt.vector()
 z = x + x
 z = z + y
 f = theano.function([x, y], z)

@@ -857,16 +857,16 @@ Elemwise{mul,no_inplace} [@A] ''
    }
 \begin{lstlisting}
 import theano
-import theano.tensor as T
+import theano.tensor as tt
 import numpy as np

 # define tensor variables
-W = T.matrix("W")
+W = tt.matrix("W")
-X = T.matrix("X")
+X = tt.matrix("X")
-b_sym = T.vector("b_sym")
+b_sym = tt.vector("b_sym")

 # define shared random stream
-trng = T.shared_randomstreams.RandomStreams(1234)
+trng = tt.shared_randomstreams.RandomStreams(1234)
 d=trng.binomial(size=W[1].shape)
 \end{lstlisting}
 \end{frame}

@@ -881,7 +881,7 @@ d=trng.binomial(size=W[1].shape)
    }
 \begin{lstlisting}
 results, updates = theano.scan(
-    lambda v: T.tanh(T.dot(v, W) + b_sym) * d,
+    lambda v: tt.tanh(tt.dot(v, W) + b_sym) * d,
     sequences=X)
 f = theano.function(inputs=[X, W, b_sym],
                     outputs=[results],

@@ -903,11 +903,11 @@ print f(x, w, b)
    }
 \begin{lstlisting}
 import theano
-import theano.tensor as T
+import theano.tensor as tt
 theano.config.warn.subtensor_merge_bug = False

-k = T.iscalar("k")
+k = tt.iscalar("k")
-A = T.vector("A")
+A = tt.vector("A")

 def inner_fct(prior_result, B):
     return prior_result * B

@@ -949,7 +949,7 @@ print power(range(10), 2)
 result, updates = theano.scan(
     fn=inner_fct,
     sequences=[],
-    outputs_info=[T.ones_like(A)],
+    outputs_info=[tt.ones_like(A)],
     non_sequences=A,
     n_steps=k)
 \end{lstlisting}
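The scan example spread across these slides, joined into one runnable sketch with the new alias (the ``print`` call assumes Python 3 rather than the slides' Python 2):

    import theano
    import theano.tensor as tt

    k = tt.iscalar("k")
    A = tt.vector("A")

    def inner_fct(prior_result, B):
        return prior_result * B

    # symbolic description of A ** k, built by repeated elementwise multiplication
    result, updates = theano.scan(fn=inner_fct,
                                  outputs_info=[tt.ones_like(A)],
                                  non_sequences=A,
                                  n_steps=k)
    final_result = result[-1]
    power = theano.function(inputs=[A, k], outputs=final_result,
                            updates=updates)
    print(power(list(range(10)), 2))  # [0., 1., 4., ..., 81.]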

doc/sandbox/logistic_regression_example.txt
@@ -13,25 +13,25 @@ BUT, YOU GOTTA RUN THIS CODE AND MAKE SURE IT STILL WORKS NICELY, HEY?
 def build_logistic_regression_model(n_in, n_out, l2_coef=30.0):
     # DECLARE SOME VARIABLES
-    import tensor as T
+    import tensor as tt

-    x = T.matrix()  #our points, one point per row
+    x = tt.matrix()  #our points, one point per row
-    y = T.matrix()  #store our labels as place codes (label 3 of 5 is vector [00100])
+    y = tt.matrix()  #store our labels as place codes (label 3 of 5 is vector [00100])

-    w = T.matrix()  #the linear transform to apply to our input points
+    w = tt.matrix()  #the linear transform to apply to our input points
-    b = T.vector()  #a vector of biases, which make our transform affine instead of linear
+    b = tt.vector()  #a vector of biases, which make our transform affine instead of linear

-    stepsize = T.scalar('stepsize')  # a stepsize for gradient descent
+    stepsize = tt.scalar('stepsize')  # a stepsize for gradient descent

     # REGRESSION MODEL AND COSTS TO MINIMIZE
-    prediction = T.softmax(T.dot(x, w) + b)
+    prediction = tt.softmax(tt.dot(x, w) + b)
-    cross_entropy = T.sum(y * T.log(prediction), axis=1)
+    cross_entropy = tt.sum(y * tt.log(prediction), axis=1)
-    cost = T.sum(cross_entropy) + l2_coef * T.sum(T.sum(w*w))
+    cost = tt.sum(cross_entropy) + l2_coef * tt.sum(tt.sum(w*w))

     # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
-    grad_w, grad_b = T.grad(cost, [w, b])
+    grad_w, grad_b = tt.grad(cost, [w, b])
     #
     # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
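As the header of this sandbox file itself warns, the snippet is untested. A corrected, runnable sketch of the same model would look like the following; note that the proper ``theano`` import, ``tt.nnet.softmax``, and the usual negative sign on the cross-entropy are fixes not present in the original:

    import theano
    from theano import tensor as tt

    x = tt.matrix('x')   # one point per row
    y = tt.matrix('y')   # labels as place codes, one row per point
    w = tt.matrix('w')   # the linear transform
    b = tt.vector('b')   # biases, making the transform affine

    prediction = tt.nnet.softmax(tt.dot(x, w) + b)   # T.softmax does not exist; nnet.softmax does
    cross_entropy = -tt.sum(y * tt.log(prediction), axis=1)  # negated, unlike the original
    cost = tt.sum(cross_entropy) + 30.0 * tt.sum(w * w)      # l2_coef = 30.0
    grad_w, grad_b = tt.grad(cost, [w, b])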

doc/tutorial/adding.txt
@@ -12,10 +12,10 @@ let's make a simple function: add two numbers together. Here is how you do
 it:

 >>> import numpy
->>> import theano.tensor as T
+>>> import theano.tensor as tt
 >>> from theano import function
->>> x = T.dscalar('x')
+>>> x = tt.dscalar('x')
->>> y = T.dscalar('y')
+>>> y = tt.dscalar('y')
 >>> z = x + y
 >>> f = function([x, y], z)

@@ -55,10 +55,10 @@ instruction. Behind the scenes, *f* was being compiled into C code.
 **Step 1**

->>> x = T.dscalar('x')
+>>> x = tt.dscalar('x')
->>> y = T.dscalar('y')
+>>> y = tt.dscalar('y')

-In Theano, all symbols must be typed. In particular, ``T.dscalar``
+In Theano, all symbols must be typed. In particular, ``tt.dscalar``
 is the type we assign to "0-dimensional arrays (`scalar`) of doubles
 (`d`)". It is a Theano :ref:`type`.

@@ -72,12 +72,12 @@ field, as you can see here:
 <class 'theano.tensor.var.TensorVariable'>
 >>> x.type
 TensorType(float64, scalar)
->>> T.dscalar
+>>> tt.dscalar
 TensorType(float64, scalar)
->>> x.type is T.dscalar
+>>> x.type is tt.dscalar
 True

-By calling ``T.dscalar`` with a string argument, you create a
+By calling ``tt.dscalar`` with a string argument, you create a
 *Variable* representing a floating-point scalar quantity with the
 given name. If you provide no argument, the symbol will be unnamed. Names
 are not required, but they can help debugging.

@@ -124,9 +124,9 @@ then be used like a normal Python function.
     you to import :func:`function` . Here is how :func:`eval` works:

     >>> import numpy
-    >>> import theano.tensor as T
+    >>> import theano.tensor as tt
-    >>> x = T.dscalar('x')
+    >>> x = tt.dscalar('x')
-    >>> y = T.dscalar('y')
+    >>> y = tt.dscalar('y')
     >>> z = x + y
     >>> numpy.allclose(z.eval({x : 16.3, y : 12.1}), 28.4)
     True

@@ -149,8 +149,8 @@ You might already have guessed how to do this. Indeed, the only change
 from the previous example is that you need to instantiate *x* and
 *y* using the matrix Types:

->>> x = T.dmatrix('x')
+>>> x = tt.dmatrix('x')
->>> y = T.dmatrix('y')
+>>> y = tt.dmatrix('y')
 >>> z = x + y
 >>> f = function([x, y], z)
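Putting the two doctest fragments above together into one script (the sample matrices are illustrative):

    import numpy
    import theano.tensor as tt
    from theano import function

    x = tt.dmatrix('x')
    y = tt.dmatrix('y')
    z = x + y
    f = function([x, y], z)

    print(f(numpy.array([[1, 2], [3, 4]]), numpy.array([[10, 20], [30, 40]])))
    # [[ 11.  22.]
    #  [ 33.  44.]]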

doc/tutorial/broadcasting.txt
@@ -2,7 +2,7 @@
     import numpy as np
     import theano
-    import theano.tensor as T
+    import theano.tensor as tt

 .. _tutbroadcasting:

@@ -39,10 +39,10 @@ information is given in the :ref:`type` of a *Variable*.
 The following code illustrates how rows and columns are broadcasted in order to perform an addition operation with a matrix:

->>> r = T.row()
+>>> r = tt.row()
 >>> r.broadcastable
 (True, False)
->>> mtr = T.matrix()
+>>> mtr = tt.matrix()
 >>> mtr.broadcastable
 (False, False)
 >>> f_row = theano.function([r, mtr], [r + mtr])

@@ -58,7 +58,7 @@ array([[0, 1, 2],
 [array([[  0.,   2.,   4.],
        [  3.,   5.,   7.],
        [  6.,   8.,  10.]])]
->>> c = T.col()
+>>> c = tt.col()
 >>> c.broadcastable
 (False, True)
 >>> f_col = theano.function([c, mtr], [c + mtr])

@@ -80,4 +80,3 @@ See also:
 * `SciPy documentation about numpy's broadcasting <http://www.scipy.org/EricsBroadcastingDoc>`_
 * `OnLamp article about numpy's broadcasting <http://www.onlamp.com/pub/a/python/2000/09/27/numerically.html>`_
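The row-broadcasting doctest above as one runnable sketch (inputs are the same illustrative arrays the tutorial uses):

    import numpy as np
    import theano
    import theano.tensor as tt

    r = tt.row()       # broadcastable pattern (True, False)
    mtr = tt.matrix()  # broadcastable pattern (False, False)
    f_row = theano.function([r, mtr], [r + mtr])

    R = np.arange(3).reshape(1, 3)
    M = np.arange(9).reshape(3, 3)
    print(f_row(R, M))  # the single row of R is added to every row of M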

doc/tutorial/conditions.txt
@@ -20,15 +20,15 @@ IfElse vs Switch
 .. testcode::

-    from theano import tensor as T
+    from theano import tensor as tt
     from theano.ifelse import ifelse
     import theano, time, numpy

-    a,b = T.scalars('a', 'b')
+    a,b = tt.scalars('a', 'b')
-    x,y = T.matrices('x', 'y')
+    x,y = tt.matrices('x', 'y')

-    z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y))
+    z_switch = tt.switch(tt.lt(a, b), tt.mean(x), tt.mean(y))
-    z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y))
+    z_lazy = ifelse(tt.lt(a, b), tt.mean(x), tt.mean(y))

     f_switch = theano.function([a, b, x, y], z_switch,
                                mode=theano.Mode(linker='vm'))
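Completing the hunk above, a minimal sketch that compiles both graphs; as the tutorial explains, the lazy variant evaluates only the branch it needs:

    import theano
    from theano import tensor as tt
    from theano.ifelse import ifelse

    a, b = tt.scalars('a', 'b')
    x, y = tt.matrices('x', 'y')
    z_switch = tt.switch(tt.lt(a, b), tt.mean(x), tt.mean(y))  # evaluates both branches
    z_lazy = ifelse(tt.lt(a, b), tt.mean(x), tt.mean(y))       # evaluates one branch

    f_switch = theano.function([a, b, x, y], z_switch, mode=theano.Mode(linker='vm'))
    f_lazy = theano.function([a, b, x, y], z_lazy, mode=theano.Mode(linker='vm'))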

doc/tutorial/debug_faq.txt
@@ -27,10 +27,10 @@ messages. Consider the following faulty code.
     import numpy as np
     import theano
-    import theano.tensor as T
+    import theano.tensor as tt

-    x = T.vector()
+    x = tt.vector()
-    y = T.vector()
+    y = tt.vector()
     z = x + x
     z = z + y
     f = theano.function([x, y], z)

@@ -103,7 +103,7 @@ following example. Here, we use ``exception_verbosity=high`` and
     import numpy
     import theano
-    import theano.tensor as T
+    import theano.tensor as tt

     # compute_test_value is 'off' by default, meaning this feature is inactive
     theano.config.compute_test_value = 'off' # Use 'warn' to activate this feature

@@ -115,7 +115,7 @@ following example. Here, we use ``exception_verbosity=high`` and
     W2 = theano.shared(W2val, 'W2')

     # input which will be of shape (5,10)
-    x = T.matrix('x')
+    x = tt.matrix('x')

     # provide Theano with a default test-value
     #x.tag.test_value = numpy.random.rand(5, 10)

@@ -124,10 +124,10 @@ following example. Here, we use ``exception_verbosity=high`` and
     func_of_W1 = W1.dimshuffle(2, 0, 1).flatten(2).T

     # source of error: dot product of 5x10 with 20x10
-    h1 = T.dot(x, func_of_W1)
+    h1 = tt.dot(x, func_of_W1)

     # do more stuff
-    h2 = T.dot(h1, W2.T)
+    h2 = tt.dot(h1, W2.T)

     # compile and call the actual function
     f = theano.function([x], h2)

@@ -172,7 +172,7 @@ so slightly, we can get Theano to reveal the exact source of the error.
     ...

     # input which will be of shape (5, 10)
-    x = T.matrix('x')
+    x = tt.matrix('x')

     # provide Theano with a default test-value
     x.tag.test_value = numpy.random.rand(5, 10)

@@ -187,7 +187,7 @@ following error message, which properly identifies *line 24* as the culprit.
     Traceback (most recent call last):
       File "test2.py", line 24, in <module>
-        h1 = T.dot(x, func_of_W1)
+        h1 = tt.dot(x, func_of_W1)
       File "PATH_TO_THEANO/theano/tensor/basic.py", line 4734, in dot
         return _dot(a, b)
       File "PATH_TO_THEANO/theano/gof/op.py", line 545, in __call__

@@ -225,12 +225,12 @@ It is also possible to override variables ``__repr__`` method to have them retur
 .. testsetup:: printtestvalue

     import theano
-    import theano.tensor as T
+    import theano.tensor as tt

 .. testcode:: printtestvalue

-    x = T.scalar('x')
+    x = tt.scalar('x')
     # Assigning test value
     x.tag.test_value = 42

@@ -485,10 +485,10 @@ Consider this example script ("ex.py"):
     import theano
     import numpy
-    import theano.tensor as T
+    import theano.tensor as tt

-    a = T.dmatrix('a')
+    a = tt.dmatrix('a')
-    b = T.dmatrix('b')
+    b = tt.dmatrix('b')

     f = theano.function([a, b], [a * b])
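A compact sketch of the test-value workflow these hunks describe (shapes follow the example above; the constant second operand is an illustrative stand-in for the shared weights):

    import numpy
    import theano
    import theano.tensor as tt

    # evaluate every node on a test value as the graph is built,
    # so shape errors are reported at the offending source line
    theano.config.compute_test_value = 'warn'

    x = tt.matrix('x')
    x.tag.test_value = numpy.random.rand(5, 10)

    y = tt.dot(x, numpy.random.rand(10, 3))  # checked right here, not at function-call time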

doc/tutorial/examples.txt
@@ -41,9 +41,9 @@ Well, what you do is this:
 .. tests/test_tutorial.py:T_examples.test_examples_1

 >>> import theano
->>> import theano.tensor as T
+>>> import theano.tensor as tt
->>> x = T.dmatrix('x')
+>>> x = tt.dmatrix('x')
->>> s = 1 / (1 + T.exp(-x))
+>>> s = 1 / (1 + tt.exp(-x))
 >>> logistic = theano.function([x], s)
 >>> logistic([[0, 1], [-1, -2]])
 array([[ 0.5       ,  0.73105858],

@@ -64,7 +64,7 @@ We can verify that this alternate form produces the same values:
 .. If you modify this code, also change :
 .. tests/test_tutorial.py:T_examples.test_examples_2

->>> s2 = (1 + T.tanh(x / 2)) / 2
+>>> s2 = (1 + tt.tanh(x / 2)) / 2
 >>> logistic2 = theano.function([x], s2)
 >>> logistic2([[0, 1], [-1, -2]])
 array([[ 0.5       ,  0.73105858],

@@ -81,7 +81,7 @@ squared difference between two matrices *a* and *b* at the same time:
 .. If you modify this code, also change :
 .. tests/test_tutorial.py:T_examples.test_examples_3

->>> a, b = T.dmatrices('a', 'b')
+>>> a, b = tt.dmatrices('a', 'b')
 >>> diff = a - b
 >>> abs_diff = abs(diff)
 >>> diff_squared = diff**2

@@ -114,7 +114,7 @@ one. You can do it like this:
 >>> from theano import In
 >>> from theano import function
->>> x, y = T.dscalars('x', 'y')
+>>> x, y = tt.dscalars('x', 'y')
 >>> z = x + y
 >>> f = function([x, In(y, value=1)], z)
 >>> f(33)

@@ -135,7 +135,7 @@ be set positionally or by name, as in standard Python:
 .. If you modify this code, also change :
 .. tests/test_tutorial.py:T_examples.test_examples_7

->>> x, y, w = T.dscalars('x', 'y', 'w')
+>>> x, y, w = tt.dscalars('x', 'y', 'w')
 >>> z = (x + y) * w
 >>> f = function([x, In(y, value=1), In(w, value=2, name='w_by_name')], z)
 >>> f(33)

@@ -180,7 +180,7 @@ internal state, and returns the old state value.
 >>> from theano import shared
 >>> state = shared(0)
->>> inc = T.iscalar('inc')
+>>> inc = tt.iscalar('inc')
 >>> accumulator = function([inc], state, updates=[(state, state+inc)])

 This code introduces a few new concepts. The ``shared`` function constructs

@@ -257,7 +257,7 @@ for the purpose of one particular function.
 >>> fn_of_state = state * 2 + inc
 >>> # The type of foo must match the shared variable we are replacing
 >>> # with the ``givens``
->>> foo = T.scalar(dtype=state.dtype)
+>>> foo = tt.scalar(dtype=state.dtype)
 >>> skip_shared = function([inc, foo], fn_of_state, givens=[(state, foo)])
 >>> skip_shared(1, 3)  # we're using 3 for the state, not state.value
 array(7)

@@ -292,9 +292,9 @@ so compilation only needs to be performed once.
 Let's start from the accumulator defined above:

 >>> import theano
->>> import theano.tensor as T
+>>> import theano.tensor as tt
 >>> state = theano.shared(0)
->>> inc = T.iscalar('inc')
+>>> inc = tt.iscalar('inc')
 >>> accumulator = theano.function([inc], state, updates=[(state, state+inc)])

 We can use it to increment the state as usual:

@@ -463,7 +463,7 @@ to another is shown below.
 >>> from __future__ import print_function
 >>> import theano
 >>> import numpy
->>> import theano.tensor as T
+>>> import theano.tensor as tt
 >>> from theano.sandbox.rng_mrg import MRG_RandomStreams
 >>> from theano.tensor.shared_randomstreams import RandomStreams

@@ -533,7 +533,7 @@ It will be used repeatedly.
     import numpy
     import theano
-    import theano.tensor as T
+    import theano.tensor as tt
     rng = numpy.random

    N = 400 # training sample size

@@ -544,8 +544,8 @@ It will be used repeatedly.
     training_steps = 10000

     # Declare Theano symbolic variables
-    x = T.dmatrix("x")
+    x = tt.dmatrix("x")
-    y = T.dvector("y")
+    y = tt.dvector("y")

     # initialize the weight vector w randomly
     #

@@ -562,15 +562,16 @@ It will be used repeatedly.
     print(b.get_value())

     # Construct Theano expression graph
-    p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))        # Probability that target = 1
+    p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b))      # Probability that target = 1
     prediction = p_1 > 0.5                         # The prediction thresholded
-    xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1)  # Cross-entropy loss function
+    xent = -y * tt.log(p_1) - (1-y) * tt.log(1-p_1)  # Cross-entropy loss function
-    cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize
+    cost = xent.mean() + 0.01 * (w ** 2).sum()     # The cost to minimize
-    gw, gb = T.grad(cost, [w, b])             # Compute the gradient of the cost
-                                              # w.r.t weight vector w and
-                                              # bias term b
-                                              # (we shall return to this in a
-                                              # following section of this tutorial)
+    gw, gb = tt.grad(cost, [w, b])            # Compute the gradient of the cost
+                                              # w.r.t weight vector w and
+                                              # bias term b (we shall
+                                              # return to this in a
+                                              # following section of this
+                                              # tutorial)

     # Compile
     train = theano.function(
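The hunk stops at the ``theano.function`` call; for context, a hedged completion in the spirit of the surrounding tutorial (the 0.1 learning rate and the dataset pair ``D`` are carried from earlier in that file and are assumptions here):

    # assumed continuation of the tutorial snippet above
    train = theano.function(
        inputs=[x, y],
        outputs=[prediction, xent],
        updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
    predict = theano.function(inputs=[x], outputs=prediction)

    for i in range(training_steps):
        pred, err = train(D[0], D[1])  # D = (inputs, targets), built earlier in the tutorial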

doc/tutorial/gradients.txt
@@ -11,7 +11,7 @@ Computing Gradients
 Now let's use Theano for a slightly more sophisticated task: create a
 function which computes the derivative of some expression *y* with
-respect to its parameter *x*. To do this we will use the macro ``T.grad``.
+respect to its parameter *x*. To do this we will use the macro ``tt.grad``.
 For instance, we can compute the
 gradient of :math:`x^2` with respect to :math:`x`. Note that:
 :math:`d(x^2)/dx = 2 \cdot x`.

@@ -25,11 +25,11 @@ Here is the code to compute this gradient:
 >>> import numpy
 >>> import theano
->>> import theano.tensor as T
+>>> import theano.tensor as tt
 >>> from theano import pp
->>> x = T.dscalar('x')
+>>> x = tt.dscalar('x')
 >>> y = x ** 2
->>> gy = T.grad(y, x)
+>>> gy = tt.grad(y, x)
 >>> pp(gy)  # print out the gradient prior to optimization
 '((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
 >>> f = theano.function([x], gy)

@@ -68,30 +68,30 @@ logistic is: :math:`ds(x)/dx = s(x) \cdot (1 - s(x))`.
 .. If you modify this code, also change :
 .. tests/test_tutorial.py:T_examples.test_examples_5

->>> x = T.dmatrix('x')
+>>> x = tt.dmatrix('x')
->>> s = T.sum(1 / (1 + T.exp(-x)))
+>>> s = tt.sum(1 / (1 + tt.exp(-x)))
->>> gs = T.grad(s, x)
+>>> gs = tt.grad(s, x)
 >>> dlogistic = theano.function([x], gs)
 >>> dlogistic([[0, 1], [-1, -2]])
 array([[ 0.25      ,  0.19661193],
        [ 0.19661193,  0.10499359]])

-In general, for any **scalar** expression *s*, ``T.grad(s, w)`` provides
+In general, for any **scalar** expression *s*, ``tt.grad(s, w)`` provides
 the Theano expression for computing :math:`\frac{\partial s}{\partial w}`. In
 this way Theano can be used for doing **efficient** symbolic differentiation
-(as the expression returned by ``T.grad`` will be optimized during compilation), even for
+(as the expression returned by ``tt.grad`` will be optimized during compilation), even for
 functions with many inputs. (see `automatic differentiation <http://en.wikipedia.org/wiki/Automatic_differentiation>`_ for a description
 of symbolic differentiation).

 .. note::

-    The second argument of ``T.grad`` can be a list, in which case the
+    The second argument of ``tt.grad`` can be a list, in which case the
     output is also a list. The order in both lists is important: element
     *i* of the output list is the gradient of the first argument of
-    ``T.grad`` with respect to the *i*-th element of the list given as second argument.
+    ``tt.grad`` with respect to the *i*-th element of the list given as second argument.
-    The first argument of ``T.grad`` has to be a scalar (a tensor
+    The first argument of ``tt.grad`` has to be a scalar (a tensor
     of size 1). For more information on the semantics of the arguments of
-    ``T.grad`` and details about the implementation, see
+    ``tt.grad`` and details about the implementation, see
     :ref:`this<libdoc_gradient>` section of the library.

 Additional information on the inner workings of differentiation may also be

@@ -121,25 +121,25 @@ do is to loop over the entries in *y* and compute the gradient of
 shall return to :ref:`scan<tutloop>` later in this tutorial.

 >>> import theano
->>> import theano.tensor as T
+>>> import theano.tensor as tt
->>> x = T.dvector('x')
+>>> x = tt.dvector('x')
 >>> y = x ** 2
->>> J, updates = theano.scan(lambda i, y, x : T.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y, x])
+>>> J, updates = theano.scan(lambda i, y, x : tt.grad(y[i], x), sequences=tt.arange(y.shape[0]), non_sequences=[y, x])
 >>> f = theano.function([x], J, updates=updates)
 >>> f([4, 4])
 array([[ 8.,  0.],
        [ 0.,  8.]])

 What we do in this code is to generate a sequence of *ints* from *0* to
-``y.shape[0]`` using ``T.arange``. Then we loop through this sequence, and
+``y.shape[0]`` using ``tt.arange``. Then we loop through this sequence, and
 at each step, we compute the gradient of element *y[i]* with respect to
 *x*. ``scan`` automatically concatenates all these rows, generating a
 matrix which corresponds to the Jacobian.

 .. note::

-    There are some pitfalls to be aware of regarding ``T.grad``. One of them is that you
+    There are some pitfalls to be aware of regarding ``tt.grad``. One of them is that you
     cannot re-write the above expression of the Jacobian as
-    ``theano.scan(lambda y_i,x: T.grad(y_i,x), sequences=y,
+    ``theano.scan(lambda y_i,x: tt.grad(y_i,x), sequences=y,
     non_sequences=x)``, even though from the documentation of scan this
     seems possible. The reason is that *y_i* will not be a function of
     *x* anymore, while *y[i]* still is.

@@ -156,14 +156,14 @@ to do it manually.
 You can compute the Hessian manually similarly to the Jacobian. The only
 difference is that now, instead of computing the Jacobian of some expression
-*y*, we compute the Jacobian of ``T.grad(cost,x)``, where *cost* is some
+*y*, we compute the Jacobian of ``tt.grad(cost,x)``, where *cost* is some
 scalar.

->>> x = T.dvector('x')
+>>> x = tt.dvector('x')
 >>> y = x ** 2
 >>> cost = y.sum()
->>> gy = T.grad(cost, x)
+>>> gy = tt.grad(cost, x)
->>> H, updates = theano.scan(lambda i, gy,x : T.grad(gy[i], x), sequences=T.arange(gy.shape[0]), non_sequences=[gy, x])
+>>> H, updates = theano.scan(lambda i, gy,x : tt.grad(gy[i], x), sequences=tt.arange(gy.shape[0]), non_sequences=[gy, x])
 >>> f = theano.function([x], H, updates=updates)
 >>> f([4, 4])
 array([[ 2.,  0.],

@@ -200,11 +200,11 @@ form of the operation. In order to evaluate the *R-operation* of
 expression *y*, with respect to *x*, multiplying the Jacobian with *v*
 you need to do something similar to this:

->>> W = T.dmatrix('W')
+>>> W = tt.dmatrix('W')
->>> V = T.dmatrix('V')
+>>> V = tt.dmatrix('V')
->>> x = T.dvector('x')
+>>> x = tt.dvector('x')
->>> y = T.dot(x, W)
+>>> y = tt.dot(x, W)
->>> JV = T.Rop(y, W, V)
+>>> JV = tt.Rop(y, W, V)
 >>> f = theano.function([W, V, x], JV)
 >>> f([[1, 1], [1, 1]], [[2, 2], [2, 2]], [0,1])
 array([ 2.,  2.])

@@ -219,11 +219,11 @@ the Jacobian. The mathematical formula would be :math:`v \frac{\partial
 f(x)}{\partial x}`. The *L-operator* is also supported for generic tensors
 (not only for vectors). Similarly, it can be implemented as follows:

->>> W = T.dmatrix('W')
+>>> W = tt.dmatrix('W')
->>> v = T.dvector('v')
+>>> v = tt.dvector('v')
->>> x = T.dvector('x')
+>>> x = tt.dvector('x')
->>> y = T.dot(x, W)
+>>> y = tt.dot(x, W)
->>> VJ = T.Lop(y, W, v)
+>>> VJ = tt.Lop(y, W, v)
 >>> f = theano.function([v,x], VJ)
 >>> f([2, 2], [0, 1])
 array([[ 0.,  0.],

@@ -251,11 +251,11 @@ Hessian matrix, you have two options that will
 give you the same result, though these options might exhibit differing performances.
 Hence, we suggest profiling the methods before using either one of the two:

->>> x = T.dvector('x')
+>>> x = tt.dvector('x')
->>> v = T.dvector('v')
+>>> v = tt.dvector('v')
->>> y = T.sum(x ** 2)
+>>> y = tt.sum(x ** 2)
->>> gy = T.grad(y, x)
+>>> gy = tt.grad(y, x)
->>> vH = T.grad(T.sum(gy * v), x)
+>>> vH = tt.grad(tt.sum(gy * v), x)
 >>> f = theano.function([x, v], vH)
 >>> f([4, 4], [2, 2])
 array([ 4.,  4.])

@@ -263,11 +263,11 @@ array([ 4.,  4.])
 or, making use of the *R-operator*:

->>> x = T.dvector('x')
+>>> x = tt.dvector('x')
->>> v = T.dvector('v')
+>>> v = tt.dvector('v')
->>> y = T.sum(x ** 2)
+>>> y = tt.sum(x ** 2)
->>> gy = T.grad(y, x)
+>>> gy = tt.grad(y, x)
->>> Hv = T.Rop(gy, x, v)
+>>> Hv = tt.Rop(gy, x, v)
 >>> f = theano.function([x, v], Hv)
 >>> f([4, 4], [2, 2])
 array([ 4.,  4.])
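As a sanity check on the Jacobian hunk above (with the stray ``T.arange`` corrected to ``tt.arange``), the loop puts :math:`2x` on the diagonal:

    import theano
    import theano.tensor as tt

    x = tt.dvector('x')
    y = x ** 2
    # one gradient row per entry of y; scan stacks them into the Jacobian
    J, updates = theano.scan(lambda i, y, x: tt.grad(y[i], x),
                             sequences=tt.arange(y.shape[0]),
                             non_sequences=[y, x])
    f = theano.function([x], J, updates=updates)
    print(f([4, 4]))  # [[ 8.  0.]
                      #  [ 0.  8.]]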

doc/tutorial/index.txt
@@ -11,9 +11,9 @@ Let us start an interactive session (e.g. with ``python`` or ``ipython``) and im
 Several of the symbols you will need to use are in the ``tensor`` subpackage
 of Theano. Let us import that subpackage under a handy name like
-``T`` (the tutorials will frequently use this convention).
+``tt`` (the tutorials will frequently use this convention).

->>> import theano.tensor as T
+>>> import theano.tensor as tt

 If that succeeded you are ready for the tutorial, otherwise check your
 installation (see :ref:`install`).
doc/tutorial/loop.txt
浏览文件 @
33667eb7
...
@@ -32,15 +32,15 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
...
@@ -32,15 +32,15 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
.. testcode::
import theano
import theano
import theano.tensor as
T
import theano.tensor as
tt
import numpy as np
import numpy as np
# defining the tensor variables
# defining the tensor variables
X =
T
.matrix("X")
X =
tt
.matrix("X")
W =
T
.matrix("W")
W =
tt
.matrix("W")
b_sym =
T
.vector("b_sym")
b_sym =
tt
.vector("b_sym")
results, updates = theano.scan(lambda v:
T.tanh(T
.dot(v, W) + b_sym), sequences=X)
results, updates = theano.scan(lambda v:
tt.tanh(tt
.dot(v, W) + b_sym), sequences=X)
compute_elementwise = theano.function(inputs=[X, W, b_sym], outputs=results)
compute_elementwise = theano.function(inputs=[X, W, b_sym], outputs=results)
# test values
# test values
...
@@ -66,19 +66,19 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
...
@@ -66,19 +66,19 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
import theano
import theano.tensor as T
import theano.tensor as tt
import numpy as np
# define tensor variables
X = T.vector("X")
X = tt.vector("X")
W = T.matrix("W")
W = tt.matrix("W")
b_sym = T.vector("b_sym")
b_sym = tt.vector("b_sym")
U = T.matrix("U")
U = tt.matrix("U")
Y = T.matrix("Y")
Y = tt.matrix("Y")
V = T.matrix("V")
V = tt.matrix("V")
P = T.matrix("P")
P = tt.matrix("P")
results, updates = theano.scan(lambda y, p, x_tm1: T.tanh(T.dot(x_tm1, W) + T.dot(y, U) + T.dot(p, V)),
results, updates = theano.scan(lambda y, p, x_tm1: tt.tanh(tt.dot(x_tm1, W) + tt.dot(y, U) + tt.dot(p, V)),
sequences=[Y, P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs=[X, W, Y, U, P, V], outputs=results)
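Read the step function as: ``x_tm1`` is the previous step's output (seeded by ``X``), ``y`` walks forward over ``Y``, and ``p`` walks backward over ``P``. A self-contained NumPy reference loop (a sketch with placeholder shapes and values):

    import numpy as np
    x_t = np.zeros(2)                # plays the role of X
    w = u = v_mat = np.ones((2, 2))  # stand-ins for W, U, V
    y_vals = np.ones((5, 2))         # stand-in for Y
    p_vals = np.ones((5, 2))         # stand-in for P
    for t in range(y_vals.shape[0]):
        # same update rule scan applies; note P is traversed in reverse
        x_t = np.tanh(x_t.dot(w) + y_vals[t].dot(u) + p_vals[::-1][t].dot(v_mat))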
...
@@ -120,12 +120,12 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
import theano
import theano.tensor as T
import theano.tensor as tt
import numpy as np
# define tensor variable
X = T.matrix("X")
X = tt.matrix("X")
results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences=[X])
results, updates = theano.scan(lambda x_i: tt.sqrt((x_i ** 2).sum()), sequences=[X])
compute_norm_lines = theano.function(inputs=[X], outputs=results)
# test value
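A usage sketch (assuming the definitions above); the column-norm variant in the next hunk is identical except that it scans over ``X.T``:

    x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
    print(compute_norm_lines(x))     # row norms via scan
    print(np.sqrt((x ** 2).sum(1)))  # NumPy reference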
...
@@ -145,12 +145,12 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
import theano
import theano.tensor as T
import theano.tensor as tt
import numpy as np
# define tensor variable
X = T.matrix("X")
X = tt.matrix("X")
results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences=[X.T])
results, updates = theano.scan(lambda x_i: tt.sqrt((x_i ** 2).sum()), sequences=[X.T])
compute_norm_cols = theano.function(inputs=[X], outputs=results)
# test value
...
@@ -170,14 +170,14 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
import theano
import theano.tensor as T
import theano.tensor as tt
import numpy as np
floatX = "float32"
# define tensor variable
X = T.matrix("X")
X = tt.matrix("X")
results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i, j] + t_f, floatX),
results, updates = theano.scan(lambda i, j, t_f: tt.cast(X[i, j] + t_f, floatX),
sequences=[T.arange(X.shape[0]), T.arange(X.shape[1])],
sequences=[tt.arange(X.shape[0]), tt.arange(X.shape[1])],
outputs_info=np.asarray(0., dtype=floatX))
result = results[-1]
compute_trace = theano.function(inputs=[X], outputs=result)
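Zipping the two ``arange`` sequences makes the step function visit ``X[i, i]`` and add it to the accumulator ``t_f``, i.e. a running trace. A usage sketch (assuming the definitions above):

    x = np.eye(5, dtype=floatX)
    x[0] = np.arange(5, dtype=floatX)
    print(compute_trace(x))      # scan's accumulated diagonal sum
    print(np.diagonal(x).sum())  # NumPy reference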
...
@@ -201,18 +201,18 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
import theano
import theano.tensor as T
import theano.tensor as tt
import numpy as np
# define tensor variables
X = T.matrix("X")
X = tt.matrix("X")
W = T.matrix("W")
W = tt.matrix("W")
b_sym = T.vector("b_sym")
b_sym = tt.vector("b_sym")
U = T.matrix("U")
U = tt.matrix("U")
V = T.matrix("V")
V = tt.matrix("V")
n_sym = T.iscalar("n_sym")
n_sym = tt.iscalar("n_sym")
results, updates = theano.scan(lambda x_tm2, x_tm1: T.dot(x_tm2, U) + T.dot(x_tm1, V) + T.tanh(T.dot(x_tm1, W) + b_sym),
results, updates = theano.scan(lambda x_tm2, x_tm1: tt.dot(x_tm2, U) + tt.dot(x_tm1, V) + tt.tanh(tt.dot(x_tm1, W) + b_sym),
n_steps=n_sym, outputs_info=[dict(initial=X, taps=[-2, -1])])
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=results)
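Here ``taps=[-2, -1]`` hands the step function the outputs from two steps back (``x_tm2``) and one step back (``x_tm1``), seeded by the two rows of the initial ``X``. A minimal shape sketch (placeholder values, assuming the definitions above):

    x0 = np.zeros((2, 2), dtype=theano.config.floatX)  # two seed rows, one per tap
    i2 = np.eye(2, dtype=theano.config.floatX)
    out = compute_seq2(x0, i2, i2, i2, np.zeros(2, dtype=theano.config.floatX), 5)
    print(out.shape)  # (5, 2): one new row per step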
...
@@ -266,14 +266,14 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
.. testcode::
import theano
import theano.tensor as T
import theano.tensor as tt
import numpy as np
# define tensor variables
v = T.vector()
v = tt.vector()
A = T.matrix()
A = tt.matrix()
y = T.tanh(T.dot(v, A))
y = tt.tanh(tt.dot(v, A))
results, updates = theano.scan(lambda i: T.grad(y[i], v), sequences=[T.arange(y.shape[0])])
results, updates = theano.scan(lambda i: tt.grad(y[i], v), sequences=[tt.arange(y.shape[0])])
compute_jac_t = theano.function([A, v], results, allow_input_downcast=True) # shape (d_out, d_in)
# test values
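A usage sketch (assuming the definitions above): row ``i`` of the result is the gradient of ``y[i]``, which for ``tanh(v.dot(A))`` has the closed form ``(1 - tanh((vA)_i)**2) * A[:, i]``:

    v_val = np.ones(5, dtype=theano.config.floatX)
    a_val = np.eye(5, 4, dtype=theano.config.floatX)
    J = compute_jac_t(a_val, v_val)  # shape (4, 5)
    J_ref = ((1 - np.tanh(v_val.dot(a_val)) ** 2) * a_val).T
    assert np.allclose(J, J_ref)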
...
@@ -301,12 +301,12 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
.. testcode::
import theano
import theano.tensor as T
import theano.tensor as tt
import numpy as np
# define shared variables
k = theano.shared(0)
n_sym = T.iscalar("n_sym")
n_sym = tt.iscalar("n_sym")
results, updates = theano.scan(lambda:{k:(k + 1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast=True)
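Each scan step returns an update dictionary for the shared variable ``k``, and the state persists across calls. A usage sketch:

    accumulator(5)
    print(k.get_value())  # 5
    accumulator(5)
    print(k.get_value())  # 10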
...
@@ -320,19 +320,19 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
.. testcode::
import theano
import theano.tensor as T
import theano.tensor as tt
import numpy as np
# define tensor variables
X = T.matrix("X")
X = tt.matrix("X")
W = T.matrix("W")
W = tt.matrix("W")
b_sym = T.vector("b_sym")
b_sym = tt.vector("b_sym")
# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
trng = tt.shared_randomstreams.RandomStreams(1234)
d=trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d, sequences=X)
results, updates = theano.scan(lambda v: tt.tanh(tt.dot(v, W) + b_sym) * d, sequences=X)
compute_with_bnoise = theano.function(inputs=[X, W, b_sym], outputs=results,
updates=updates, allow_input_downcast=True)
x = np.eye(10, 2, dtype=theano.config.floatX)
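A usage sketch (assuming the definitions above); passing ``updates=updates`` to ``theano.function`` is what advances the random stream's state, so a fresh noise mask is drawn on each call:

    w = np.ones((2, 2), dtype=theano.config.floatX)
    b = np.ones((2,), dtype=theano.config.floatX)
    print(compute_with_bnoise(x, w, b))  # different mask on the next call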
...
@@ -361,18 +361,18 @@ Note that if you want to use a random variable ``d`` that will not be updated th
.. testcode::
import theano
import theano.tensor as T
import theano.tensor as tt
theano.config.warn.subtensor_merge_bug = False
k = T.iscalar("k")
k = tt.iscalar("k")
A = T.vector("A")
A = tt.vector("A")
def inner_fct(prior_result, B):
    return prior_result * B
# Symbolic description of the result
result, updates = theano.scan(fn=inner_fct,
outputs_info=T.ones_like(A),
outputs_info=tt.ones_like(A),
non_sequences=A, n_steps=k)
# Scan has provided us with A ** 1 through A ** k. Keep only the last
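A sketch of finishing the example (names as above): keep only the final step's output and compile; Theano's optimizer then discards the intermediate powers:

    import numpy as np
    final_result = result[-1]
    power = theano.function(inputs=[A, k], outputs=final_result, updates=updates)
    print(power(np.arange(10, dtype=theano.config.floatX), 2))  # element-wise A ** 2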
...
@@ -395,11 +395,11 @@ Note that if you want to use a random variable ``d`` that will not be updated th
import numpy
import theano
import theano.tensor as T
import theano.tensor as tt
theano.config.warn.subtensor_merge_bug = False
coefficients = theano.tensor.vector("coefficients")
x = T.scalar("x")
x = tt.scalar("x")
max_coefficients_supported = 10000
# Generate the components of the polynomial
...
doc/tutorial/modes.txt
View file @ 33667eb7
...
@@ -47,7 +47,7 @@ Consider the logistic regression:
import numpy
import theano
import theano.tensor as T
import theano.tensor as tt
rng = numpy.random
N = 400
...
@@ -57,19 +57,19 @@ Consider the logistic regression:
training_steps = 10000
# Declare Theano symbolic variables
x = T.matrix("x")
x = tt.matrix("x")
y = T.vector("y")
y = tt.vector("y")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one
p_1 = 1 / (1 + tt.exp(-tt.dot(x, w)-b)) # Probability of having a one
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) # Cross-entropy
xent = -y*tt.log(p_1) - (1-y)*tt.log(1-p_1) # Cross-entropy
cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
gw,gb = T.grad(cost, [w,b])
gw,gb = tt.grad(cost, [w,b])
# Compile expressions to functions
train = theano.function(
...
@@ -252,7 +252,7 @@ DebugMode is used as follows:
.. testcode::
x = T.dvector('x')
x = tt.dvector('x')
f = theano.function([x], 10 * x, mode='DebugMode')
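DebugMode trades speed for safety: it evaluates each node several ways and cross-checks the results, raising an error as soon as something disagrees. A usage sketch (assuming the definitions above):

    f([5.0, 2.0])  # works, but noticeably slower than the default mode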
...
doc/tutorial/printing_drawing.txt
View file @ 33667eb7
...
@@ -27,7 +27,7 @@ Consider again the logistic regression example:
>>> import numpy
>>> import theano
>>> import theano.tensor as T
>>> import theano.tensor as tt
>>> rng = numpy.random
>>> # Training data
>>> N = 400
...
@@ -35,19 +35,19 @@ Consider again the logistic regression example:
>>> D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
>>> training_steps = 10000
>>> # Declare Theano symbolic variables
>>> x = T.matrix("x")
>>> x = tt.matrix("x")
>>> y = T.vector("y")
>>> y = tt.vector("y")
>>> w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
>>> b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
>>> x.tag.test_value = D[0]
>>> y.tag.test_value = D[1]
>>> # Construct Theano expression graph
>>> p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one
>>> p_1 = 1 / (1 + tt.exp(-tt.dot(x, w)-b)) # Probability of having a one
>>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
>>> # Compute gradients
>>> xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) # Cross-entropy
>>> xent = -y*tt.log(p_1) - (1-y)*tt.log(1-p_1) # Cross-entropy
>>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
>>> gw,gb = T.grad(cost, [w,b])
>>> gw,gb = tt.grad(cost, [w,b])
>>> # Training and prediction function
>>> train = theano.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train")
>>> predict = theano.function(inputs=[x], outputs=prediction, name = "predict")
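Since this file's topic is inspecting graphs, a usage sketch of the two printers it introduces (assuming the definitions above):

    >>> theano.printing.pprint(prediction)   # compact, math-like string of the symbolic graph
    >>> theano.printing.debugprint(predict)  # optimized graph of the compiled function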
...
doc/tutorial/using_gpu.txt
View file @ 33667eb7
...
@@ -313,7 +313,7 @@ Consider again the logistic regression:
import numpy
import theano
import theano.tensor as T
import theano.tensor as tt
rng = numpy.random
N = 400
...
@@ -323,19 +323,19 @@ Consider again the logistic regression:
training_steps = 10000
# Declare Theano symbolic variables
x = T.matrix("x")
x = tt.matrix("x")
y = T.vector("y")
y = tt.vector("y")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one
p_1 = 1 / (1 + tt.exp(-tt.dot(x, w)-b)) # Probability of having a one
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) # Cross-entropy
xent = -y*tt.log(p_1) - (1-y)*tt.log(1-p_1) # Cross-entropy
cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize
gw,gb = T.grad(cost, [w,b])
gw,gb = tt.grad(cost, [w,b])
# Compile expressions to functions
train = theano.function(
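The code itself is device-agnostic; the GPU tutorial runs it by setting Theano flags (e.g. ``THEANO_FLAGS=device=cuda,floatX=float32``) rather than editing the script. A sketch for checking what the current configuration resolved to:

    import theano
    print(theano.config.device)  # 'cpu', or a 'cuda*' device when the GPU backend is active
    print(theano.config.floatX)  # keeping data in floatX is what lets it live on the GPU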
...