Merge pull request #5782 from Amrithasuresh/master

Replace `import numpy` with `import numpy as np` in theano/sandbox and update all `numpy.` references to `np.` (issue #4218)

Merge pull request #5782 from Amrithasuresh/master
bea31470 · Frédéric Bastien · GitHub · 56da8ca8 · 65a48d98 · bea31470
--- a/theano/sandbox/fourier.py
+++ b/theano/sandbox/fourier.py
@@ -4,7 +4,7 @@ Provides Ops for FFT and DCT.
 """

 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import numpy.fft

 from six.moves import xrange
@@ -126,13 +126,13 @@ def dct_matrix(rows, cols, unitary=True):
    This algorithm is adapted from Dan Ellis' Rastmat spec2cep.m, lines 15-20.

    """
-    rval = numpy.zeros((rows, cols))
-    col_range = numpy.arange(cols)
-    scale = numpy.sqrt(2.0 / cols)
+    rval = np.zeros((rows, cols))
+    col_range = np.arange(cols)
+    scale = np.sqrt(2.0 / cols)
    for i in xrange(rows):
-        rval[i] = numpy.cos(
-            i * (col_range * 2 + 1) / (2.0 * cols) * numpy.pi) * scale
+        rval[i] = np.cos(
+            i * (col_range * 2 + 1) / (2.0 * cols) * np.pi) * scale

    if unitary:
-        rval[0] *= numpy.sqrt(0.5)
+        rval[0] *= np.sqrt(0.5)
    return rval
--- a/theano/sandbox/linalg/tests/test_linalg.py
+++ b/theano/sandbox/linalg/tests/test_linalg.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import numpy.linalg

 import theano
@@ -39,9 +39,9 @@ def test_rop_lop():
                        non_sequences=[y, mx, mv])
    scan_f = function([mx, mv], sy)

-    rng = numpy.random.RandomState(utt.fetch_seed())
-    vx = numpy.asarray(rng.randn(4, 4), theano.config.floatX)
-    vv = numpy.asarray(rng.randn(4, 4), theano.config.floatX)
+    rng = np.random.RandomState(utt.fetch_seed())
+    vx = np.asarray(rng.randn(4, 4), theano.config.floatX)
+    vv = np.asarray(rng.randn(4, 4), theano.config.floatX)

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
@@ -61,7 +61,7 @@ def test_rop_lop():
            'Op did not raised an error even though the function'
            ' is not differentiable'))

-    vv = numpy.asarray(rng.uniform(size=(4,)), theano.config.floatX)
+    vv = np.asarray(rng.uniform(size=(4,)), theano.config.floatX)
    yv = tensor.Lop(y, mx, v)
    lop_f = function([mx, v], yv)

@@ -75,21 +75,21 @@ def test_rop_lop():

 def test_spectral_radius_bound():
    tol = 10 ** (-6)
-    rng = numpy.random.RandomState(utt.fetch_seed())
+    rng = np.random.RandomState(utt.fetch_seed())
    x = theano.tensor.matrix()
    radius_bound = spectral_radius_bound(x, 5)
    f = theano.function([x], radius_bound)

    shp = (3, 4)
    m = rng.rand(*shp)
-    m = numpy.cov(m).astype(config.floatX)
+    m = np.cov(m).astype(config.floatX)
    radius_bound_theano = f(m)

    # test the approximation
    mm = m
    for i in range(5):
-        mm = numpy.dot(mm, mm)
-    radius_bound_numpy = numpy.trace(mm) ** (2 ** (-5))
+        mm = np.dot(mm, mm)
+    radius_bound_numpy = np.trace(mm) ** (2 ** (-5))
    assert abs(radius_bound_numpy - radius_bound_theano) < tol

    # test the bound

--- a/theano/sandbox/minimal.py
+++ b/theano/sandbox/minimal.py
@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division

 import unittest

-import numpy
+import numpy as np

 from theano import gof, tensor, function
 from theano.tests import unittest_tools as utt
@@ -39,11 +39,11 @@ class Minimal(gof.Op):
        # but do not modify any of the arguments [inplace].
        print("perform got %i arguments" % len(inputs))

-        print("Max of input[0] is ", numpy.max(inputs[0]))
+        print("Max of input[0] is ", np.max(inputs[0]))

        # return some computed value.
        # do not return something that is aliased to one of the inputs.
-        output[0] = numpy.asarray(0, dtype='int64')
+        output[0] = np.asarray(0, dtype='int64')

 minimal = Minimal()

@@ -55,7 +55,7 @@ minimal = Minimal()

 class T_minimal(unittest.TestCase):
    def setUp(self):
-        self.rng = numpy.random.RandomState(utt.fetch_seed(666))
+        self.rng = np.random.RandomState(utt.fetch_seed(666))

    def test0(self):
        A = tensor.matrix()
@@ -66,6 +66,6 @@ class T_minimal(unittest.TestCase):
        print('built')

        Aval = self.rng.randn(5, 5)
-        bval = numpy.arange(5, dtype=float)
+        bval = np.arange(5, dtype=float)
        f(Aval, bval)
        print('done')
--- a/theano/sandbox/multinomial.py
+++ b/theano/sandbox/multinomial.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import warnings

 import theano
@@ -172,7 +172,7 @@ class MultinomialFromUniform(Op):
            raise ValueError("unis.shape[0] != pvals.shape[0] * n_samples",
                             unis.shape[0], pvals.shape[0], n_samples)
        if z[0] is None or z[0].shape != pvals.shape:
-            z[0] = numpy.zeros(pvals.shape, dtype=node.outputs[0].dtype)
+            z[0] = np.zeros(pvals.shape, dtype=node.outputs[0].dtype)
        else:
            z[0].fill(0)

@@ -209,7 +209,7 @@ class MultinomialFromUniform(Op):
                # have the same answer as the c code as in the c code
                # the cumul is in double precission.
                cumsum = pvals[n].cumsum(dtype='float64')
-                z[0][n, numpy.searchsorted(cumsum, unis_n)] += 1
+                z[0][n, np.searchsorted(cumsum, unis_n)] += 1


 class ChoiceFromUniform(MultinomialFromUniform):
@@ -380,8 +380,8 @@ class ChoiceFromUniform(MultinomialFromUniform):
        else:
            odtype = self.odtype
        if (z[0] is None or
-                not numpy.all(z[0].shape == [pvals.shape[0], n_samples])):
-            z[0] = -1 * numpy.ones((pvals.shape[0], n_samples), dtype=odtype)
+                not np.all(z[0].shape == [pvals.shape[0], n_samples])):
+            z[0] = -1 * np.ones((pvals.shape[0], n_samples), dtype=odtype)

        nb_multi = pvals.shape[0]
        nb_outcomes = pvals.shape[1]

--- a/theano/sandbox/rng_mrg.py
+++ b/theano/sandbox/rng_mrg.py
@@ -8,7 +8,7 @@ http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
 from __future__ import absolute_import, print_function, division
 import warnings

-import numpy
+import numpy as np
 from six import integer_types
 from six.moves import xrange

@@ -38,7 +38,7 @@ if theano.sandbox.cuda.cuda_available:
 def matVecModM(A, s, m):
    # TODO : need description for method, parameter and return
    assert A.dtype == 'int64'
-    return numpy.int32(numpy.sum((A * s) % m, 1) % m)
+    return np.int32(np.sum((A * s) % m, 1) % m)


 def multMatVect(v, A, m1, B, m2):
@@ -97,7 +97,7 @@ class DotModulo(Op):
        (out,) = outputs
        o1 = matVecModM(A, s, m)
        o2 = matVecModM(A2, s2, m2)
-        out[0] = numpy.concatenate((o1, o2))
+        out[0] = np.concatenate((o1, o2))

    def c_code_cache_version(self):
        return (6,)
@@ -198,39 +198,39 @@ class DotModulo(Op):

 # MRG31k3p
 # generator constants :
-M1 = numpy.asarray(numpy.int32(2147483647))    # 2^31 - 1
-M2 = numpy.asarray(numpy.int32(2147462579))    # 2^31 - 21069
-MASK12 = numpy.int32(511)                      # 2^9 - 1
-MASK13 = numpy.int32(16777215)                 # 2^24 - 1
-MASK2 = numpy.int32(65535)                     # 2^16 - 1
-MULT2 = numpy.int32(21069)
+M1 = np.asarray(np.int32(2147483647))       # 2^31 - 1
+M2 = np.asarray(np.int32(2147462579))       # 2^31 - 21069
+MASK12 = np.int32(511)                      # 2^9 - 1
+MASK13 = np.int32(16777215)                 # 2^24 - 1
+MASK2 = np.int32(65535)                     # 2^16 - 1
+MULT2 = np.int32(21069)
 NORM = 4.656612873077392578125e-10  # 1./2^31

-# A1p0 = numpy.asarray([[0, 4194304, 129], [1, 0, 0], [0, 1, 0]],
+# A1p0 = np.asarray([[0, 4194304, 129], [1, 0, 0], [0, 1, 0]],
 #                      dtype='int64')
-# A2p0 = numpy.asarray([[32768, 0, 32769], [1, 0, 0], [0, 1, 0]],
+# A2p0 = np.asarray([[32768, 0, 32769], [1, 0, 0], [0, 1, 0]],
 #                      dtype='int64')

-A1p72 = numpy.asarray([[1516919229, 758510237, 499121365],
-                       [1884998244, 1516919229, 335398200],
-                       [601897748, 1884998244, 358115744]],
-                      dtype='int64')
-A2p72 = numpy.asarray([[1228857673, 1496414766, 954677935],
-                       [1133297478, 1407477216, 1496414766],
-                       [2002613992, 1639496704, 1407477216]],
-                      dtype='int64')
+A1p72 = np.asarray([[1516919229, 758510237, 499121365],
+                    [1884998244, 1516919229, 335398200],
+                    [601897748, 1884998244, 358115744]],
+                   dtype='int64')
+A2p72 = np.asarray([[1228857673, 1496414766, 954677935],
+                    [1133297478, 1407477216, 1496414766],
+                    [2002613992, 1639496704, 1407477216]],
+                   dtype='int64')

-A1p134 = numpy.asarray(
+A1p134 = np.asarray(
    [[1702500920, 1849582496, 1656874625],
     [828554832, 1702500920, 1512419905],
     [1143731069, 828554832, 102237247]],
    dtype='int64')
-A2p134 = numpy.asarray(
+A2p134 = np.asarray(
    [[796789021, 1464208080, 607337906],
     [1241679051, 1431130166, 1464208080],
     [1401213391, 1178684362, 1431130166]],
    dtype='int64')
-np_int32_vals = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
+np_int32_vals = [np.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]


 def ff_2p134(rstate):
@@ -246,14 +246,14 @@ def ff_2p72(rstate):
 def mrg_next_value(rstate, new_rstate):
    # TODO : need description for method, parameter and return
    x11, x12, x13, x21, x22, x23 = rstate
-    assert type(x11) == numpy.int32
+    assert type(x11) == np.int32

    i0, i7, i9, i15, i16, i22, i24 = np_int32_vals
    # first component
    y1 = (((x12 & MASK12) << i22) + (x12 >> i9) +
          ((x13 & MASK13) << i7) + (x13 >> i24))

-    assert type(y1) == numpy.int32
+    assert type(y1) == np.int32
    if (y1 < 0 or y1 >= M1):  # must also check overflow
        y1 -= M1
    y1 += x13
@@ -266,11 +266,11 @@ def mrg_next_value(rstate, new_rstate):

    # second component
    y1 = ((x21 & MASK2) << i15) + (MULT2 * (x21 >> i16))
-    assert type(y1) == numpy.int32
+    assert type(y1) == np.int32
    if (y1 < 0 or y1 >= M2):
        y1 -= M2
    y2 = ((x23 & MASK2) << i15) + (MULT2 * (x23 >> i16))
-    assert type(y2) == numpy.int32
+    assert type(y2) == np.int32
    if (y2 < 0 or y2 >= M2):
        y2 -= M2
    y2 += x23
@@ -286,7 +286,7 @@ def mrg_next_value(rstate, new_rstate):

    # Must never return either 0 or M1+1
    new_rstate[...] = [x11, x12, x13, x21, x22, x23]
-    assert new_rstate.dtype == numpy.int32
+    assert new_rstate.dtype == np.int32
    if (x11 <= x21):
        return (x11 - x21 + M1) * NORM
    else:
@@ -360,22 +360,22 @@ class mrg_uniform(mrg_uniform_base):
            # some rng_mrg tests) I also add this limit here.
            raise ValueError("rng_mrg does not support more then (2**31 -1) samples")

-        rstate = numpy.asarray(rstate)  # bring state from GPU if necessary
+        rstate = np.asarray(rstate)  # bring state from GPU if necessary
        if not self.inplace:
            rstate = rstate.copy()

        n_streams, _ = rstate.shape

-        rval = numpy.zeros(n_elements, dtype=self.output_type.dtype)
+        rval = np.zeros(n_elements, dtype=self.output_type.dtype)

-        err_orig = numpy.seterr(over='ignore')
+        err_orig = np.seterr(over='ignore')
        try:
            for i in xrange(n_elements):
                sample = mrg_next_value(rstate[i % n_streams],
                                        rstate[i % n_streams])
                rval[i] = sample
        finally:
-            numpy.seterr(**err_orig)
+            np.seterr(**err_orig)

        # send to GPU if necessary
        o_rstate[0] = node.outputs[0].type.filter(rstate)
@@ -396,13 +396,13 @@ class mrg_uniform(mrg_uniform_base):
                'NPY_ARRAY_ENSURECOPY|NPY_ARRAY_C_CONTIGUOUS|'
                'NPY_ARRAY_ALIGNED')
        ndim = self.output_type.ndim
-        o_type_num = numpy.asarray(0, dtype=self.output_type.dtype).dtype.num
+        o_type_num = np.asarray(0, dtype=self.output_type.dtype).dtype.num
        fail = sub['fail']
        if self.output_type.dtype == 'float32':
            otype = 'float'
-            NORM = '4.6566126e-10f'  # numpy.float32(1.0/(2**31+65))
+            NORM = '4.6566126e-10f'  # np.float32(1.0/(2**31+65))
            # this was determined by finding the biggest number such that
-            # numpy.float32(number * M1) < 1.0
+            # np.float32(number * M1) < 1.0
        else:
            otype = 'double'
            NORM = '4.656612873077392578125e-10'
@@ -590,9 +590,9 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
    def c_support_code_apply(self, node, nodename):
        if self.output_type.dtype == 'float32':
            otype = 'float'
-            NORM = '4.6566126e-10f'  # numpy.float32(1.0/(2**31+65))
+            NORM = '4.6566126e-10f'  # np.float32(1.0/(2**31+65))
            # this was determined by finding the biggest number such that
-            # numpy.float32(number * M1) < 1.0
+            # np.float32(number * M1) < 1.0
        else:
            otype = 'double'
            NORM = '4.656612873077392578125e-10'
@@ -686,7 +686,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
        o_rstate, o_sample = out
        inplace = int(self.inplace)
        ndim = self.output_type.ndim
-        o_type_num = numpy.asarray(0, dtype=self.output_type.dtype).dtype.num
+        o_type_num = np.asarray(0, dtype=self.output_type.dtype).dtype.num
        fail = sub['fail']

        if self.output_type.dtype == 'float32':
@@ -858,15 +858,15 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
            otype = 'ga_half'
            # limit the values of the state that we use.
            mask = '& 0x7fff'
-            NORM = '3.0518e-05f'  # numpy.float16(1.0/(2**15+8))
+            NORM = '3.0518e-05f'  # np.float16(1.0/(2**15+8))
            # this was determined by finding the biggest number such that
-            # numpy.float16(number * (M1 & 0x7fff)) < 1.0
+            # np.float16(number * (M1 & 0x7fff)) < 1.0
        elif self.output_type.dtype == 'float32':
            otype = 'float'
            mask = ''
-            NORM = '4.6566126e-10f'  # numpy.float32(1.0/(2**31+65))
+            NORM = '4.6566126e-10f'  # np.float32(1.0/(2**31+65))
            # this was determined by finding the biggest number such that
-            # numpy.float32(number * M1) < 1.0
+            # np.float32(number * M1) < 1.0
        elif self.output_type.dtype == 'float64':
            otype = 'double'
            mask = ''
@@ -969,7 +969,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
        o_rstate, o_sample = out
        inplace = int(self.inplace)
        ndim = self.output_type.ndim
-        o_type_num = numpy.asarray(0, dtype=self.output_type.dtype).dtype.num
+        o_type_num = np.asarray(0, dtype=self.output_type.dtype).dtype.num
        fail = sub['fail']
        ctx = sub['params']
        kname = self.gpu_kernels(node, nodename)[0].objvar
@@ -1176,7 +1176,7 @@ class MRG_RandomStreams(object):
                raise ValueError('seed should not be 0', seed)
            elif seed >= M2:
                raise ValueError('seed should be less than %i' % M2, seed)
-            self.rstate = numpy.asarray([seed] * 6, dtype='int32')
+            self.rstate = np.asarray([seed] * 6, dtype='int32')
        elif len(seed) == 6:
            if seed[0] == 0 and seed[1] == 0 and seed[2] == 0:
                raise ValueError(
@@ -1192,7 +1192,7 @@ class MRG_RandomStreams(object):
                raise ValueError(
                    'The last 3 values of seed should be less than %i' % M2,
                    seed)
-            self.rstate = numpy.asarray(seed, dtype='int32')
+            self.rstate = np.asarray(seed, dtype='int32')
        else:
            raise TypeError("seed should be 1 integer or 6 integers")

@@ -1234,7 +1234,7 @@ class MRG_RandomStreams(object):
        """
        # self.rstate = ff_2p134(self.rstate)
        self.rstate = multMatVect(self.rstate, A1p134, M1, A2p134, M2)
-        assert self.rstate.dtype == numpy.int32
+        assert self.rstate.dtype == np.int32

    @theano.configparser.change_flags(compute_test_value='off')
    def get_substream_rstates(self, n_streams, dtype, inc_rstate=True):
@@ -1247,7 +1247,7 @@ class MRG_RandomStreams(object):
        assert isinstance(dtype, str)
        assert n_streams < 2**72
        assert n_streams > 0
-        rval = numpy.zeros((n_streams, 6), dtype='int32')
+        rval = np.zeros((n_streams, 6), dtype='int32')
        rval[0] = self.rstate

        # If multMatVect.dot_modulo isn't compiled, compile it.
@@ -1276,7 +1276,7 @@ class MRG_RandomStreams(object):
            # HACK - we use fact that int32 and float32 have same size to
            # sneak ints into the CudaNdarray type.
            # these *SHOULD NEVER BE USED AS FLOATS*
-            tmp_float_buf = numpy.frombuffer(rval.data, dtype='float32')
+            tmp_float_buf = np.frombuffer(rval.data, dtype='float32')
            assert tmp_float_buf.shape == rval.shape
            assert (tmp_float_buf.view('int32') == rval).all()
            rval = tmp_float_buf
@@ -1334,9 +1334,9 @@ class MRG_RandomStreams(object):

        if isinstance(size, tuple):
            msg = "size must be a tuple of int or a Theano variable"
-            assert all([isinstance(i, (numpy.integer, integer_types, Variable))
+            assert all([isinstance(i, (np.integer, integer_types, Variable))
                        for i in size]), msg
-            if any([isinstance(i, (numpy.integer, integer_types)) and i <= 0
+            if any([isinstance(i, (np.integer, integer_types)) and i <= 0
                    for i in size]):
                raise ValueError(
                    "The specified size contains a dimension with value <= 0",
@@ -1558,10 +1558,10 @@ class MRG_RandomStreams(object):
        evened = False
        constant = False
        if (isinstance(size, tuple) and
-                all([isinstance(i, (numpy.integer, integer_types)) for i in size])):
+                all([isinstance(i, (np.integer, integer_types)) for i in size])):
            constant = True
            # Force dtype because it defaults to float when size is empty
-            n_samples = numpy.prod(size, dtype='int64')
+            n_samples = np.prod(size, dtype='int64')

            if n_samples % 2 == 1:
                n_samples += 1
@@ -1583,14 +1583,14 @@ class MRG_RandomStreams(object):
        sqrt_ln_U1 = sqrt(-2.0 * log(U1))
        # TypeError: 'TensorVariable' object does not support item assignment
        # so this doesn't work...
-        # normal_samples[:n_samples/2] = sqrt_ln_U1 * cos(2.0*numpy.pi*U2)
-        # normal_samples[n_samples/2:] = sqrt_ln_U1 * sin(2.0*numpy.pi*U2)
+        # normal_samples[:n_samples/2] = sqrt_ln_U1 * cos(2.0*np.pi*U2)
+        # normal_samples[n_samples/2:] = sqrt_ln_U1 * sin(2.0*np.pi*U2)

        # so trying this instead
        first_half = sqrt_ln_U1 * cos(
-            numpy.array(2.0 * numpy.pi, dtype=dtype) * U2)
+            np.array(2.0 * np.pi, dtype=dtype) * U2)
        second_half = sqrt_ln_U1 * sin(
-            numpy.array(2.0 * numpy.pi, dtype=dtype) * U2)
+            np.array(2.0 * np.pi, dtype=dtype) * U2)
        normal_samples = join(0, first_half, second_half)

        final_samples = None

--- a/theano/sandbox/tests/test_multinomial.py
+++ b/theano/sandbox/tests/test_multinomial.py
@@ -4,7 +4,7 @@ import sys
 from six import reraise

 from nose.plugins.skip import SkipTest
-import numpy
+import numpy as np

 import theano
 from theano import config, function, tensor
@@ -40,9 +40,9 @@ def test_n_samples_1():

    f = function([p, u, n], m, allow_input_downcast=True)

-    numpy.random.seed(12345)
+    np.random.seed(12345)
    for i in [1, 5, 10, 100, 1000, 10000]:
-        uni = numpy.random.rand(2 * i).astype(config.floatX)
+        uni = np.random.rand(2 * i).astype(config.floatX)
        res = f([[1.0, 0.0], [0.0, 1.0]], uni, i)
        utt.assert_allclose(res, [[i * 1.0, 0.0], [0.0, i * 1.0]])

@@ -55,17 +55,17 @@ def test_n_samples_2():

    f = function([p, u, n], m, allow_input_downcast=True)

-    numpy.random.seed(12345)
+    np.random.seed(12345)
    for i in [1, 5, 10, 100, 1000]:
-        uni = numpy.random.rand(i).astype(config.floatX)
-        pvals = numpy.random.randint(1, 1000, (1, 1000)).astype(config.floatX)
+        uni = np.random.rand(i).astype(config.floatX)
+        pvals = np.random.randint(1, 1000, (1, 1000)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
        assert res.sum() == i

    for i in [1, 5, 10, 100, 1000]:
-        uni = numpy.random.rand(i).astype(config.floatX)
-        pvals = numpy.random.randint(
+        uni = np.random.rand(i).astype(config.floatX)
+        pvals = np.random.randint(
            1, 1000000, (1, 1000000)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
@@ -104,8 +104,8 @@ def test_n_samples_compatibility():
            raise

        f = theano.function([X], samples)
-        res = f(numpy.random.randn(20, 10))
-        assert numpy.all(res.sum(axis=1) == 1)
+        res = f(np.random.randn(20, 10))
+        assert np.all(res.sum(axis=1) == 1)


 def test_multinomial_0():
@@ -160,9 +160,9 @@ def test_multinomial_large():
            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                        for node in f.maker.fgraph.toposort()])

-        pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
+        pval = np.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
-        uval = numpy.ones_like(pval[:, 0]) * 0.5
+        uval = np.ones_like(pval[:, 0]) * 0.5
        mval = f(pval, uval)

        assert mval.shape == pval.shape
@@ -175,7 +175,7 @@ def test_multinomial_large():
        else:
            raise NotImplementedError(config.cast_policy)
        utt.assert_allclose(mval.sum(axis=1), 2)
-        asdf = numpy.asarray([0, 0, 2, 0]) + 0 * pval
+        asdf = np.asarray([0, 0, 2, 0]) + 0 * pval
        utt.assert_allclose(mval, asdf)  # broadcast over all rows
    run_with_c(body)
    if cuda.cuda_available:
@@ -216,9 +216,9 @@ def test_gpu_opt():
    f = function([p, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
-    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
+    pval = np.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
-    uval = numpy.ones_like(pval[:, 0]) * 0.5
+    uval = np.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)

    # Test with a row, it was failing in the past.
@@ -230,7 +230,7 @@ def test_gpu_opt():
    f = function([r, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
-    pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4)) + 0.1
+    pval = np.arange(1 * 4, dtype='float32').reshape((1, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
-    uval = numpy.ones_like(pval[:, 0]) * 0.5
+    uval = np.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)
--- a/theano/sandbox/tests/test_multinomial_wo_replacement.py
+++ b/theano/sandbox/tests/test_multinomial_wo_replacement.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import os
 from theano import config, function, tensor
 from theano.compat import PY3
@@ -24,25 +24,25 @@ class test_OP(unittest.TestCase):

        n_elements = 1000
        all_indices = range(n_elements)
-        numpy.random.seed(12345)
+        np.random.seed(12345)
        expected = [
-            numpy.asarray([[931, 318, 185, 209, 559]]),
-            numpy.asarray([[477, 887, 2, 717, 333, 665, 159, 559, 348, 136]]),
-            numpy.asarray([[546, 28, 79, 665, 295, 779, 433, 531, 411, 716, 244, 234, 70, 88, 612, 639, 383, 335,
-                            451, 100, 175, 492, 848, 771, 559, 214, 568, 596, 370, 486, 855, 925, 138, 300, 528, 507,
-                            730, 199, 882, 357, 58, 195, 705, 900, 66, 468, 513, 410, 816, 672]])]
+            np.asarray([[931, 318, 185, 209, 559]]),
+            np.asarray([[477, 887, 2, 717, 333, 665, 159, 559, 348, 136]]),
+            np.asarray([[546, 28, 79, 665, 295, 779, 433, 531, 411, 716, 244, 234, 70, 88, 612, 639, 383, 335,
+                         451, 100, 175, 492, 848, 771, 559, 214, 568, 596, 370, 486, 855, 925, 138, 300, 528, 507,
+                         730, 199, 882, 357, 58, 195, 705, 900, 66, 468, 513, 410, 816, 672]])]

        for i in [5, 10, 50, 100, 500, n_elements]:
-            uni = numpy.random.rand(i).astype(config.floatX)
-            pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+            uni = np.random.rand(i).astype(config.floatX)
+            pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
            pvals /= pvals.sum(1)
            res = f(pvals, uni, i)
            for ii in range(len(expected)):
                if expected[ii].shape == res.shape:
                    assert (expected[ii] == res).all()
-            res = numpy.squeeze(res)
+            res = np.squeeze(res)
            assert len(res) == i
-            assert numpy.all(numpy.in1d(numpy.unique(res), all_indices)), res
+            assert np.all(np.in1d(np.unique(res), all_indices)), res

    def test_fail_select_alot(self):
        """
@@ -58,9 +58,9 @@ class test_OP(unittest.TestCase):

        n_elements = 100
        n_selected = 200
-        numpy.random.seed(12345)
-        uni = numpy.random.rand(n_selected).astype(config.floatX)
-        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+        np.random.seed(12345)
+        uni = np.random.rand(n_selected).astype(config.floatX)
+        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        self.assertRaises(ValueError, f, pvals, uni, n_selected)

@@ -79,18 +79,18 @@ class test_OP(unittest.TestCase):
        n_elements = 100
        n_selected = 10
        mean_rtol = 0.0005
-        numpy.random.seed(12345)
-        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+        np.random.seed(12345)
+        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
-        avg_pvals = numpy.zeros((n_elements,), dtype=config.floatX)
+        avg_pvals = np.zeros((n_elements,), dtype=config.floatX)

        for rep in range(10000):
-            uni = numpy.random.rand(n_selected).astype(config.floatX)
+            uni = np.random.rand(n_selected).astype(config.floatX)
            res = f(pvals, uni, n_selected)
-            res = numpy.squeeze(res)
+            res = np.squeeze(res)
            avg_pvals[res] += 1
        avg_pvals /= avg_pvals.sum()
-        avg_diff = numpy.mean(abs(avg_pvals - pvals))
+        avg_diff = np.mean(abs(avg_pvals - pvals))
        assert avg_diff < mean_rtol, avg_diff


@@ -110,14 +110,14 @@ class test_function(unittest.TestCase):

        n_elements = 1000
        all_indices = range(n_elements)
-        numpy.random.seed(12345)
+        np.random.seed(12345)
        for i in [5, 10, 50, 100, 500, n_elements]:
-            pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+            pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
            pvals /= pvals.sum(1)
            res = f(pvals, i)
-            res = numpy.squeeze(res)
+            res = np.squeeze(res)
            assert len(res) == i
-            assert numpy.all(numpy.in1d(numpy.unique(res), all_indices)), res
+            assert np.all(np.in1d(np.unique(res), all_indices)), res

    def test_fail_select_alot(self):
        """
@@ -134,8 +134,8 @@ class test_function(unittest.TestCase):

        n_elements = 100
        n_selected = 200
-        numpy.random.seed(12345)
-        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+        np.random.seed(12345)
+        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        self.assertRaises(ValueError, f, pvals, n_selected)

@@ -155,17 +155,17 @@ class test_function(unittest.TestCase):
        n_elements = 100
        n_selected = 10
        mean_rtol = 0.0005
-        numpy.random.seed(12345)
-        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+        np.random.seed(12345)
+        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
-        avg_pvals = numpy.zeros((n_elements,), dtype=config.floatX)
+        avg_pvals = np.zeros((n_elements,), dtype=config.floatX)

        for rep in range(10000):
            res = f(pvals, n_selected)
-            res = numpy.squeeze(res)
+            res = np.squeeze(res)
            avg_pvals[res] += 1
        avg_pvals /= avg_pvals.sum()
-        avg_diff = numpy.mean(abs(avg_pvals - pvals))
+        avg_diff = np.mean(abs(avg_pvals - pvals))
        assert avg_diff < mean_rtol

    def test_unpickle_legacy_op(self):

--- a/theano/sandbox/tests/test_rng_mrg.py
+++ b/theano/sandbox/tests/test_rng_mrg.py
@@ -8,7 +8,7 @@ import functools

 from nose.plugins.skip import SkipTest
 from nose.tools import assert_raises
-import numpy
+import numpy as np
 from six.moves import xrange

 import theano
@@ -44,9 +44,9 @@ utt.seed_rng()
 # 12 streams
 # 7 substreams for each stream
 # 5 samples drawn from each substream
-java_samples = numpy.loadtxt(os.path.join(os.path.split(theano.__file__)[0],
-                                          'sandbox',
-                                          'samples_MRG31k3p_12_7_5.txt'))
+java_samples = np.loadtxt(os.path.join(os.path.split(theano.__file__)[0],
+                                       'sandbox',
+                                       'samples_MRG31k3p_12_7_5.txt'))


 def test_deterministic():
@@ -65,15 +65,15 @@ def test_deterministic():

        fsample1 = f()
        fsample2 = f()
-        assert not numpy.allclose(fsample1, fsample2)
+        assert not np.allclose(fsample1, fsample2)

        R2 = MRG_RandomStreams(seed=seed, use_cuda=use_cuda)
        u2 = R2.uniform(size=sample_size)
        g = theano.function([], u2)
        gsample1 = g()
        gsample2 = g()
-        assert numpy.allclose(fsample1, gsample1)
-        assert numpy.allclose(fsample2, gsample2)
+        assert np.allclose(fsample1, gsample1)
+        assert np.allclose(fsample2, gsample2)


 def test_consistency_randomstreams():
@@ -102,12 +102,12 @@ def test_consistency_randomstreams():
            for j in range(n_samples):
                s = f()
                stream_samples.append(s)
-            stream_samples = numpy.array(stream_samples)
+            stream_samples = np.array(stream_samples)
            stream_samples = stream_samples.T.flatten()
            samples.append(stream_samples)

-        samples = numpy.array(samples).flatten()
-        assert(numpy.allclose(samples, java_samples))
+        samples = np.array(samples).flatten()
+        assert(np.allclose(samples, java_samples))


 def test_get_substream_rstates():
@@ -117,7 +117,7 @@ def test_get_substream_rstates():
        n_streams = 100

        dtype = 'float32'
-        rng = MRG_RandomStreams(numpy.random.randint(2147462579))
+        rng = MRG_RandomStreams(np.random.randint(2147462579))

        rng.get_substream_rstates(n_streams, dtype)

@@ -137,13 +137,13 @@ def test_consistency_cpu_serial():
    n_substreams = 7

    samples = []
-    curr_rstate = numpy.array([seed] * 6, dtype='int32')
+    curr_rstate = np.array([seed] * 6, dtype='int32')

    for i in range(n_streams):
        stream_rstate = curr_rstate.copy()
        for j in range(n_substreams):
-            rstate = theano.shared(numpy.array([stream_rstate.copy()],
-                                               dtype='int32'))
+            rstate = theano.shared(np.array([stream_rstate.copy()],
+                                            dtype='int32'))
            new_rstate, sample = rng_mrg.mrg_uniform.new(rstate, ndim=None,
                                                         dtype=config.floatX,
                                                         size=(1,))
@@ -164,8 +164,8 @@ def test_consistency_cpu_serial():
        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

-    samples = numpy.array(samples).flatten()
-    assert(numpy.allclose(samples, java_samples))
+    samples = np.array(samples).flatten()
+    assert(np.allclose(samples, java_samples))


 def test_consistency_cpu_parallel():
@@ -180,14 +180,14 @@ def test_consistency_cpu_parallel():
    n_substreams = 7  # 7 samples will be drawn in parallel

    samples = []
-    curr_rstate = numpy.array([seed] * 6, dtype='int32')
+    curr_rstate = np.array([seed] * 6, dtype='int32')

    for i in range(n_streams):
        stream_samples = []
        rstate = [curr_rstate.copy()]
        for j in range(1, n_substreams):
            rstate.append(rng_mrg.ff_2p72(rstate[-1]))
-        rstate = numpy.asarray(rstate)
+        rstate = np.asarray(rstate)
        rstate = theano.shared(rstate)

        new_rstate, sample = rng_mrg.mrg_uniform.new(rstate, ndim=None,
@@ -205,13 +205,13 @@ def test_consistency_cpu_parallel():
            s = f()
            stream_samples.append(s)

-        samples.append(numpy.array(stream_samples).T.flatten())
+        samples.append(np.array(stream_samples).T.flatten())

        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

-    samples = numpy.array(samples).flatten()
-    assert(numpy.allclose(samples, java_samples))
+    samples = np.array(samples).flatten()
+    assert(np.allclose(samples, java_samples))


 def test_consistency_GPU_serial():
@@ -233,16 +233,16 @@ def test_consistency_GPU_serial():
    n_substreams = 7

    samples = []
-    curr_rstate = numpy.array([seed] * 6, dtype='int32')
+    curr_rstate = np.array([seed] * 6, dtype='int32')

    for i in range(n_streams):
        stream_rstate = curr_rstate.copy()
        for j in range(n_substreams):
-            substream_rstate = numpy.array(stream_rstate.copy(), dtype='int32')
+            substream_rstate = np.array(stream_rstate.copy(), dtype='int32')
            # HACK - we transfer these int32 to the GPU memory as float32
            # (reinterpret_cast)
-            tmp_float_buf = numpy.frombuffer(substream_rstate.data,
-                                             dtype='float32')
+            tmp_float_buf = np.frombuffer(substream_rstate.data,
+                                          dtype='float32')
            # Transfer to device
            rstate = float32_shared_constructor(tmp_float_buf)

@@ -269,8 +269,8 @@ def test_consistency_GPU_serial():
        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

-    samples = numpy.array(samples).flatten()
-    assert(numpy.allclose(samples, java_samples))
+    samples = np.array(samples).flatten()
+    assert(np.allclose(samples, java_samples))


 def test_consistency_GPU_parallel():
@@ -293,17 +293,17 @@ def test_consistency_GPU_parallel():
    n_substreams = 7  # 7 samples will be drawn in parallel

    samples = []
-    curr_rstate = numpy.array([seed] * 6, dtype='int32')
+    curr_rstate = np.array([seed] * 6, dtype='int32')

    for i in range(n_streams):
        stream_samples = []
        rstate = [curr_rstate.copy()]
        for j in range(1, n_substreams):
            rstate.append(rng_mrg.ff_2p72(rstate[-1]))
-        rstate = numpy.asarray(rstate).flatten()
+        rstate = np.asarray(rstate).flatten()
        # HACK - transfer these int32 to the GPU memory as float32
        # (reinterpret_cast)
-        tmp_float_buf = numpy.frombuffer(rstate.data, dtype='float32')
+        tmp_float_buf = np.frombuffer(rstate.data, dtype='float32')
        # Transfer to device
        rstate = float32_shared_constructor(tmp_float_buf)

@@ -325,13 +325,13 @@ def test_consistency_GPU_parallel():
            s = f()
            stream_samples.append(s)

-        samples.append(numpy.array(stream_samples).T.flatten())
+        samples.append(np.array(stream_samples).T.flatten())

        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

-    samples = numpy.array(samples).flatten()
-    assert(numpy.allclose(samples, java_samples))
+    samples = np.array(samples).flatten()
+    assert(np.allclose(samples, java_samples))


 def test_GPU_nstreams_limit():
@@ -373,13 +373,13 @@ def test_consistency_GPUA_serial():
    n_substreams = 7

    samples = []
-    curr_rstate = numpy.array([seed] * 6, dtype='int32')
+    curr_rstate = np.array([seed] * 6, dtype='int32')

    for i in range(n_streams):
        stream_rstate = curr_rstate.copy()
        for j in range(n_substreams):
-            substream_rstate = numpy.array([stream_rstate.copy()],
-                                           dtype='int32')
+            substream_rstate = np.array([stream_rstate.copy()],
+                                        dtype='int32')
            # Transfer to device
            rstate = gpuarray_shared_constructor(substream_rstate)

@@ -407,8 +407,8 @@ def test_consistency_GPUA_serial():
        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

-    samples = numpy.array(samples).flatten()
-    assert(numpy.allclose(samples, java_samples))
+    samples = np.array(samples).flatten()
+    assert(np.allclose(samples, java_samples))


 def test_consistency_GPUA_parallel():
@@ -424,14 +424,14 @@ def test_consistency_GPUA_parallel():
    n_substreams = 7  # 7 samples will be drawn in parallel

    samples = []
-    curr_rstate = numpy.array([seed] * 6, dtype='int32')
+    curr_rstate = np.array([seed] * 6, dtype='int32')

    for i in range(n_streams):
        stream_samples = []
        rstate = [curr_rstate.copy()]
        for j in range(1, n_substreams):
            rstate.append(rng_mrg.ff_2p72(rstate[-1]))
-        rstate = numpy.asarray(rstate)
+        rstate = np.asarray(rstate)
        rstate = gpuarray_shared_constructor(rstate)

        new_rstate, sample = rng_mrg.GPUA_mrg_uniform.new(rstate, ndim=None,
@@ -452,13 +452,13 @@ def test_consistency_GPUA_parallel():
            s = f()
            stream_samples.append(s)

-        samples.append(numpy.array(stream_samples).T.flatten())
+        samples.append(np.array(stream_samples).T.flatten())

        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

-    samples = numpy.array(samples).flatten()
-    assert(numpy.allclose(samples, java_samples))
+    samples = np.array(samples).flatten()
+    assert(np.allclose(samples, java_samples))


 def test_GPUA_full_fill():
@@ -496,16 +496,16 @@ def basictest(f, steps, sample_size, prefix="", allow_01=False, inputs=None,
        ival = f(*inputs)
        assert ival.shape == sample_size
        dt += time.time() - t0
-        ival = numpy.asarray(ival)
+        ival = np.asarray(ival)
        if i == 0:
-            mean = numpy.array(ival, copy=True)
-            avg_var = numpy.mean((ival - target_avg) ** 2)
+            mean = np.array(ival, copy=True)
+            avg_var = np.mean((ival - target_avg) ** 2)
            min_ = ival.min()
            max_ = ival.max()
        else:
            alpha = 1.0 / (1 + i)
            mean = alpha * ival + (1 - alpha) * mean
-            avg_var = (alpha * numpy.mean((ival - target_avg) ** 2) +
+            avg_var = (alpha * np.mean((ival - target_avg) ** 2) +
                       (1 - alpha) * avg_var)
            min_ = min(min_, ival.min())
            max_ = max(max_, ival.max())
@@ -514,19 +514,19 @@ def basictest(f, steps, sample_size, prefix="", allow_01=False, inputs=None,
            assert max_ < 1

    if hasattr(target_avg, 'shape'):  # looks if target_avg is an array
-        diff = numpy.mean(abs(mean - target_avg))
+        diff = np.mean(abs(mean - target_avg))
        # print prefix, 'mean diff with mean', diff
-        assert numpy.all(diff < mean_rtol * (1 + abs(target_avg))), (
+        assert np.all(diff < mean_rtol * (1 + abs(target_avg))), (
            'bad mean? %s %s' % (mean, target_avg))
    else:
        # if target_avg is a scalar, then we can do the mean of
        # `mean` to get something more precise
-        mean = numpy.mean(mean)
+        mean = np.mean(mean)
        # print prefix, 'mean', mean
        assert abs(mean - target_avg) < mean_rtol * (1 + abs(target_avg)), (
            'bad mean? %f %f' % (mean, target_avg))

-    std = numpy.sqrt(avg_var)
+    std = np.sqrt(avg_var)
    # print prefix, 'var', avg_var
    # print prefix, 'std', std
    if target_std is not None:
@@ -556,9 +556,9 @@ def test_uniform():
    for size, const_size, var_input, input in [
            (sample_size, sample_size, [], []),
            (x.shape, sample_size, [x],
-             [numpy.zeros(sample_size, dtype=config.floatX)]),
+             [np.zeros(sample_size, dtype=config.floatX)]),
            ((x.shape[0], sample_size[1]), sample_size, [x],
-             [numpy.zeros(sample_size, dtype=config.floatX)]),
+             [np.zeros(sample_size, dtype=config.floatX)]),
            # test empty size (scalar)
            ((), (), [], []),
            ]:
@@ -586,7 +586,7 @@ def test_uniform():
        # print cpu_out[-1, -10:]

        # Increase the number of steps if sizes implies only a few samples
-        if numpy.prod(const_size) < 10:
+        if np.prod(const_size) < 10:
            steps_ = steps * 100
        else:
            steps_ = steps
@@ -607,15 +607,15 @@ def test_uniform():
                                   theano.sandbox.rng_mrg.GPU_mrg_uniform)
                        for node in f.maker.fgraph.toposort()])
            # theano.printing.debugprint(f)
-            gpu_out = numpy.asarray(f(*input))
+            gpu_out = np.asarray(f(*input))

            # print 'GPU: random?[:10], random?[-10:]'
            # print gpu_out[0, 0:10]
            # print gpu_out[-1, -10:]
            basictest(f, steps_, const_size, prefix='mrg  gpu', inputs=input)

-            numpy.testing.assert_array_almost_equal(cpu_out, gpu_out,
-                                                    decimal=6)
+            np.testing.assert_array_almost_equal(cpu_out, gpu_out,
+                                                 decimal=6)

        # print ''
        # print 'ON CPU w Numpy with size=(%s):' % str(size)
@@ -633,7 +633,7 @@ def test_broadcastable():
    x = tensor.matrix()
    size1 = (10, 1)
    size2 = (x.shape[0], 1)
-    pvals_1 = numpy.random.uniform(0, 1, size=size1)
+    pvals_1 = np.random.uniform(0, 1, size=size1)
    pvals_1 = pvals_1 / sum(pvals_1)
    pvals_2 = R.uniform(size=size2)
    pvals_2 = pvals_2 / tensor.sum(pvals_2)
@@ -684,9 +684,9 @@ def test_binomial():
        for size, const_size, var_input, input in [
                (sample_size, sample_size, [], []),
                (x.shape, sample_size, [x],
-                 [numpy.zeros(sample_size, dtype=config.floatX)]),
+                 [np.zeros(sample_size, dtype=config.floatX)]),
                ((x.shape[0], sample_size[1]), sample_size, [x],
-                 [numpy.zeros(sample_size, dtype=config.floatX)]),
+                 [np.zeros(sample_size, dtype=config.floatX)]),
                # test empty size (scalar)
                ((), (), [], []),
                ]:
@@ -701,7 +701,7 @@ def t_binomial(mean, size, const_size, var_input, input, steps, rtol):
    out = f(*input)

    # Increase the number of steps if sizes implies only a few samples
-    if numpy.prod(const_size) < 10:
+    if np.prod(const_size) < 10:
        steps_ = steps * 100
    else:
        steps_ = steps
@@ -717,13 +717,13 @@ def t_binomial(mean, size, const_size, var_input, input, steps, rtol):
        f = theano.function(var_input, theano.Out(
            theano.sandbox.cuda.basic_ops.gpu_from_host(u),
            borrow=True), mode=mode_with_gpu)
-        gpu_out = numpy.asarray(f(*input))
+        gpu_out = np.asarray(f(*input))

        basictest(f, steps_, const_size, prefix='mrg  gpu',
                  inputs=input, allow_01=True,
                  target_avg=mean, mean_rtol=rtol)
-        numpy.testing.assert_array_almost_equal(out, gpu_out,
-                                                decimal=6)
+        np.testing.assert_array_almost_equal(out, gpu_out,
+                                             decimal=6)

    RR = theano.tensor.shared_randomstreams.RandomStreams(234)

@@ -752,22 +752,22 @@ def test_normal0():
    for size, const_size, var_input, input, avg, rtol, std_tol in [
        (sample_size, sample_size, [], [], -5., default_rtol, default_rtol),
        (x.shape, sample_size, [x],
-         [numpy.zeros(sample_size, dtype=config.floatX)],
+         [np.zeros(sample_size, dtype=config.floatX)],
         -5., default_rtol, default_rtol),
        ((x.shape[0], sample_size[1]), sample_size, [x],
-         [numpy.zeros(sample_size, dtype=config.floatX)],
+         [np.zeros(sample_size, dtype=config.floatX)],
         -5., default_rtol, default_rtol),
        # test odd value
        (sample_size_odd, sample_size_odd, [], [], -5.,
         default_rtol, default_rtol),
        # test odd value
        (x.shape, sample_size_odd, [x],
-         [numpy.zeros(sample_size_odd, dtype=config.floatX)],
+         [np.zeros(sample_size_odd, dtype=config.floatX)],
         -5., default_rtol, default_rtol),
        (sample_size, sample_size, [], [],
-         numpy.arange(numpy.prod(sample_size),
-                      dtype='float32').reshape(sample_size),
-         10. * std / numpy.sqrt(steps), default_rtol),
+         np.arange(np.prod(sample_size),
+                   dtype='float32').reshape(sample_size),
+         10. * std / np.sqrt(steps), default_rtol),
        # test empty size (scalar)
        ((), (), [], [], -5., default_rtol, 0.02),
        # test with few samples at the same time
@@ -788,7 +788,7 @@ def test_normal0():
        # print 'random?[:10]\n', out[0, 0:10]

        # Increase the number of steps if size implies only a few samples
-        if numpy.prod(const_size) < 10:
+        if np.prod(const_size) < 10:
            steps_ = steps * 50
        else:
            steps_ = steps
@@ -812,7 +812,7 @@ def test_normal0():

            # theano.printing.debugprint(f)
            sys.stdout.flush()
-            gpu_out = numpy.asarray(f(*input))
+            gpu_out = np.asarray(f(*input))
            # print 'random?[:10]\n', gpu_out[0, 0:10]
            # print '----'
            sys.stdout.flush()
@@ -821,7 +821,7 @@ def test_normal0():
                      mean_rtol=rtol, std_tol=std_tol)
            # Need to allow some rounding error as their is float
            # computation that are done on the gpu vs cpu
-            assert numpy.allclose(out, gpu_out, rtol=5e-6, atol=5e-6)
+            assert np.allclose(out, gpu_out, rtol=5e-6, atol=5e-6)

        # print ''
        # print 'ON CPU w NUMPY:'
@@ -838,26 +838,26 @@ def basic_multinomialtest(f, steps, sample_size, target_pvals, n_samples,
                          prefix="", mean_rtol=0.04):

    dt = 0.0
-    avg_pvals = numpy.zeros(target_pvals.shape, dtype=config.floatX)
+    avg_pvals = np.zeros(target_pvals.shape, dtype=config.floatX)

    for i in xrange(steps):
        t0 = time.time()
        ival = f()
        assert ival.shape == sample_size
-        assert numpy.all(numpy.sum(ival, axis=1) == n_samples)
+        assert np.all(np.sum(ival, axis=1) == n_samples)
        dt += time.time() - t0
        avg_pvals += ival
    avg_pvals /= (steps * n_samples)

-    assert numpy.mean(abs(avg_pvals - target_pvals)) < mean_rtol
+    assert np.mean(abs(avg_pvals - target_pvals)) < mean_rtol

-    print('random?[:10]\n', numpy.asarray(f()[:10]))
+    print('random?[:10]\n', np.asarray(f()[:10]))
    print(prefix, 'mean', avg_pvals)
    # < mean_rtol, 'bad mean? %s %s' % (str(avg_pvals), str(target_pvals))
-    print(numpy.mean(abs(avg_pvals - target_pvals)))
+    print(np.mean(abs(avg_pvals - target_pvals)))
    print(prefix, 'time', dt)
-    print(prefix, 'elements', steps * numpy.prod(target_pvals.shape))
-    print(prefix, 'samples/sec', steps * numpy.prod(target_pvals.shape) / dt)
+    print(prefix, 'elements', steps * np.prod(target_pvals.shape))
+    print(prefix, 'samples/sec', steps * np.prod(target_pvals.shape) / dt)


 def test_multinomial():
@@ -875,8 +875,8 @@ def test_multinomial():
    # print ''
    # print 'ON CPU:'

-    pvals = numpy.asarray(numpy.random.uniform(size=sample_size))
-    pvals = numpy.apply_along_axis(lambda row: row / numpy.sum(row), 1, pvals)
+    pvals = np.asarray(np.random.uniform(size=sample_size))
+    pvals = np.apply_along_axis(lambda row: row / np.sum(row), 1, pvals)
    R = MRG_RandomStreams(234, use_cuda=False)
    # Note: we specify `nstreams` to avoid a warning.
    m = R.multinomial(pvals=pvals, dtype=config.floatX, nstreams=30 * 256)
@@ -892,7 +892,7 @@ def test_multinomial():
        # print ''
        # print 'ON GPU:'
        R = MRG_RandomStreams(234, use_cuda=True)
-        pvals = numpy.asarray(pvals, dtype='float32')
+        pvals = np.asarray(pvals, dtype='float32')
        # We give the number of streams to avoid a warning.
        n = R.multinomial(pvals=pvals, dtype='float32', nstreams=30 * 256)
        # well, it's really that this test w GPU doesn't make sense otw
@@ -907,7 +907,7 @@ def test_multinomial():
        sys.stdout.flush()
        basic_multinomialtest(f, steps, sample_size, pvals, n_samples=1,
                              prefix='gpu mrg ')
-        numpy.testing.assert_array_almost_equal(out, gpu_out, decimal=6)
+        np.testing.assert_array_almost_equal(out, gpu_out, decimal=6)


 def test_multinomial_n_samples():
@@ -922,8 +922,8 @@ def test_multinomial_n_samples():
        sample_size = (450, 6)
    mode_ = theano.compile.mode.get_mode(mode_)

-    pvals = numpy.asarray(numpy.random.uniform(size=sample_size))
-    pvals = numpy.apply_along_axis(lambda row: row / numpy.sum(row), 1, pvals)
+    pvals = np.asarray(np.random.uniform(size=sample_size))
+    pvals = np.apply_along_axis(lambda row: row / np.sum(row), 1, pvals)
    R = MRG_RandomStreams(234, use_cuda=False)

    for n_samples, steps in zip([5, 10, 100, 1000], [20, 10, 1, 1]):
@@ -936,7 +936,7 @@ def test_multinomial_n_samples():

        if mode != 'FAST_COMPILE' and cuda_available:
            R = MRG_RandomStreams(234, use_cuda=True)
-            pvals = numpy.asarray(pvals, dtype='float32')
+            pvals = np.asarray(pvals, dtype='float32')
            n = R.multinomial(pvals=pvals, n=n_samples,
                              dtype='float32', nstreams=30 * 256)
            assert n.dtype == 'float32'
@@ -999,14 +999,14 @@ def test_random_state_transfer():
    for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates):
        su2[0].set_value(su1[0].get_value())

-    numpy.testing.assert_array_almost_equal(f1(), f2(), decimal=6)
+    np.testing.assert_array_almost_equal(f1(), f2(), decimal=6)


 def test_gradient_scan():
    # Test for a crash when using MRG inside scan and taking the gradient
    # See https://groups.google.com/d/msg/theano-dev/UbcYyU5m-M8/UO9UgXqnQP0J
    theano_rng = MRG_RandomStreams(10)
-    w = theano.shared(numpy.ones(1, dtype='float32'))
+    w = theano.shared(np.ones(1, dtype='float32'))

    def one_step(x):
        return x + theano_rng.uniform((1,), dtype='float32') * w
@@ -1015,7 +1015,7 @@ def test_gradient_scan():
    values, updates = theano.scan(one_step, outputs_info=x, n_steps=10)
    gw = theano.grad(tensor.sum(values[-1]), w)
    f = theano.function([x], gw)
-    f(numpy.arange(1, dtype='float32'))
+    f(np.arange(1, dtype='float32'))


 def test_multMatVect():
@@ -1029,14 +1029,14 @@ def test_multMatVect():
    g0 = rng_mrg.DotModulo()(A1, s1, m1, A2, s2, m2)
    f0 = theano.function([A1, s1, m1, A2, s2, m2], g0)

-    i32max = numpy.iinfo(numpy.int32).max
+    i32max = np.iinfo(np.int32).max

-    A1 = numpy.random.randint(0, i32max, (3, 3)).astype('int64')
-    s1 = numpy.random.randint(0, i32max, 3).astype('int32')
-    m1 = numpy.asarray(numpy.random.randint(i32max), dtype="int32")
-    A2 = numpy.random.randint(0, i32max, (3, 3)).astype('int64')
-    s2 = numpy.random.randint(0, i32max, 3).astype('int32')
-    m2 = numpy.asarray(numpy.random.randint(i32max), dtype="int32")
+    A1 = np.random.randint(0, i32max, (3, 3)).astype('int64')
+    s1 = np.random.randint(0, i32max, 3).astype('int32')
+    m1 = np.asarray(np.random.randint(i32max), dtype="int32")
+    A2 = np.random.randint(0, i32max, (3, 3)).astype('int64')
+    s2 = np.random.randint(0, i32max, 3).astype('int32')
+    m2 = np.asarray(np.random.randint(i32max), dtype="int32")

    f0.input_storage[0].storage[0] = A1
    f0.input_storage[1].storage[0] = s1
@@ -1050,8 +1050,8 @@ def test_multMatVect():
    f0.fn()
    r_b = f0.output_storage[0].value

-    assert numpy.allclose(r_a1, r_b[:3])
-    assert numpy.allclose(r_a2, r_b[3:])
+    assert np.allclose(r_a1, r_b[:3])
+    assert np.allclose(r_a2, r_b[3:])


 def test_seed_fn():
@@ -1079,13 +1079,13 @@ def test_seed_fn():

            fn1_val0 = fn1()
            fn1_val1 = fn1()
-            assert not numpy.allclose(fn1_val0, fn1_val1)
+            assert not np.allclose(fn1_val0, fn1_val1)
            fn2_val0 = fn2()
            fn2_val1 = fn2()
-            assert not numpy.allclose(fn2_val0, fn2_val1)
+            assert not np.allclose(fn2_val0, fn2_val1)
            fn3_val0 = fn3([4])
            fn3_val1 = fn3([4])
-            assert not numpy.allclose(fn3_val0, fn3_val1)
+            assert not np.allclose(fn3_val0, fn3_val1)
            assert fn1_val0.size == 4
            assert fn2_val0.size == 9

@@ -1097,12 +1097,12 @@ def test_seed_fn():
            fn2_val3 = fn2()
            fn3_val2 = fn3([4])
            fn3_val3 = fn3([4])
-            assert numpy.allclose(fn1_val0, fn1_val2) == same
-            assert numpy.allclose(fn1_val1, fn1_val3) == same
-            assert numpy.allclose(fn2_val0, fn2_val2) == same
-            assert numpy.allclose(fn2_val1, fn2_val3) == same
-            assert numpy.allclose(fn3_val0, fn3_val2) == same
-            assert numpy.allclose(fn3_val1, fn3_val3) == same
+            assert np.allclose(fn1_val0, fn1_val2) == same
+            assert np.allclose(fn1_val1, fn1_val3) == same
+            assert np.allclose(fn2_val0, fn2_val2) == same
+            assert np.allclose(fn2_val1, fn2_val3) == same
+            assert np.allclose(fn3_val0, fn3_val2) == same
+            assert np.allclose(fn3_val1, fn3_val3) == same


 def rng_mrg_overflow(sizes, fct, mode, should_raise_error):
@@ -1118,7 +1118,7 @@ def rng_mrg_overflow(sizes, fct, mode, should_raise_error):

 def test_overflow_cpu():
    # run with THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32
-    rng = MRG_RandomStreams(numpy.random.randint(1234))
+    rng = MRG_RandomStreams(np.random.randint(1234))
    fct = rng.uniform
    # should raise error as the size overflows
    sizes = [(2**31, ), (2**32, ), (2**15, 2**16,), (2, 2**15, 2**15)]
@@ -1127,8 +1127,8 @@ def test_overflow_cpu():
    sizes = [(2**5, ), (2**5, 2**5), (2**5, 2**5, 2**5)]
    rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=False)
    # should support int32 sizes
-    sizes = [(numpy.int32(2**10), ),
-             (numpy.int32(2), numpy.int32(2**10), numpy.int32(2**10))]
+    sizes = [(np.int32(2**10), ),
+             (np.int32(2), np.int32(2**10), np.int32(2**10))]
    rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=False)


@@ -1147,8 +1147,8 @@ def test_overflow_gpu_old_backend():
    sizes = [(2**5, ), (2**5, 2**5), (2**5, 2**5, 2**5)]
    rng_mrg_overflow(sizes, fct, mode, should_raise_error=False)
    # should support int32 sizes
-    sizes = [(numpy.int32(2**10), ),
-             (numpy.int32(2), numpy.int32(2**10), numpy.int32(2**10))]
+    sizes = [(np.int32(2**10), ),
+             (np.int32(2), np.int32(2**10), np.int32(2**10))]
    rng_mrg_overflow(sizes, fct, mode, should_raise_error=False)


@@ -1159,11 +1159,11 @@ def test_overflow_gpu_new_backend():
    from theano.gpuarray.type import gpuarray_shared_constructor
    seed = 12345
    n_substreams = 7
-    curr_rstate = numpy.array([seed] * 6, dtype='int32')
+    curr_rstate = np.array([seed] * 6, dtype='int32')
    rstate = [curr_rstate.copy()]
    for j in range(1, n_substreams):
        rstate.append(rng_mrg.ff_2p72(rstate[-1]))
-    rstate = numpy.asarray(rstate)
+    rstate = np.asarray(rstate)
    rstate = gpuarray_shared_constructor(rstate)
    fct = functools.partial(rng_mrg.GPUA_mrg_uniform.new, rstate,
                            ndim=None, dtype='float32')
@@ -1174,8 +1174,8 @@ def test_overflow_gpu_new_backend():
    sizes = [(2**5, ), (2**5, 2**5), (2**5, 2**5, 2**5)]
    rng_mrg_overflow(sizes, fct, mode, should_raise_error=False)
    # should support int32 sizes
-    sizes = [(numpy.int32(2**10), ),
-             (numpy.int32(2), numpy.int32(2**10), numpy.int32(2**10))]
+    sizes = [(np.int32(2**10), ),
+             (np.int32(2), np.int32(2**10), np.int32(2**10))]
    rng_mrg_overflow(sizes, fct, mode, should_raise_error=False)


@@ -1185,12 +1185,12 @@ def test_validate_input_types_gpuarray_backend():
    from theano.configparser import change_flags

    with change_flags(compute_test_value="raise"):
-        rstate = numpy.zeros((7, 6), dtype="int32")
+        rstate = np.zeros((7, 6), dtype="int32")
        rstate = gpuarray_shared_constructor(rstate)
        mrg_uniform.new(rstate, ndim=None, dtype="float32", size=(3,))

 if __name__ == "__main__":
-    rng = MRG_RandomStreams(numpy.random.randint(2147462579))
+    rng = MRG_RandomStreams(np.random.randint(2147462579))
    print(theano.__file__)
    pvals = theano.tensor.fmatrix()
    for i in range(10):

--- a/theano/scan_module/scan.py
+++ b/theano/scan_module/scan.py
@@ -45,7 +45,7 @@ __contact__ = "Razvan Pascanu <r.pascanu@gmail>"


 import logging
-import numpy
+import numpy as np
 import warnings
 from collections import OrderedDict

@@ -488,8 +488,8 @@ def scan(fn,
        # a sequence, though is highly unlikely in practice
        if 'taps' in seq:
            # go through the indicated slice
-            mintap = numpy.min(seq['taps'])
-            maxtap = numpy.max(seq['taps'])
+            mintap = np.min(seq['taps'])
+            maxtap = np.max(seq['taps'])
            for k in seq['taps']:
                # create one slice of the input
                # Later on, if we decide not to use scan because we are
@@ -670,15 +670,15 @@ def scan(fn,

        elif init_out.get('taps', None):

-            if numpy.any(numpy.array(init_out.get('taps', [])) > 0):
+            if np.any(np.array(init_out.get('taps', [])) > 0):
                # Make sure we do not have requests for future values of a
                # sequence we can not provide such values
                raise ValueError('Can not use future taps of outputs',
                                    init_out)
            # go through the taps
-            mintap = abs(numpy.min(init_out['taps']))
+            mintap = abs(np.min(init_out['taps']))
            mit_sot_tap_array.append(init_out['taps'])
-            idx_offset = abs(numpy.min(init_out['taps']))
+            idx_offset = abs(np.min(init_out['taps']))
            # Sequence
            mit_sot_scan_inputs.append(
                scan_utils.expand_empty(init_out['initial'][:mintap],
@@ -725,9 +725,9 @@ def scan(fn,
        #      a map); in that case we do not have to do anything ..

    # Re-order args
-    max_mit_sot = numpy.max([-1] + mit_sot_rightOrder) + 1
-    max_sit_sot = numpy.max([-1] + sit_sot_rightOrder) + 1
-    n_elems = numpy.max([max_mit_sot, max_sit_sot])
+    max_mit_sot = np.max([-1] + mit_sot_rightOrder) + 1
+    max_sit_sot = np.max([-1] + sit_sot_rightOrder) + 1
+    n_elems = np.max([max_mit_sot, max_sit_sot])
    _ordered_args = [[] for x in xrange(n_elems)]
    offset = 0
    for idx in xrange(n_mit_sot):
@@ -1101,7 +1101,7 @@ def scan(fn,
        return out_ls

    offset = n_mit_mot
-    offsets = [abs(numpy.min(x)) for x in mit_sot_tap_array]
+    offsets = [abs(np.min(x)) for x in mit_sot_tap_array]
    mit_sot_outs = remove_dimensions(
        scan_outs[offset:offset + n_mit_sot],
        mit_sot_return_steps,

--- a/theano/scan_module/scan_op.py
+++ b/theano/scan_module/scan_op.py
@@ -54,7 +54,7 @@ import logging
 import time
 from collections import OrderedDict

-import numpy
+import numpy as np
 from six import iteritems, integer_types, raise_from
 from six.moves import xrange

@@ -193,7 +193,7 @@ class Scan(PureOp):
        self.info['name'] = self.name

        # Pre-computing some values to speed up perform
-        self.mintaps = [numpy.min(x) for x in self.tap_array]
+        self.mintaps = [np.min(x) for x in self.tap_array]
        self.mintaps += [0 for x in xrange(self.n_nit_sot)]
        self.seqs_arg_offset = 1 + self.n_seqs
        self.shared_arg_offset = (self.seqs_arg_offset +
@@ -336,7 +336,7 @@ class Scan(PureOp):
                          the inner function)

        """
-        assert numpy.all(isinstance(i, gof.Variable) for i in inputs)
+        assert np.all(isinstance(i, gof.Variable) for i in inputs)
        # Check that the number of inputs to the Scan node corresponds to
        # the number of inputs of the inner function of scan
        n_outer_ins = len(inputs) - len(self.outer_nitsot(inputs)) - 1
@@ -901,53 +901,53 @@ class Scan(PureOp):
        try:
            if impl == 'py':
                raise theano.gof.cmodule.MissingGXX
-            cython_mintaps = numpy.asarray(self.mintaps, dtype='int32')
+            cython_mintaps = np.asarray(self.mintaps, dtype='int32')
            cython_tap_array_len = \
-                numpy.asarray([len(x) for x in self.tap_array],
-                              dtype='int32')
+                np.asarray([len(x) for x in self.tap_array],
+                           dtype='int32')
            if len(self.tap_array) == 0:
                d1 = 0
            else:
-                d1 = numpy.max(cython_tap_array_len)
+                d1 = np.max(cython_tap_array_len)
            d0 = len(self.tap_array)
-            cython_tap_array = numpy.zeros((d0, d1), dtype='int32')
+            cython_tap_array = np.zeros((d0, d1), dtype='int32')
            for _d0 in xrange(d0):
                for _d1 in xrange(cython_tap_array_len[_d0]):
                    cython_tap_array[_d0, _d1] = self.tap_array[_d0][_d1]
            cython_mit_mot_out_nslices = \
-                numpy.asarray([len(x) for x in self.mit_mot_out_slices],
-                              dtype='int32')
+                np.asarray([len(x) for x in self.mit_mot_out_slices],
+                           dtype='int32')
            if len(self.mit_mot_out_slices) == 0:
                d1 = 0
            else:
-                d1 = numpy.max(cython_mit_mot_out_nslices)
+                d1 = np.max(cython_mit_mot_out_nslices)
            d0 = len(self.mit_mot_out_slices)
-            cython_mit_mot_out_slices = numpy.zeros((d0, d1),
-                                                    dtype='int32')
+            cython_mit_mot_out_slices = np.zeros((d0, d1),
+                                                 dtype='int32')
            for _d0 in xrange(d0):
                for _d1 in xrange(cython_mit_mot_out_nslices[_d0]):
                    cython_mit_mot_out_slices[_d0, _d1] = \
                        self.mit_mot_out_slices[_d0][_d1]

-            cython_vector_seqs = numpy.asarray(self.vector_seqs,
+            cython_vector_seqs = np.asarray(self.vector_seqs,
+                                            dtype='int32')
+            cython_vector_outs = np.asarray(self.vector_outs,
+                                            dtype='int32')
+            cython_mitmots_preallocated = np.asarray(self.mitmots_preallocated,
+                                                     dtype='int32')
+
+            cython_inps_is_tensor = np.asarray(self.inps_is_tensor,
                                               dtype='int32')
-            cython_vector_outs = numpy.asarray(self.vector_outs,
+            cython_outs_is_tensor = np.asarray(self.outs_is_tensor,
                                               dtype='int32')
-            cython_mitmots_preallocated = numpy.asarray(self.mitmots_preallocated,
-                                                        dtype='int32')
-
-            cython_inps_is_tensor = numpy.asarray(self.inps_is_tensor,
-                                                  dtype='int32')
-            cython_outs_is_tensor = numpy.asarray(self.outs_is_tensor,
-                                                  dtype='int32')

            if hasattr(self, 'destroy_map'):
                cython_destroy_map = [x in self.destroy_map
                                      for x in xrange(len(node.outputs))]
            else:
                cython_destroy_map = [0 for x in xrange(len(node.outputs))]
-            cython_destroy_map = numpy.asarray(cython_destroy_map,
-                                               dtype='int32')
+            cython_destroy_map = np.asarray(cython_destroy_map,
+                                            dtype='int32')
            from . import scan_perform_ext

            def p(node, args, outs):
@@ -2200,9 +2200,9 @@ class Scan(PureOp):
        # Seqs
        outer_inp_seqs = [x[::-1] for x in inputs[1:1 + self.n_seqs]]
        for idx in xrange(self.n_mit_mot + self.n_mit_sot):
-            mintap = numpy.min(self.tap_array[idx])
+            mintap = np.min(self.tap_array[idx])
            if idx < self.n_mit_mot:
-                outmaxtap = numpy.max(self.mitmot_out_taps()[idx])
+                outmaxtap = np.max(self.mitmot_out_taps()[idx])
            else:
                outmaxtap = 0
            seq = outs[idx]
@@ -2226,7 +2226,7 @@ class Scan(PureOp):
            # that.
            for taps, x in zip(self.mitsot_taps(),
                               self.outer_mitsot_outs(outs)):
-                mintap = numpy.min(taps)
+                mintap = np.min(taps)
                if hasattr(x[::-1][:mintap], 'test_value'):
                    assert (x[::-1][:mintap].tag.test_value.shape[0] ==
                            inputs[0].tag.test_value)
@@ -2238,7 +2238,7 @@ class Scan(PureOp):
                if hasattr(x[::-1].tag, 'test_value'):
                    assert (x[::-1].tag.test_value.shape[0] ==
                            inputs[0].tag.test_value)
-        outer_inp_seqs += [x[::-1][:numpy.min(taps)]
+        outer_inp_seqs += [x[::-1][:np.min(taps)]
                           for taps, x in zip(self.mitsot_taps(),
                                              self.outer_mitsot_outs(outs))]
        outer_inp_seqs += [x[::-1][:-1] for x in self.outer_sitsot_outs(outs)]
@@ -2726,8 +2726,8 @@ class Scan(PureOp):
        b = e
        e = e + self.n_mit_mot
        ib = ie
-        ie = ie + int(numpy.sum([len(x) for x in
-                                 self.tap_array[:self.n_mit_mot]]))
+        ie = ie + int(np.sum([len(x) for x in
+                              self.tap_array[:self.n_mit_mot]]))
        clean_eval_points = []
        for inp, evp in zip(inputs[b:e], eval_points[b:e]):
            if evp is not None:
@@ -2742,9 +2742,9 @@ class Scan(PureOp):
        b = e
        e = e + self.n_mit_sot
        ib = ie
-        ie = ie + int(numpy.sum([len(x) for x in
-                                 self.tap_array[self.n_mit_mot:
-                                                self.n_mit_mot + self.n_mit_sot]]))
+        ie = ie + int(np.sum([len(x) for x in
+                              self.tap_array[self.n_mit_mot:
+                                             self.n_mit_mot + self.n_mit_sot]]))
        clean_eval_points = []
        for inp, evp in zip(inputs[b:e], eval_points[b:e]):
            if evp is not None:
@@ -2795,8 +2795,8 @@ class Scan(PureOp):
        inner_other = self_inputs[ie:] + inner_eval_points[ib:]

        # Outputs
-        n_mit_mot_outs = int(numpy.sum([len(x) for x in
-                                        self.mit_mot_out_slices]))
+        n_mit_mot_outs = int(np.sum([len(x) for x in
+                                     self.mit_mot_out_slices]))
        info['n_mit_mot_outs'] = n_mit_mot_outs * 2
        b = 0
        e = n_mit_mot_outs

--- a/theano/scan_module/scan_opt.py
+++ b/theano/scan_module/scan_opt.py
@@ -54,7 +54,7 @@ import logging
 import copy
 from sys import maxsize
 from collections import OrderedDict
-import numpy
+import numpy as np

 import theano
 from theano import tensor, scalar
@@ -636,7 +636,7 @@ class PushOutSeqScan(gof.Optimizer):
                    if out in op.inner_mitsot_outs(ls):
                        odx = op.inner_mitsot_outs(ls).index(out)
                        inp = op.outer_mitsot(node)[odx]
-                        st = abs(numpy.min(op.mitsot_taps()))
+                        st = abs(np.min(op.mitsot_taps()))
                        y = tensor.set_subtensor(inp[st:], _y)
                    elif out in op.inner_sitsot_outs(ls):
                        odx = op.inner_sitsot_outs(ls).index(out)
@@ -1373,7 +1373,7 @@ class ScanSaveMem(gof.Optimizer):
                        # TODO: Simplify the number of steps needed.
                        # FB: This need good testing, left to later.
                        #     call get_scalar_constant_value()? it can
-                        # return python/numpy scalar or numpy.ndarray
+                        # return python/numpy scalar or np.ndarray
                        # currently.
                        # pval = pre_greedy_local_optimizer(list_opt_slice,
                        #                                  pval)

--- a/theano/scan_module/scan_perform_ext.py
+++ b/theano/scan_module/scan_perform_ext.py
@@ -12,7 +12,7 @@ import os
 import sys
 import warnings

-import numpy
+import numpy as np

 import theano
 from theano import config
@@ -103,7 +103,7 @@ except ImportError:
                # During scan cython development, it is helpful to keep the old interface, to don't manually edit the c file each time.
                preargs.remove('-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION')
            else:
-                numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
+                numpy_ver = [int(n) for n in np.__version__.split('.')[:2]]
                # Add add some macro to lower the number of edit
                # needed to the c file.
                if bool(numpy_ver >= [1, 7]):

--- a/theano/scan_module/scan_utils.py
+++ b/theano/scan_module/scan_utils.py
@@ -20,7 +20,7 @@ import logging
 import warnings
 from collections import OrderedDict

-import numpy
+import numpy as np

 import theano
 from theano.compat import izip
@@ -589,8 +589,8 @@ def get_updates_and_outputs(ls):
 def isNaN_or_Inf_or_None(x):
    isNone = x is None
    try:
-        isNaN = numpy.isnan(x)
-        isInf = numpy.isinf(x)
+        isNaN = np.isnan(x)
+        isInf = np.isinf(x)
        isStr = isinstance(x, string_types)
    except Exception:
        isNaN = False
@@ -599,8 +599,8 @@ def isNaN_or_Inf_or_None(x):
    if not isNaN and not isInf:
        try:
            val = get_scalar_constant_value(x)
-            isInf = numpy.isinf(val)
-            isNaN = numpy.isnan(val)
+            isInf = np.isinf(val)
+            isNaN = np.isnan(val)
        except Exception:
            isNaN = False
            isInf = False
@@ -959,7 +959,7 @@ def scan_can_remove_outs(op, out_idxs):
        added = False
        for pos, idx in enumerate(out_idxs):
            if (out_idxs_mask[pos] and
-                 numpy.any([x in required_inputs for x in out_ins[idx]])):
+                 np.any([x in required_inputs for x in out_ins[idx]])):
                # This output is required ..
                out_idxs_mask[pos] = 0
                required_inputs += gof.graph.inputs([op.outputs[idx]])

--- a/theano/scan_module/tests/test_scan.py
+++ b/theano/scan_module/tests/test_scan.py
@@ -11,7 +11,7 @@ from collections import OrderedDict

 import six.moves.cPickle as pickle
 from six.moves import xrange
-import numpy
+import numpy as np
 from nose.plugins.skip import SkipTest
 from nose.tools import assert_raises
 from nose.tools import raises
@@ -89,7 +89,7 @@ class multiple_outputs_numeric_grad:

        for i, p in enumerate(pt):
            if ndarray_mask[i]:
-                pt[i] = numpy.array(p)
+                pt[i] = np.array(p)
                _eps = type_eps[str(pt[i].dtype)]
                if _eps > dtype_eps:
                    dtype_eps = _eps
@@ -116,12 +116,12 @@ class multiple_outputs_numeric_grad:
                        t[pos] += _eps
                        t = t.reshape(pt[i].shape)
                        f_eps = f(*(pt[:i] + [t] + pt[i + 1:]))
-                        _g.append(numpy.asarray((f_eps - f_x) / _eps))
-                    gx.append(numpy.asarray(_g).reshape(pt[i].shape))
+                        _g.append(np.asarray((f_eps - f_x) / _eps))
+                    gx.append(np.asarray(_g).reshape(pt[i].shape))
                else:
-                    t = numpy.array(pt[i] + _eps)
+                    t = np.array(pt[i] + _eps)
                    f_eps = f(*(pt[:i] + [t] + pt[i + 1:]))
-                    gx.append(numpy.asarray((f_eps - f_x) / _eps))
+                    gx.append(np.asarray((f_eps - f_x) / _eps))
        self.gx = gx

    @staticmethod
@@ -137,8 +137,8 @@ class multiple_outputs_numeric_grad:
        for i in xrange(len(_g_pt)):
            if self.ndarray_mask[i]:
                g_pt.append(_g_pt[i])
-            elif isinstance(_g_pt[i], numpy.ndarray):
-                assert numpy.all(_g_pt[i] == 0)
+            elif isinstance(_g_pt[i], np.ndarray):
+                assert np.all(_g_pt[i] == 0)
        if len(g_pt) != len(self.gx):
            raise ValueError('argument has wrong number of elements',
                             len(g_pt))
@@ -149,12 +149,12 @@ class multiple_outputs_numeric_grad:
                raise ValueError('argument element %i has wrong shape %s' %
                                 (i, str((a.shape, b.shape))))
            vv = multiple_outputs_numeric_grad.abs_rel_err(a, b)
-            errs.append(numpy.max(
+            errs.append(np.max(
                multiple_outputs_numeric_grad.abs_rel_err(a, b)))
-        if numpy.all(numpy.isfinite(errs)):
-            return numpy.max(errs), numpy.argmax(errs)
+        if np.all(np.isfinite(errs)):
+            return np.max(errs), np.argmax(errs)
        else:
-            return numpy.inf, 0
+            return np.inf, 0


 # TODO: Test this function, and if it works,
@@ -262,11 +262,11 @@ class T_Scan(unittest.TestCase):
            if tmpdir is not None:
                shutil.rmtree(tmpdir)

-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        state = rng.uniform()
        steps = 5

-        numpy_values = numpy.array([state * (2 ** (k + 1)) for k
+        numpy_values = np.array([state * (2 ** (k + 1)) for k
                                    in xrange(steps)])
        theano_values = my_f(state, steps)
        utt.assert_allclose(numpy_values, theano_values)
@@ -300,7 +300,7 @@ class T_Scan(unittest.TestCase):
        assert all(i.value is None for i in scan_node.op.fn.input_storage)
        assert all(o.value is None for o in scan_node.op.fn.output_storage)

-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        state = rng.uniform()
        steps = 5

@@ -332,11 +332,11 @@ class T_Scan(unittest.TestCase):
                               updates=updates,
                               allow_input_downcast=True)

-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        state = rng.uniform()
        steps = 5

-        numpy_values = numpy.array([state * (2 ** (k + 1)) for k
+        numpy_values = np.array([state * (2 ** (k + 1)) for k
                                    in xrange(steps)])
        theano_values = my_f(state, steps)
        utt.assert_allclose(numpy_values, theano_values[0])
@@ -370,10 +370,10 @@ class T_Scan(unittest.TestCase):
        # This assertation fails if savemem optimization failed on scan
        if theano.config.mode != "FAST_COMPILE":
            assert nodes[0].op._scan_savemem_visited
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        my_f(rng.uniform(size=(3,)),
             4,
-             numpy.int64([2, 2, 3]))
+             np.int64([2, 2, 3]))

    @attr('slow')
    def test_only_nonseq_inputs(self):
@@ -388,9 +388,9 @@ class T_Scan(unittest.TestCase):
        fun = theano.function([inp], [broadcasted_inp, gr])

        # Execute the Theano function and compare outputs to the expected outputs
-        inputs = numpy.array([[1, 2], [3, 4]], dtype=theano.config.floatX)
-        expected_out1 = numpy.repeat(inputs[None], n_steps, axis=0)
-        expected_out2 = numpy.ones(inputs.shape, dtype="int8") * n_steps
+        inputs = np.array([[1, 2], [3, 4]], dtype=theano.config.floatX)
+        expected_out1 = np.repeat(inputs[None], n_steps, axis=0)
+        expected_out2 = np.ones(inputs.shape, dtype="int8") * n_steps

        out1, out2 = fun(inputs)
        utt.assert_allclose(out1, expected_out1)
@@ -420,14 +420,14 @@ class T_Scan(unittest.TestCase):
                             updates=updates,
                             allow_input_downcast=True)
        # get random initial values
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
-        v_out = numpy.zeros((4,))
+        v_out = np.zeros((4,))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in xrange(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
@@ -437,7 +437,7 @@ class T_Scan(unittest.TestCase):
    # simple rnn, one input, one state, weights for each; input/state
    # are vectors, weights are scalars; using shared variables
    def test_one_sequence_one_output_weights_shared(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        u = theano.tensor.vector('u')
        x0 = theano.tensor.scalar('x0')
        W_in = theano.shared(asarrayX(rng.uniform()), name='w_in')
@@ -462,19 +462,19 @@ class T_Scan(unittest.TestCase):
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        # compute the output i numpy
-        v_out = numpy.zeros((4,))
+        v_out = np.zeros((4,))
        v_out[0] = v_u[0] * W_in.get_value() + v_x0 * W.get_value()
        for step in xrange(1, 4):
            v_out[step] = (v_u[step] * W_in.get_value() +
                           v_out[step - 1] * W.get_value())

        theano_values = f3(v_u, v_x0)
-        assert numpy.allclose(theano_values, v_out)
+        assert np.allclose(theano_values, v_out)

    # some rnn with multiple outputs and multiple inputs; other
    # dimension instead of scalars/vectors
    def test_multiple_inputs_multiple_outputs(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        vWout = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
@@ -511,15 +511,15 @@ class T_Scan(unittest.TestCase):
                             allow_input_downcast=True)

        # compute the values in numpy
-        v_x = numpy.zeros((3, 2), dtype=theano.config.floatX)
-        v_y = numpy.zeros((3,), dtype=theano.config.floatX)
-        v_x[0] = (numpy.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 +
-                  numpy.dot(v_x0, vW))
-        v_y[0] = numpy.dot(v_x0, vWout)
+        v_x = np.zeros((3, 2), dtype=theano.config.floatX)
+        v_y = np.zeros((3,), dtype=theano.config.floatX)
+        v_x[0] = (np.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 +
+                  np.dot(v_x0, vW))
+        v_y[0] = np.dot(v_x0, vWout)
        for i in xrange(1, 3):
-            v_x[i] = (numpy.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 +
-                      numpy.dot(v_x[i - 1], vW))
-            v_y[i] = numpy.dot(v_x[i - 1], vWout)
+            v_x[i] = (np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 +
+                      np.dot(v_x[i - 1], vW))
+            v_y[i] = np.dot(v_x[i - 1], vWout)

        (theano_x, theano_y) = f4(v_u1, v_u2, v_x0, v_y0, vW_in1)
        utt.assert_allclose(theano_x, v_x)
@@ -527,7 +527,7 @@ class T_Scan(unittest.TestCase):

    def test_multiple_outs_taps(self):
        l = 5
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-.2, high=.2))
        vWout = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
@@ -579,40 +579,40 @@ class T_Scan(unittest.TestCase):
                       v_y0,
                       vW_in1)

-        ny0 = numpy.zeros((5, 2))
-        ny1 = numpy.zeros((5,))
-        ny2 = numpy.zeros((5, 2))
-        ny0[0] = numpy.dot(v_u1[0], vW_in1) + \
-                (v_u2[1] + v_u2[0] * v_u2[2]) * vW_in2 + numpy.dot(v_x0, vW)
+        ny0 = np.zeros((5, 2))
+        ny1 = np.zeros((5,))
+        ny2 = np.zeros((5, 2))
+        ny0[0] = np.dot(v_u1[0], vW_in1) + \
+                (v_u2[1] + v_u2[0] * v_u2[2]) * vW_in2 + np.dot(v_x0, vW)

-        ny1[0] = (v_y0[2] + v_y0[0]) * numpy.dot(v_x0, vWout)
-        ny2[0] = numpy.dot(v_u1[0], vW_in1)
+        ny1[0] = (v_y0[2] + v_y0[0]) * np.dot(v_x0, vWout)
+        ny2[0] = np.dot(v_u1[0], vW_in1)

-        ny0[1] = numpy.dot(v_u1[1], vW_in1) + \
-                (v_u2[2] + v_u2[1] * v_u2[3]) * vW_in2 + numpy.dot(ny0[0], vW)
+        ny0[1] = np.dot(v_u1[1], vW_in1) + \
+                (v_u2[2] + v_u2[1] * v_u2[3]) * vW_in2 + np.dot(ny0[0], vW)

-        ny1[1] = (ny1[0] + v_y0[1]) * numpy.dot(ny0[0], vWout)
-        ny2[1] = numpy.dot(v_u1[1], vW_in1)
+        ny1[1] = (ny1[0] + v_y0[1]) * np.dot(ny0[0], vWout)
+        ny2[1] = np.dot(v_u1[1], vW_in1)

-        ny0[2] = numpy.dot(v_u1[2], vW_in1) + \
+        ny0[2] = np.dot(v_u1[2], vW_in1) + \
                (v_u2[3] + v_u2[2] * v_u2[4]) * vW_in2 + \
-                numpy.dot(ny0[1], vW)
-        ny1[2] = (ny1[1] + v_y0[2]) * numpy.dot(ny0[1], vWout)
-        ny2[2] = numpy.dot(v_u1[2], vW_in1)
+                np.dot(ny0[1], vW)
+        ny1[2] = (ny1[1] + v_y0[2]) * np.dot(ny0[1], vWout)
+        ny2[2] = np.dot(v_u1[2], vW_in1)

-        ny0[3] = numpy.dot(v_u1[3], vW_in1) + \
+        ny0[3] = np.dot(v_u1[3], vW_in1) + \
                           (v_u2[4] + v_u2[3] * v_u2[5]) * vW_in2 + \
-                           numpy.dot(ny0[2], vW)
+                           np.dot(ny0[2], vW)

-        ny1[3] = (ny1[2] + ny1[0]) * numpy.dot(ny0[2], vWout)
-        ny2[3] = numpy.dot(v_u1[3], vW_in1)
+        ny1[3] = (ny1[2] + ny1[0]) * np.dot(ny0[2], vWout)
+        ny2[3] = np.dot(v_u1[3], vW_in1)

-        ny0[4] = numpy.dot(v_u1[4], vW_in1) + \
+        ny0[4] = np.dot(v_u1[4], vW_in1) + \
                           (v_u2[5] + v_u2[4] * v_u2[6]) * vW_in2 + \
-                           numpy.dot(ny0[3], vW)
+                           np.dot(ny0[3], vW)

-        ny1[4] = (ny1[3] + ny1[1]) * numpy.dot(ny0[3], vWout)
-        ny2[4] = numpy.dot(v_u1[4], vW_in1)
+        ny1[4] = (ny1[3] + ny1[1]) * np.dot(ny0[3], vWout)
+        ny2[4] = np.dot(v_u1[4], vW_in1)

    def test_using_taps_sequence(self):
        # this test refers to a bug reported by Nicolas
@@ -621,9 +621,9 @@ class T_Scan(unittest.TestCase):
        y, updates = theano.scan(lambda x: [x],
                                 sequences=dict(input=x, taps=[-1]),
                                 outputs_info=[None])
-        inp = numpy.arange(5).astype('float64')
+        inp = np.arange(5).astype('float64')
        rval = theano.function([x], y, updates=updates)(inp)
-        assert numpy.all(rval == inp[:-1])
+        assert np.all(rval == inp[:-1])

    def test_using_negative_taps_sequence(self):
        # This test refers to a bug reported on github on May 22 2015 by
@@ -636,7 +636,7 @@ class T_Scan(unittest.TestCase):
        f = theano.function([x], res, updates = upd)

        output =  f([1, 2, 3, 4, 5])
-        expected_output = numpy.array([1, 2, 3], dtype="float32")
+        expected_output = np.array([1, 2, 3], dtype="float32")
        utt.assert_allclose(output, expected_output)

    def test_connection_pattern(self):
@@ -649,8 +649,8 @@ class T_Scan(unittest.TestCase):
        def fn(a_m2, a_m1, b_m2, b_m1):
            return a_m1, b_m1

-        a0 = theano.shared(numpy.arange(2))
-        b0 = theano.shared(numpy.arange(2))
+        a0 = theano.shared(np.arange(2))
+        b0 = theano.shared(np.arange(2))

        (a, b), _ = theano.scan(fn,
                        outputs_info=[{'initial': a0, 'taps': [-2, -1]},
@@ -741,7 +741,7 @@ class T_Scan(unittest.TestCase):

        # Call verify_grad to ensure the correctness of the second gradients
        floatX = theano.config.floatX
-        inputs_test_values = [numpy.random.random((3)).astype(floatX)]
+        inputs_test_values = [np.random.random((3)).astype(floatX)]
        theano.tests.unittest_tools.verify_grad(get_sum_of_grad,
                                                inputs_test_values)

@@ -768,8 +768,8 @@ class T_Scan(unittest.TestCase):

        # Call verify_grad to ensure the correctness of the second gradients
        floatX = theano.config.floatX
-        inputs_test_values = [numpy.random.random((2, 3)).astype(floatX),
-                              numpy.random.random((3)).astype(floatX)]
+        inputs_test_values = [np.random.random((2, 3)).astype(floatX),
+                              np.random.random((3)).astype(floatX)]
        theano.tests.unittest_tools.verify_grad(get_sum_of_grad,
                                                inputs_test_values)

@@ -781,7 +781,7 @@ class T_Scan(unittest.TestCase):

        # forward pass
        W = theano.shared(
-            numpy.random.randn(2, 2).astype('float32'),
+            np.random.randn(2, 2).astype('float32'),
            name="W", borrow=True)

        def forward_scanner(x_t):
@@ -807,7 +807,7 @@ class T_Scan(unittest.TestCase):
    # vectors, weights are scalars; using shared variables and past
    # taps (sequences and outputs)
    def test_using_taps_input_output(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW = asarrayX(rng.uniform())
        vW_in = asarrayX(rng.uniform())
        vu = asarrayX(rng.uniform(size=(4,), low=-5., high=5.))
@@ -843,7 +843,7 @@ class T_Scan(unittest.TestCase):
        # in scan) which might seem strange, but then again why not use
        # v_0[t] instead of v_0[t-2] in a real application ??
        # also vx0[0] corresponds to vx0[-2], vx0[1] to vx0[-1]
-        numpy_out = numpy.zeros((2,))
+        numpy_out = np.zeros((2,))
        numpy_out[0] = vu[0] * vW_in + vx0[1] * vW + vx0[0]
        numpy_out[1] = vu[1] * vW_in + numpy_out[0] * vW + vx0[1]
        utt.assert_allclose(numpy_out, theano_out)
@@ -852,7 +852,7 @@ class T_Scan(unittest.TestCase):
    # vectors, weights are scalars; using shared variables and past
    # taps (sequences and outputs) and future taps for sequences
    def test_past_future_taps_shared(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW = asarrayX(rng.uniform())
        vW_in = asarrayX(rng.uniform())
        vu = asarrayX(rng.uniform(size=(6,), low=-5., high=5.))
@@ -880,7 +880,7 @@ class T_Scan(unittest.TestCase):
                             allow_input_downcast=True)
        theano_out = f8(vu, vx0)
        # compute output in numpy
-        numpy_out = numpy.zeros(2)
+        numpy_out = np.zeros(2)
        # think of vu[0] as vu[-2], vu[4] as vu[2]
        # and vx0[0] as vx0[-2], vx0[1] as vx0[-1]
        numpy_out[0] = (vu[0] + vu[4]) * vW_in + vx0[1] * vW + vx0[0]
@@ -889,9 +889,9 @@ class T_Scan(unittest.TestCase):

    # simple rnn ; compute inplace version 1
    def test_inplace1(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
-        vW = asarrayX(numpy.random.uniform())
-        vW_in = asarrayX(numpy.random.uniform())
+        rng = np.random.RandomState(utt.fetch_seed())
+        vW = asarrayX(np.random.uniform())
+        vW_in = asarrayX(np.random.uniform())
        vu0 = asarrayX(rng.uniform(size=(3,), low=-5., high=5.))
        vu1 = asarrayX(rng.uniform(size=(3,), low=-5., high=5.))
        vu2 = asarrayX(rng.uniform(size=(3,), low=-5., high=5.))
@@ -934,8 +934,8 @@ class T_Scan(unittest.TestCase):
        assert 0 in scan_node[0].op.destroy_map.keys()
        assert 1 in scan_node[0].op.destroy_map.keys()
        # compute output in numpy
-        numpy_x0 = numpy.zeros((3,))
-        numpy_x1 = numpy.zeros((3,))
+        numpy_x0 = np.zeros((3,))
+        numpy_x1 = np.zeros((3,))
        numpy_x0[0] = vu0[0] * vW_in + vx0 * vW + vu1[0] * vu2[0]
        numpy_x1[0] = vu0[0] * vW_in + vx1 * vW + vu1[0] + vu2[0]
        for i in xrange(1, 3):
@@ -953,9 +953,9 @@ class T_Scan(unittest.TestCase):

    # simple rnn ; compute inplace version 2
    def test_inplace2(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
-        vW = asarrayX(numpy.random.uniform())
-        vW_in = asarrayX(numpy.random.uniform())
+        rng = np.random.RandomState(utt.fetch_seed())
+        vW = asarrayX(np.random.uniform())
+        vW_in = asarrayX(np.random.uniform())
        vu0 = asarrayX(rng.uniform(size=(3,), low=-5., high=5.))
        vu1 = asarrayX(rng.uniform(size=(4,), low=-5., high=5.))
        vu2 = asarrayX(rng.uniform(size=(5,), low=-5., high=5.))
@@ -1006,8 +1006,8 @@ class T_Scan(unittest.TestCase):
        assert 0 in scan_node[0].op.destroy_map.keys()
        assert 1 in scan_node[0].op.destroy_map.keys()
        # compute output in numpy
-        numpy_x0 = numpy.zeros((3,))
-        numpy_x1 = numpy.zeros((3,))
+        numpy_x0 = np.zeros((3,))
+        numpy_x1 = np.zeros((3,))
        numpy_x0[0] = vu0[0] * vW_in + vx0 * vW + vu1[0] * vu1[1]
        numpy_x1[0] = vu0[0] * vW_in + vx1 * vW + vu2[0] + vu2[1] + vu2[2]
        for i in xrange(1, 3):
@@ -1024,7 +1024,7 @@ class T_Scan(unittest.TestCase):
        utt.assert_allclose(theano_x1, numpy_x1)

    def test_inplace3(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())

        vx0 = asarrayX(rng.uniform())
        vx1 = asarrayX(rng.uniform())
@@ -1035,7 +1035,7 @@ class T_Scan(unittest.TestCase):
                                       [],
                                       [x0, x1],
                                       n_steps=3)
-        x0 = asarrayX(numpy.zeros((3,)))
+        x0 = asarrayX(np.zeros((3,)))
        x0[0] = vx0
        x0 = theano.tensor.constant(x0)
        to_replace = outputs[0].owner.inputs[0].owner.inputs[1]
@@ -1053,7 +1053,7 @@ class T_Scan(unittest.TestCase):

    # Shared variable with updates
    def test_shared_arguments_with_updates(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())

        vW1 = asarrayX(rng.rand(2, 3))
        vW2 = asarrayX(rng.rand(3, 2))
@@ -1128,22 +1128,22 @@ class T_Scan(unittest.TestCase):
        theano_y0, theano_y1, theano_y2 = allstuff

        # do things in numpy
-        numpy_y0 = numpy.zeros((6, 2))
-        numpy_y1 = numpy.zeros((4, 2))
-        numpy_y2 = numpy.zeros((3, 3))
+        numpy_y0 = np.zeros((6, 2))
+        numpy_y1 = np.zeros((4, 2))
+        numpy_y2 = np.zeros((3, 3))
        numpy_y0[:3] = vy0
        numpy_y1[0] = vy1
        numpy_W1 = vW1.copy()
        numpy_W2 = vW2.copy()
        for idx in xrange(3):
-            numpy_y0[idx + 3] = numpy.dot(numpy.dot(vu1[idx, :], numpy_W1),
+            numpy_y0[idx + 3] = np.dot(np.dot(vu1[idx, :], numpy_W1),
                                          numpy_W2) + \
                                0.1 * numpy_y0[idx + 2] + \
                                0.33 * numpy_y0[idx + 1] + \
                                0.17 * numpy_y0[idx]
-            numpy_y1[idx + 1] = (numpy.dot(vu2[idx, :], numpy_W2) +
+            numpy_y1[idx + 1] = (np.dot(vu2[idx, :], numpy_W2) +
                                 numpy_y1[idx])
-            numpy_y2[idx] = numpy.dot(vu1[idx, :], numpy_W1)
+            numpy_y2[idx] = np.dot(vu1[idx, :], numpy_W1)
            numpy_W1 = numpy_W1 + .1
            numpy_W2 = numpy_W2 + .05

@@ -1174,7 +1174,7 @@ class T_Scan(unittest.TestCase):
        f = theano.function([c, x, y], [gX, gY],
                            allow_input_downcast=True)
        # Check for runtime errors
-        f(numpy.int32(0), numpy.float32(1.), numpy.float32(.5))
+        f(np.int32(0), np.float32(1.), np.float32(.5))

    def test_simple_shared_mrg_random(self):
        theano_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(utt.fetch_seed())
@@ -1211,10 +1211,10 @@ class T_Scan(unittest.TestCase):
                               updates=updates,
                               allow_input_downcast=True)

-        rng_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2 ** 30)
-        rng = numpy.random.RandomState(int(rng_seed))  # int() is for 32bit
+        rng_seed = np.random.RandomState(utt.fetch_seed()).randint(2 ** 30)
+        rng = np.random.RandomState(int(rng_seed))  # int() is for 32bit

-        numpy_v = numpy.zeros((10, 2))
+        numpy_v = np.zeros((10, 2))
        for i in xrange(10):
            numpy_v[i] = rng.uniform(-1, 1, size=(2,))

@@ -1224,12 +1224,12 @@ class T_Scan(unittest.TestCase):
        utt.assert_allclose(theano_v, numpy_v[5:, :])

    def test_gibbs_chain(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
-        v_W = numpy.array(rng.rand(20, 30) - .5, dtype='float32')
-        v_vsample = numpy.array(rng.binomial(1, .5, size=(3, 20),),
+        rng = np.random.RandomState(utt.fetch_seed())
+        v_W = np.array(rng.rand(20, 30) - .5, dtype='float32')
+        v_vsample = np.array(rng.binomial(1, .5, size=(3, 20),),
                                dtype='float32')
-        v_bvis = numpy.array(rng.rand(20) - .5, dtype='float32')
-        v_bhid = numpy.array(rng.rand(30) - .5, dtype='float32')
+        v_bvis = np.array(rng.rand(20) - .5, dtype='float32')
+        v_bhid = np.array(rng.rand(30) - .5, dtype='float32')
        W = theano.shared(v_W, 'vW')
        bhid = theano.shared(v_bhid, 'vbhid')
        bvis = theano.shared(v_bvis, 'vbvis')
@@ -1261,24 +1261,24 @@ class T_Scan(unittest.TestCase):
                               updates=updates,
                               allow_input_downcast=True)

-        _rng = numpy.random.RandomState(utt.fetch_seed())
+        _rng = np.random.RandomState(utt.fetch_seed())
        rng_seed = _rng.randint(2 ** 30)
-        nrng1 = numpy.random.RandomState(int(rng_seed))  # int() is for 32bit
+        nrng1 = np.random.RandomState(int(rng_seed))  # int() is for 32bit

        rng_seed = _rng.randint(2 ** 30)
-        nrng2 = numpy.random.RandomState(int(rng_seed))  # int() is for 32bit
+        nrng2 = np.random.RandomState(int(rng_seed))  # int() is for 32bit

        def numpy_implementation(vsample):
            for idx in range(10):
-                hmean = 1. / (1. + numpy.exp(-(numpy.dot(vsample, v_W) +\
+                hmean = 1. / (1. + np.exp(-(np.dot(vsample, v_W) +\
                        v_bhid)))
-                hsample = numpy.array(nrng1.binomial(1,
+                hsample = np.array(nrng1.binomial(1,
                                                     hmean,
                                                     size=hmean.shape),
                                      dtype='float32')
-                vmean = 1. / (1. + numpy.exp(-(numpy.dot(hsample, v_W.T) +\
+                vmean = 1. / (1. + np.exp(-(np.dot(hsample, v_W.T) +\
                        v_bvis)))
-                vsample = numpy.array(nrng2.binomial(1,
+                vsample = np.array(nrng2.binomial(1,
                                                     vmean,
                                                     size=vmean.shape),
                                      dtype='float32')
@@ -1290,7 +1290,7 @@ class T_Scan(unittest.TestCase):
        utt.assert_allclose(t_result, n_result)

    def test_only_shared_no_input_no_output(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_state = asarrayX(rng.uniform())
        state = theano.shared(v_state, 'vstate')

@@ -1331,7 +1331,7 @@ class T_Scan(unittest.TestCase):
                             outputs,
                             updates=updates,
                             allow_input_downcast=True)
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())

        v_u = rng.uniform(size=(5,), low=-5., high=5.)
        numpy_result = v_u + 3
@@ -1352,7 +1352,7 @@ class T_Scan(unittest.TestCase):
                            updates=abs_updates,
                            allow_input_downcast=True)

-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vals = rng.uniform(size=(10,), low=-5., high=5.)
        abs_vals = abs(vals)
        theano_vals = f(vals)
@@ -1380,14 +1380,14 @@ class T_Scan(unittest.TestCase):
                             updates=updates,
                             allow_input_downcast=True)
        # get random initial values
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
-        v_out = numpy.zeros((4,))
+        v_out = np.zeros((4,))
        v_out[0] = v_u[3] * W_in + v_x0 * W
        for step in xrange(1, 4):
            v_out[step] = v_u[3 - step] * W_in + v_out[step - 1] * W
@@ -1404,9 +1404,9 @@ class T_Scan(unittest.TestCase):
                            result,
                            updates=updates,
                            allow_input_downcast=True)
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_v = rng.uniform(size=(5,), low=-5., high=5.)
-        assert abs(numpy.sum(v_v) - f(v_v, 0.)) < 1e-3
+        assert abs(np.sum(v_v) - f(v_v, 0.)) < 1e-3

    def test_grad_one_output(self):
        def f_rnn(u_t, x_tm1, W_in, W):
@@ -1440,12 +1440,12 @@ class T_Scan(unittest.TestCase):
            allow_input_downcast=True)

        # get random initial values
-        rng = numpy.random.RandomState(utt.fetch_seed())
-        v_u = numpy.array(rng.uniform(size=(10,), low=-.5, high=.5),
+        rng = np.random.RandomState(utt.fetch_seed())
+        v_u = np.array(rng.uniform(size=(10,), low=-.5, high=.5),
                          dtype=theano.config.floatX)
-        v_x0 = numpy.array(rng.uniform(), dtype=theano.config.floatX)
-        W = numpy.array(rng.uniform(), dtype=theano.config.floatX)
-        W_in = numpy.array(rng.uniform(), dtype=theano.config.floatX)
+        v_x0 = np.array(rng.uniform(), dtype=theano.config.floatX)
+        W = np.array(rng.uniform(), dtype=theano.config.floatX)
+        W_in = np.array(rng.uniform(), dtype=theano.config.floatX)
        analytic_grad = grad_fn(v_u, v_x0, W_in, W)

        num_grad = multiple_outputs_numeric_grad(
@@ -1459,7 +1459,7 @@ class T_Scan(unittest.TestCase):
                             num_grad.gx[max_err_pos]))

    def test_grad_multiple_outs(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-.1, high=.1))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-.1, high=.1))
        vWout = asarrayX(rng.uniform(size=(2,), low=-.1, high=.1))
@@ -1524,7 +1524,7 @@ class T_Scan(unittest.TestCase):
    @attr('slow')
    def test_grad_multiple_outs_taps(self):
        l = 5
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-.2, high=.2))
        vWout = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
@@ -1618,7 +1618,7 @@ class T_Scan(unittest.TestCase):
    @attr('slow')
    def test_grad_multiple_outs_taps_backwards(self):
        l = 5
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-.2, high=.2))
        vWout = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
@@ -1685,10 +1685,10 @@ class T_Scan(unittest.TestCase):
                             num_grad.gx[max_err_pos]))

    def test_grad_multiple_outs_some_uncomputable(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW_in = asarrayX(rng.uniform(size=(2, 2), low=-3., high=3.))
        v_u = asarrayX(rng.uniform(size=(5, 2), low=-3., high=3.))
-        v_u2 = numpy.array([1, 3, 4, 6, 8], dtype='int32')
+        v_u2 = np.array([1, 3, 4, 6, 8], dtype='int32')
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-3., high=3.))

        W_in = theano.tensor.matrix('win')
@@ -1730,9 +1730,9 @@ class T_Scan(unittest.TestCase):
        def reset_rng_fn(fn, *args):
            for idx, arg in enumerate(fn.maker.expanded_inputs):
                if (arg.value and type(arg.value.data) == \
-                    type(numpy.random.RandomState(123))):
+                    type(np.random.RandomState(123))):
                    obj = fn.maker.expanded_inputs[idx].value
-                    obj.data = numpy.random.RandomState(123)
+                    obj.data = np.random.RandomState(123)
                    fn.maker.expanded_inputs[idx].value = obj
            return fn(*args)

@@ -1764,7 +1764,7 @@ class T_Scan(unittest.TestCase):
        assert(result == expected_result)

    def test_grad_multiple_outs_some_truncate(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW_in = asarrayX(rng.uniform(size=(2, 2), low=-.1, high=.1))
        v_u = asarrayX(rng.uniform(size=(5, 2), low=-.1, high=.1))
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-.1, high=.1))
@@ -1807,9 +1807,9 @@ class T_Scan(unittest.TestCase):
        def reset_rng_fn(fn, *args):
            for idx, arg in enumerate(fn.maker.expanded_inputs):
                if (arg.value and
-                    isinstance(arg.value.data, numpy.random.RandomState)):
+                    isinstance(arg.value.data, np.random.RandomState)):
                    obj = fn.maker.expanded_inputs[idx].value
-                    obj.data = numpy.random.RandomState(123)
+                    obj.data = np.random.RandomState(123)
                    fn.maker.expanded_inputs[idx].value = obj
            out = fn(*args)
            return out
@@ -1819,7 +1819,7 @@ class T_Scan(unittest.TestCase):
        num_grad = multiple_outputs_numeric_grad(
            reset_rng_cost_fn, [v_u, v_x0, vW_in])
        analytic_grad = reset_rng_grad_fn(v_u, v_x0, vW_in)
-        utt.assert_allclose(analytic_grad[0][:2], numpy.zeros((2, 2)))
+        utt.assert_allclose(analytic_grad[0][:2], np.zeros((2, 2)))

    def test_grad_multiple_outs_some_disconnected(self):
        final_cost = self._grad_mout_helper(100, mode_nodebug)
@@ -1833,7 +1833,7 @@ class T_Scan(unittest.TestCase):
    def _grad_mout_helper(self, n_iters, mode):
        # Created on Tue Oct 07 13:28:51 2014
        # @author: vaneetke
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        n_hid = 3
        n_in = 1
        n_out = 1
@@ -1897,10 +1897,10 @@ class T_Scan(unittest.TestCase):
                                      mode=mode)

        # artificial data
-        x_v = numpy.arange(0., 10.49, 0.21, dtype=theano.config.floatX)
+        x_v = np.arange(0., 10.49, 0.21, dtype=theano.config.floatX)
        x_v = x_v.reshape(len(x_v), 1)
-        s_v = numpy.sin(x_v)
-        t_v = numpy.roll(s_v, -1)[:-1]
+        s_v = np.sin(x_v)
+        t_v = np.roll(s_v, -1)[:-1]
        s_v = s_v[:-1]
        for i in xrange(n_iters):
            cost = learn_rnn_fn(s_v, t_v)
@@ -1919,14 +1919,14 @@ class T_Scan(unittest.TestCase):
                            updates=updates,
                            allow_input_downcast=True)

-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        nx = rng.uniform(size=(10, 10))
        ny1, nz1 = f(nx)
        ny2, nz2 = f(nx)

        utt.assert_allclose([ny1, ny1], nz1)
        utt.assert_allclose([ny2, ny2], nz2)
-        assert not numpy.allclose(ny1, ny2)
+        assert not np.allclose(ny1, ny2)

    def test_grad_of_shared(self):
        x1 = theano.shared(3.)
@@ -1942,7 +1942,7 @@ class T_Scan(unittest.TestCase):

    def test_computing_gradient(self):
        x1 = theano.tensor.scalar('x1')
-        x2 = theano.shared(numpy.array([1, 2, 3, 4, 5]), name='x2')
+        x2 = theano.shared(np.array([1, 2, 3, 4, 5]), name='x2')
        K = x2 * x1

        out, updates = theano.scan(lambda i, v: theano.tensor.grad(K[i], v),
@@ -1950,10 +1950,10 @@ class T_Scan(unittest.TestCase):
                non_sequences=x1)
        f = theano.function([x1], out, allow_input_downcast=True)

-        assert numpy.all(f(3.) != 0.)
+        assert np.all(f(3.) != 0.)

    def test_shared_updates(self):
-        X = theano.shared(numpy.array(1))
+        X = theano.shared(np.array(1))

        out, updates = theano.scan(
            lambda: OrderedDict([(X, (X + 1))]),
@@ -1967,8 +1967,8 @@ class T_Scan(unittest.TestCase):
        assert X.get_value() == 11

    def test_memory_aliasing_updates(self):
-        x = theano.shared(numpy.array(1))
-        y = theano.shared(numpy.array(1))
+        x = theano.shared(np.array(1))
+        y = theano.shared(np.array(1))

        out, updates = theano.scan(
            lambda: OrderedDict([(x, x + 1), (y, x)]),
@@ -1979,7 +1979,7 @@ class T_Scan(unittest.TestCase):

        f = theano.function([], [], updates=updates)
        f()
-        assert not numpy.may_share_memory(x.container.storage[0],
+        assert not np.may_share_memory(x.container.storage[0],
                                          y.container.storage[0])

        assert x.get_value() != y.get_value()
@@ -1998,7 +1998,7 @@ class T_Scan(unittest.TestCase):
        """
        a = theano.tensor.vector()
        init_a = theano.tensor.vector()
-        b = theano.shared(numpy.random.rand(5, 4))
+        b = theano.shared(np.random.rand(5, 4))

        def inner_func(a):
            return a + 1, OrderedDict([(b, 2 * b)])
@@ -2032,9 +2032,9 @@ class T_Scan(unittest.TestCase):
            non_sequences=[gy, x])

        f = theano.function([x, A], hy, allow_input_downcast=True)
-        vx = numpy.array([1., 1.], dtype=theano.config.floatX)
-        vA = numpy.array([[1., 1.], [1., 0.]], dtype=theano.config.floatX)
-        vR = numpy.array([[3.6, 1.8], [1.8, 0.9]], dtype=theano.config.floatX)
+        vx = np.array([1., 1.], dtype=theano.config.floatX)
+        vA = np.array([[1., 1.], [1., 0.]], dtype=theano.config.floatX)
+        vR = np.array([[3.6, 1.8], [1.8, 0.9]], dtype=theano.config.floatX)
        out = f(vx, vA)

        utt.assert_allclose(out, vR)
@@ -2157,7 +2157,7 @@ class T_Scan(unittest.TestCase):
    # some rnn with multiple outputs and multiple inputs; other
    # dimension instead of scalars/vectors
    def test_reordering(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        vWout = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
@@ -2200,15 +2200,15 @@ class T_Scan(unittest.TestCase):
                             allow_input_downcast=True)

        # compute the values in numpy
-        v_x = numpy.zeros((3, 2), dtype=theano.config.floatX)
-        v_y = numpy.zeros((3,), dtype=theano.config.floatX)
-        v_x[0] = numpy.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 + \
-                    numpy.dot(v_x0, vW)
-        v_y[0] = numpy.dot(v_x0, vWout) + v_y0[2]
+        v_x = np.zeros((3, 2), dtype=theano.config.floatX)
+        v_y = np.zeros((3,), dtype=theano.config.floatX)
+        v_x[0] = np.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 + \
+                    np.dot(v_x0, vW)
+        v_y[0] = np.dot(v_x0, vWout) + v_y0[2]
        for i in xrange(1, 3):
-            v_x[i] = numpy.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + \
-                        numpy.dot(v_x[i - 1], vW)
-            v_y[i] = numpy.dot(v_x[i - 1], vWout) + v_y[i - 1]
+            v_x[i] = np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + \
+                        np.dot(v_x[i - 1], vW)
+            v_y[i] = np.dot(v_x[i - 1], vWout) + v_y[i - 1]

        (theano_dump1, theano_dump2, theano_x, theano_y) = f4(v_u1,
                                                              v_u2,
@@ -2247,7 +2247,7 @@ class T_Scan(unittest.TestCase):
                                 allow_input_downcast=True)

    def test_save_mem(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        vWout = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
@@ -2288,16 +2288,16 @@ class T_Scan(unittest.TestCase):
                             allow_input_downcast=True)

        # compute the values in numpy
-        v_x = numpy.zeros((8, 2), dtype=theano.config.floatX)
-        v_y = numpy.zeros((8,), dtype=theano.config.floatX)
-        v_x[0] = numpy.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 + \
-                        numpy.dot(v_x0, vW)
-        v_y[0] = numpy.dot(v_x0, vWout) + v_y0[2]
+        v_x = np.zeros((8, 2), dtype=theano.config.floatX)
+        v_y = np.zeros((8,), dtype=theano.config.floatX)
+        v_x[0] = np.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 + \
+                        np.dot(v_x0, vW)
+        v_y[0] = np.dot(v_x0, vWout) + v_y0[2]

        for i in xrange(1, 8):
-            v_x[i] = numpy.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + \
-                        numpy.dot(v_x[i - 1], vW)
-            v_y[i] = numpy.dot(v_x[i - 1], vWout) + v_y[i - 1]
+            v_x[i] = np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + \
+                        np.dot(v_x[i - 1], vW)
+            v_y[i] = np.dot(v_x[i - 1], vWout) + v_y[i - 1]

        (theano_dump, theano_x, theano_y) = f4(v_u1, v_u2, v_x0, v_y0, vW_in1)

@@ -2321,24 +2321,24 @@ class T_Scan(unittest.TestCase):

        sh = expr.shape[0]

-        v1 = theano.shared(numpy.ones(5, dtype=theano.config.floatX))
-        v2 = theano.shared(numpy.ones((5, 5), dtype=theano.config.floatX))
+        v1 = theano.shared(np.ones(5, dtype=theano.config.floatX))
+        v2 = theano.shared(np.ones((5, 5), dtype=theano.config.floatX))
        shapef = theano.function([W],
                                 expr,
                                 givens=OrderedDict([(initial, v1),
                                         (inpt, v2)]))
        # First execution to cache n_steps
-        shapef(numpy.ones((5, 5), dtype=theano.config.floatX))
+        shapef(np.ones((5, 5), dtype=theano.config.floatX))

        cost = expr.sum()
        d_cost_wrt_W = tensor.grad(cost, [W])
        f = theano.function(
            [W, inpt], d_cost_wrt_W,
-            givens=OrderedDict([(initial, theano.shared(numpy.zeros(5)))]))
+            givens=OrderedDict([(initial, theano.shared(np.zeros(5)))]))

-        rval = numpy.asarray([[5187989] * 5] * 5, dtype=theano.config.floatX)
-        arg1 = numpy.ones((5, 5), dtype=theano.config.floatX)
-        arg2 = numpy.ones((10, 5), dtype=theano.config.floatX)
+        rval = np.asarray([[5187989] * 5] * 5, dtype=theano.config.floatX)
+        arg1 = np.ones((5, 5), dtype=theano.config.floatX)
+        arg2 = np.ones((10, 5), dtype=theano.config.floatX)
        utt.assert_allclose(f(arg1, arg2), rval)

    def test_save_mem_reduced_number_of_steps(self):
@@ -2372,7 +2372,7 @@ class T_Scan(unittest.TestCase):
                              updates=updates,
                              allow_input_downcast=True)
        # get random initial values
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(20,), low=-5., high=5.)

        # compute the output in numpy
@@ -2428,7 +2428,7 @@ class T_Scan(unittest.TestCase):
                             allow_input_downcast=True)

        # get random initial values
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(20,), low=-5., high=5.)

        # compute the output in numpy
@@ -2474,7 +2474,7 @@ class T_Scan(unittest.TestCase):
        floatX = theano.config.floatX

        init_value = 5.0
-        seq_value = numpy.arange(4, dtype=floatX)
+        seq_value = np.arange(4, dtype=floatX)
        output1, output2 = fct(init_value, seq_value)

        expected_output1 = [init_value]
@@ -2509,13 +2509,13 @@ class T_Scan(unittest.TestCase):
                              [out1_direct, out2_direct])

        # Test that the function returns valid outputs
-        x_val = numpy.arange(0, 4)[:, None]
-        seq_val = numpy.arange(4, 8)[:, None]
+        x_val = np.arange(0, 4)[:, None]
+        seq_val = np.arange(4, 8)[:, None]

        out1, out2 = fct(x_val, seq_val)

-        expected_out1 = numpy.zeros((5, 4, 1))
-        expected_out2 = numpy.zeros((5, 4, 1))
+        expected_out1 = np.zeros((5, 4, 1))
+        expected_out2 = np.zeros((5, 4, 1))
        for i in range(4):
            expected_out2[i + 1] = expected_out2[i] + seq_val[i]
        for i in range(5):
@@ -2565,7 +2565,7 @@ class T_Scan(unittest.TestCase):
            diff = mitsot_m1 + seq1
            next_mitsot_val = mitsot_m2 + diff
            next_sitsot_val = sitsot_m1 - diff
-            nitsot_out = tensor.alloc(numpy.asarray(0., 'float32'),
+            nitsot_out = tensor.alloc(np.asarray(0., 'float32'),
                                      next_mitsot_val +
                                      next_sitsot_val)
            return next_sitsot_val, next_mitsot_val, nitsot_out
@@ -2584,7 +2584,7 @@ class T_Scan(unittest.TestCase):
        assert(len(scan_nodes_from_fct(f)) == 1)

        # This generate a scan crash during execution.
-        # output_shape = f(numpy.arange(5), 5, [1, 2])
+        # output_shape = f(np.arange(5), 5, [1, 2])
        # assert(all(output_shape == (5, 6)))

    # The following test will fail in DebugMode if there are
@@ -2608,7 +2608,7 @@ class T_Scan(unittest.TestCase):
        go1 = theano.tensor.grad(o1.mean(), wrt=x)
        f = theano.function([x], go1, updates=updates,
                            allow_input_downcast=True, mode=mode_with_opt)
-        self.assertTrue(numpy.allclose(f([1, 2, 3]), 2. / 3))
+        self.assertTrue(np.allclose(f([1, 2, 3]), 2. / 3))

        topo = f.maker.fgraph.toposort()
        # this new assert is here to test if scan_merging works ..
@@ -2711,7 +2711,7 @@ class T_Scan(unittest.TestCase):
            n.op, theano.scan_module.scan_op.Scan)]
        self.assertTrue(len(scans) == 2)

-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        x_val = rng.uniform(size=(4,)).astype(theano.config.floatX)
        y_val = rng.uniform(size=(4,)).astype(theano.config.floatX)
        # Run it so DebugMode can detect optimization problems.
@@ -2752,7 +2752,7 @@ class T_Scan(unittest.TestCase):
            return M

        # some initializations
-        hypx = numpy.log(numpy.tile([1,1,1,1,1,1,0.01], (3,1)))
+        hypx = np.log(np.tile([1,1,1,1,1,1,0.01], (3,1)))

        # variables used in the following expressions
        hyp = theano.shared(hypx)
@@ -2763,10 +2763,10 @@ class T_Scan(unittest.TestCase):

        M = init_predictive_output(inputs,targets,hyp,x_star,s_star)

-        X = numpy.random.random((10,4))
-        Y = numpy.random.random((10,3))
-        test_m = numpy.random.random((4,))
-        test_s = numpy.eye(4)
+        X = np.random.random((10,4))
+        Y = np.random.random((10,3))
+        test_m = np.random.random((4,))
+        test_s = np.eye(4)

        # Compute expected outputs (jacobian of M wrt x_star)
        dfdm = theano.function([inputs,targets,x_star,s_star],
@@ -2851,9 +2851,9 @@ class T_Scan(unittest.TestCase):

        x = theano.tensor.fmatrix('x')

-        mem_val = numpy.zeros((2,), dtype='float32')
+        mem_val = np.zeros((2,), dtype='float32')
        memory = theano.shared(mem_val)
-        W = theano.shared(numpy.random.random((5, 2)).astype('float32'))
+        W = theano.shared(np.random.random((5, 2)).astype('float32'))

        def f(inp, mem):
            i = theano.tensor.join(0, inp, mem)
@@ -2867,7 +2867,7 @@ class T_Scan(unittest.TestCase):
        f = theano.function([x], outs[0])
        f2 = theano.function([x], outs[1])

-        x_val = numpy.random.random((4, 3)).astype('float32')
+        x_val = np.random.random((4, 3)).astype('float32')

        f_vals = f(x_val)
        memory.set_value(mem_val)
@@ -2876,12 +2876,12 @@ class T_Scan(unittest.TestCase):

    def test_reduce_memory_consumption(self):

-        x = theano.shared(numpy.asarray(
-            numpy.random.uniform(size=(10,)), dtype=theano.config.floatX))
+        x = theano.shared(np.asarray(
+            np.random.uniform(size=(10,)), dtype=theano.config.floatX))
        o, _ = theano.reduce(lambda v, acc: acc + v,
                             x,
                             theano.tensor.constant(
-                                 numpy.asarray(0.,
+                                 np.asarray(0.,
                                               dtype=theano.config.floatX)))
        mode = theano.compile.mode.FAST_RUN
        mode = mode.excluding('inplace')
@@ -2905,15 +2905,15 @@ class T_Scan(unittest.TestCase):

        gx = theano.tensor.grad(o, x)
        f2 = theano.function([], gx)
-        utt.assert_allclose(f2(), numpy.ones((10,)))
+        utt.assert_allclose(f2(), np.ones((10,)))

    def test_foldl_memory_consumption(self):
-        x = theano.shared(numpy.asarray(
-            numpy.random.uniform(size=(10,)), dtype=theano.config.floatX))
+        x = theano.shared(np.asarray(
+            np.random.uniform(size=(10,)), dtype=theano.config.floatX))
        o, _ = theano.foldl(lambda v, acc: acc + v,
                            x,
                            theano.tensor.constant(
-                                numpy.asarray(0.,
+                                np.asarray(0.,
                                              dtype=theano.config.floatX)))

        mode = theano.compile.mode.FAST_RUN
@@ -2938,16 +2938,16 @@ class T_Scan(unittest.TestCase):

        gx = theano.tensor.grad(o, x)
        f2 = theano.function([], gx)
-        utt.assert_allclose(f2(), numpy.ones((10,)))
+        utt.assert_allclose(f2(), np.ones((10,)))

    def test_foldr_memory_consumption(self):

-        x = theano.shared(numpy.asarray(
-            numpy.random.uniform(size=(10,)), dtype=theano.config.floatX))
+        x = theano.shared(np.asarray(
+            np.random.uniform(size=(10,)), dtype=theano.config.floatX))
        o, _ = theano.foldr(lambda v, acc: acc + v,
                            x,
                            theano.tensor.constant(
-                                numpy.asarray(0.,
+                                np.asarray(0.,
                                              dtype=theano.config.floatX)))

        mode = theano.compile.mode.FAST_RUN
@@ -2972,26 +2972,26 @@ class T_Scan(unittest.TestCase):

        gx = theano.tensor.grad(o, x)
        f2 = theano.function([], gx)
-        utt.assert_allclose(f2(), numpy.ones((10,)))
+        utt.assert_allclose(f2(), np.ones((10,)))

    @attr('slow')
    def test_rop2(self):
        seed = utt.fetch_seed()
-        rng = numpy.random.RandomState(seed)
+        rng = np.random.RandomState(seed)
        floatX = theano.config.floatX
-        v_u = numpy.array(rng.uniform(size=(3, 5)) - .5, dtype=floatX)
-        v_W = numpy.array(rng.uniform(size=(5, 5)) - .5, dtype=floatX)
-        v_h0 = numpy.array(rng.uniform(size=(5,)) - .5, dtype=floatX)
+        v_u = np.array(rng.uniform(size=(3, 5)) - .5, dtype=floatX)
+        v_W = np.array(rng.uniform(size=(5, 5)) - .5, dtype=floatX)
+        v_h0 = np.array(rng.uniform(size=(5,)) - .5, dtype=floatX)

-        v_eu = numpy.array(rng.uniform(size=(3, 5)) - .5, dtype=floatX)
-        v_eW = numpy.array(rng.uniform(size=(5, 5)) - .5, dtype=floatX)
-        v_eh0 = numpy.array(rng.uniform(size=(5,)) - .5, dtype=floatX)
+        v_eu = np.array(rng.uniform(size=(3, 5)) - .5, dtype=floatX)
+        v_eW = np.array(rng.uniform(size=(5, 5)) - .5, dtype=floatX)
+        v_eh0 = np.array(rng.uniform(size=(5,)) - .5, dtype=floatX)

        def rnn_fn(_u, _y, _W):

            srng = theano.tensor.shared_randomstreams.RandomStreams(seed)
            tmp_val = _u + _y + srng.uniform(size=v_h0.shape) *\
-                        numpy.asarray(1e-6, dtype=floatX)
+                        np.asarray(1e-6, dtype=floatX)
            sl_o = theano.tensor.tanh(theano.tensor.dot(_W, tmp_val))
            return sl_o, tmp_val

@@ -3053,15 +3053,15 @@ class T_Scan(unittest.TestCase):

    def test_rop(self):
        seed = utt.fetch_seed()
-        rng = numpy.random.RandomState(seed)
+        rng = np.random.RandomState(seed)
        floatX = theano.config.floatX
-        v_u = numpy.array(rng.uniform(size=(20, 5)), dtype=floatX)
-        v_W = numpy.array(rng.uniform(size=(5, 5)), dtype=floatX)
-        v_h0 = numpy.array(rng.uniform(size=(5,)), dtype=floatX)
+        v_u = np.array(rng.uniform(size=(20, 5)), dtype=floatX)
+        v_W = np.array(rng.uniform(size=(5, 5)), dtype=floatX)
+        v_h0 = np.array(rng.uniform(size=(5,)), dtype=floatX)

-        v_eu = numpy.array(rng.uniform(size=(20, 5)), dtype=floatX)
-        v_eW = numpy.array(rng.uniform(size=(5, 5)), dtype=floatX)
-        v_eh0 = numpy.array(rng.uniform(size=(5,)), dtype=floatX)
+        v_eu = np.array(rng.uniform(size=(20, 5)), dtype=floatX)
+        v_eW = np.array(rng.uniform(size=(5, 5)), dtype=floatX)
+        v_eh0 = np.array(rng.uniform(size=(5,)), dtype=floatX)

        def rnn_fn(_u, _y, _W):
            sl_o = theano.tensor.tanh(theano.tensor.dot(_W, (_u + _y)))
@@ -3163,14 +3163,14 @@ class T_Scan(unittest.TestCase):
        assert len(scan_nodes) == 0

        seed = utt.fetch_seed()
-        rng = numpy.random.RandomState(seed)
+        rng = np.random.RandomState(seed)
        floatX = theano.config.floatX
-        v_h = numpy.array(rng.uniform(size=(2,)), dtype=floatX)
-        v_W1 = numpy.array(rng.uniform(size=(2, 2)), dtype=floatX)
-        v_W2 = numpy.array(rng.uniform(size=(2, 2)), dtype=floatX)
+        v_h = np.array(rng.uniform(size=(2,)), dtype=floatX)
+        v_W1 = np.array(rng.uniform(size=(2, 2)), dtype=floatX)
+        v_W2 = np.array(rng.uniform(size=(2, 2)), dtype=floatX)

-        v_out = numpy.dot(v_h, v_W1 + v_W2)
-        sol = numpy.zeros((5, 2))
+        v_out = np.dot(v_h, v_W1 + v_W2)
+        sol = np.zeros((5, 2))
        # This line is here to make sol have the same shape as the output of
        # theano. Note that what we ask theano to do is to repeat the 2
        # elements vector v_out 5 times
@@ -3206,9 +3206,9 @@ class T_Scan(unittest.TestCase):
        f_ref = theano.function([W1, W2, step_indices], o, mode='FAST_COMPILE')

        # Compare the results of the two implementations
-        input_values = [numpy.random.random((5, 5)).astype("float32"),
-                        numpy.random.random((5, 5)).astype("float32"),
-                        numpy.arange(5).astype("float32")]
+        input_values = [np.random.random((5, 5)).astype("float32"),
+                        np.random.random((5, 5)).astype("float32"),
+                        np.arange(5).astype("float32")]

        out = f(*input_values)
        out_ref = f_ref(*input_values)
@@ -3243,10 +3243,10 @@ class T_Scan(unittest.TestCase):

        ([i_t, i_tm1], _) = theano.scan(
            fn, sequences=[inp],
-            outputs_info=[numpy.asarray([0.0, 0.0], theano.config.floatX),
+            outputs_info=[np.asarray([0.0, 0.0], theano.config.floatX),
                          None])
        f = theano.function([inp], [i_t, i_tm1])
-        val = numpy.arange(10).reshape(5, 2).astype(theano.config.floatX)
+        val = np.arange(10).reshape(5, 2).astype(theano.config.floatX)
        ret = f(val)
        utt.assert_allclose(ret[0], val + 10)
        utt.assert_allclose(ret[1], [[0.,  0.],
@@ -3330,7 +3330,7 @@ class T_Scan(unittest.TestCase):
            return x_t + 1, theano.scan_module.until(x_t > 3)
        o, _ = theano.scan(lambda_fn, x)
        f = theano.function([x], o)
-        vx = numpy.zeros((50,), dtype=theano.config.floatX)
+        vx = np.zeros((50,), dtype=theano.config.floatX)
        vx[23] = 4
        out = f(vx)
        assert len(out) == 24
@@ -3344,11 +3344,11 @@ class T_Scan(unittest.TestCase):
        o2, _ = theano.scan(lambda x_t: x_t + 2, x)

        f = theano.function([x], [o, o2], mode=mode_with_opt)
-        vx = numpy.zeros((50,), dtype=theano.config.floatX)
+        vx = np.zeros((50,), dtype=theano.config.floatX)
        vx[23] = 4
        out, out2 = f(vx)
        assert len(out) == 24
-        assert numpy.all(out2 == vx + 2)
+        assert np.all(out2 == vx + 2)
        lssc = [x for x in f.maker.fgraph.toposort()
                if isinstance(x.op, theano.scan_module.scan_op.Scan)]
        # One scan node gets optimnized out
@@ -3402,7 +3402,7 @@ class T_Scan(unittest.TestCase):
                     polynomial3[-1],
                     polynomial4[-1]])

-        test_coeff = numpy.asarray([1, 0, 2], dtype=theano.config.floatX)
+        test_coeff = np.asarray([1, 0, 2], dtype=theano.config.floatX)
        # This will be tested by DEBUG_MODE
        out = calculate_polynomial(test_coeff, 3)
        assert out[0] == 19
@@ -3480,7 +3480,7 @@ class T_Scan(unittest.TestCase):
                            x)

        f = theano.function([x], [o, o2], mode=mode_with_opt)
-        vx = numpy.zeros((50,), dtype=theano.config.floatX)
+        vx = np.zeros((50,), dtype=theano.config.floatX)
        vx[23] = 4
        out, out2 = f(vx)
        assert len(out) == 24
@@ -3497,7 +3497,7 @@ class T_Scan(unittest.TestCase):
        o, _ = theano.scan(lambda_fn, x)

        f = theano.function([x], o.shape[0], mode=mode_with_opt)
-        vx = numpy.zeros((50,), dtype=theano.config.floatX)
+        vx = np.zeros((50,), dtype=theano.config.floatX)
        vx[23] = 4
        out = f(vx)
        assert out == 24
@@ -3516,7 +3516,7 @@ class T_Scan(unittest.TestCase):
                            [o1.shape[0], o2.shape[0]],
                            mode=mode_with_opt)

-        vx = numpy.ones((10,), dtype=theano.config.floatX)
+        vx = np.ones((10,), dtype=theano.config.floatX)
        out1, out2 = f(vx)
        assert out1 == 10
        assert out2 == 10
@@ -3535,7 +3535,7 @@ class T_Scan(unittest.TestCase):
                            [o1.shape[0], o2.shape[0]],
                            mode=mode_with_opt)

-        vx = numpy.ones((30,), dtype=theano.config.floatX)
+        vx = np.ones((30,), dtype=theano.config.floatX)
        o1, o2 = f(vx)
        assert o1 == 20
        assert o2 == 20
@@ -3635,13 +3635,13 @@ class T_Scan(unittest.TestCase):

        # Run the function and validate the outputs
        dtype = theano.config.floatX
-        seq_value = numpy.random.random((10, 3)).astype(dtype)
-        out_init_value = numpy.random.random((3, 3)).astype(dtype)
-        non_seq_value = numpy.random.random((3)).astype(dtype)
+        seq_value = np.random.random((10, 3)).astype(dtype)
+        out_init_value = np.random.random((3, 3)).astype(dtype)
+        non_seq_value = np.random.random((3)).astype(dtype)

        outputs =  fct(seq_value, out_init_value, non_seq_value)

-        expected_g_seq = numpy.array([[4, 4, 4],
+        expected_g_seq = np.array([[4, 4, 4],
                                      [3, 3, 3],
                                      [3, 3, 3],
                                      [3, 3, 3],
@@ -3652,7 +3652,7 @@ class T_Scan(unittest.TestCase):
                                      [1, 1, 1],
                                      [1, 1, 1]])
        expected_g_out_init = expected_g_seq[:3]
-        expected_g_non_seq = numpy.array([22, 22, 22])
+        expected_g_non_seq = np.array([22, 22, 22])

        utt.assert_allclose(outputs[0], expected_g_seq)
        utt.assert_allclose(outputs[1], expected_g_out_init)
@@ -3729,7 +3729,7 @@ class T_Scan(unittest.TestCase):
        assert tf([1.0, 2.0, -3.0, 4.0], 2.0) == 42

    def test_return_steps(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        vWout = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
@@ -3774,16 +3774,16 @@ class T_Scan(unittest.TestCase):
                             allow_input_downcast=True)

        # compute the values in numpy
-        v_x = numpy.zeros((8, 2), dtype=theano.config.floatX)
-        v_y = numpy.zeros((8,), dtype=theano.config.floatX)
-        v_x[0] = numpy.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 + \
-                    numpy.dot(v_x0, vW)
-        v_y[0] = numpy.dot(v_x0, vWout) + v_y0[2]
+        v_x = np.zeros((8, 2), dtype=theano.config.floatX)
+        v_y = np.zeros((8,), dtype=theano.config.floatX)
+        v_x[0] = np.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 + \
+                    np.dot(v_x0, vW)
+        v_y[0] = np.dot(v_x0, vWout) + v_y0[2]

        for i in xrange(1, 8):
-            v_x[i] = numpy.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + \
-                        numpy.dot(v_x[i - 1], vW)
-            v_y[i] = numpy.dot(v_x[i - 1], vWout) + v_y[i - 1]
+            v_x[i] = np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + \
+                        np.dot(v_x[i - 1], vW)
+            v_y[i] = np.dot(v_x[i - 1], vWout) + v_y[i - 1]

        (theano_dump, theano_x, theano_y) = f4(v_u1, v_u2, v_x0, v_y0, vW_in1)

@@ -3811,14 +3811,14 @@ class T_Scan(unittest.TestCase):
            assert any([isinstance(node.op, tensor.blas.Dot22)
                        for node in topo])

-        vx = numpy.array([[1., 1.], [2., 2.]], dtype=theano.config.floatX)
-        vA = numpy.array([[1., 1.], [1., 0.]], dtype=theano.config.floatX)
-        vR = numpy.array([[[2, 1], [4, 2]], [[2, 1], [4, 2]]],
+        vx = np.array([[1., 1.], [2., 2.]], dtype=theano.config.floatX)
+        vA = np.array([[1., 1.], [1., 0.]], dtype=theano.config.floatX)
+        vR = np.array([[[2, 1], [4, 2]], [[2, 1], [4, 2]]],
                         dtype=theano.config.floatX)
        utt.assert_allclose(f(vx, vA), vR)

    def test_savemem_opt(self):
-        y0 = theano.shared(numpy.ones((2, 10)))
+        y0 = theano.shared(np.ones((2, 10)))
        [y1, y2], updates = theano.scan(lambda y: [y, y],
                                         outputs_info=[dict(initial=y0,
                                                            taps=[-2]), None],
@@ -3860,9 +3860,9 @@ class T_Scan(unittest.TestCase):
        f = theano.function(inputs=[x, w], outputs=get_outputs(x, w))

        # Test the function to ensure it returns valid results
-        x_value = numpy.random.random((2, 2, 3)).astype(theano.config.floatX)
-        w_value = numpy.random.random((3, 3)).astype(theano.config.floatX)
-        expected_output = numpy.tile(x_value[:, 0].sum(0), (3, 1)).transpose()
+        x_value = np.random.random((2, 2, 3)).astype(theano.config.floatX)
+        w_value = np.random.random((3, 3)).astype(theano.config.floatX)
+        expected_output = np.tile(x_value[:, 0].sum(0), (3, 1)).transpose()

        output = f(x_value, w_value)
        utt.assert_allclose(output, expected_output)
@@ -3891,17 +3891,17 @@ class T_Scan(unittest.TestCase):
        gw, gx = tensor.grad(loss, [w, xinit])
        grad_fn = theano.function([xinit, w], [gx, gw],
                                 allow_input_downcast=True)
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        # If numbers are small, the gradients with respect to x are small
        # and the numeric differentiation becomes unstable.
        # To fix this issue I ensure we are sampling numbers larger in
        # absolute value than 1.
-        v_x = numpy.array(rng.uniform(size=(5, 2, 2), low=1., high=3.),
+        v_x = np.array(rng.uniform(size=(5, 2, 2), low=1., high=3.),
                           dtype=theano.config.floatX)
        # Making some entries to be negative.
        pos = rng.uniform(size=(5, 2, 2), low=0., high=1) < .5
        v_x[pos] = -1 * v_x[pos]
-        v_w = numpy.array(rng.uniform(size=(2, 2), low=1., high=3.),
+        v_w = np.array(rng.uniform(size=(2, 2), low=1., high=3.),
                          dtype=theano.config.floatX)
        pos = rng.uniform(size=(2, 2), low=0., high=1.) < .5
        v_w[pos] = -1 * v_w[pos]
@@ -3916,11 +3916,11 @@ class T_Scan(unittest.TestCase):
                             num_grad.gx[max_err_pos]))

    def test_grad_numeric_shared(self):
-        shared_var = theano.shared(numpy.float32(1.))
+        shared_var = theano.shared(np.float32(1.))

        def inner_fn():
            return [], OrderedDict(
-                [(shared_var, shared_var + numpy.float32(1.))])
+                [(shared_var, shared_var + np.float32(1.))])
        _, updates = theano.scan(inner_fn,
                                 n_steps=10,
                                 truncate_gradient=-1,
@@ -3940,7 +3940,7 @@ class T_Scan(unittest.TestCase):
        n_pars = 1 * 3 + 3 * 3

        # Allocate big parameter array.
-        pars = theano.shared(numpy.empty(n_pars))
+        pars = theano.shared(np.empty(n_pars))

        # Assign slices.
        W1 = pars[:3].reshape(W1shape)
@@ -3983,15 +3983,15 @@ class T_Scan(unittest.TestCase):
        Hp = tensor.Rop(d_cost_wrt_pars, pars, p)

    def test_seq_tap_bug_jeremiah(self):
-        inp = numpy.arange(10).reshape(-1, 1).astype(theano.config.floatX)
-        exp_out = numpy.zeros((10, 1)).astype(theano.config.floatX)
+        inp = np.arange(10).reshape(-1, 1).astype(theano.config.floatX)
+        exp_out = np.zeros((10, 1)).astype(theano.config.floatX)
        exp_out[4:] = inp[:-4]

        def onestep(x, x_tm4):
            return x, x_tm4

        seq = tensor.matrix()
-        initial_value = theano.shared(numpy.zeros((4, 1),
+        initial_value = theano.shared(np.zeros((4, 1),
                                                  dtype=theano.config.floatX))
        outputs_info = [OrderedDict(
            [('initial', initial_value), ('taps', [-4])]), None]
@@ -4000,7 +4000,7 @@ class T_Scan(unittest.TestCase):
                                       outputs_info=outputs_info)

        f = theano.function([seq], results[1])
-        assert numpy.all(exp_out == f(inp))
+        assert np.all(exp_out == f(inp))

    def test_borrow_bug_jeremiah(self):
        # This tests two things. The first is a bug occuring when scan wrongly
@@ -4008,29 +4008,29 @@ class T_Scan(unittest.TestCase):
        # method will be able to remove the Scan node from the graph in this
        # case.

-        inp = numpy.arange(10).reshape(-1, 1).astype(theano.config.floatX)
-        exp_out = numpy.zeros((10, 1)).astype(theano.config.floatX)
+        inp = np.arange(10).reshape(-1, 1).astype(theano.config.floatX)
+        exp_out = np.zeros((10, 1)).astype(theano.config.floatX)
        exp_out[4:] = inp[:-4]

        def onestep(x, x_tm4):
            return x, x_tm4

        seq = tensor.matrix()
-        initial_value = theano.shared(numpy.zeros((4, 1),
+        initial_value = theano.shared(np.zeros((4, 1),
                                                  dtype=theano.config.floatX))
        outputs_info = [OrderedDict([('initial', initial_value),
                                     ('taps', [-4])]), None]
        results, _ = theano.scan(fn=onestep,
                                       sequences=seq,
                                       outputs_info=outputs_info)
-        sharedvar = theano.shared(numpy.zeros((1, 1),
+        sharedvar = theano.shared(np.zeros((1, 1),
                                              dtype=theano.config.floatX))
        updates = OrderedDict([(sharedvar, results[0][-1:])])

        f = theano.function([seq], results[1], updates=updates)

        # This fails if scan uses wrongly the borrow flag
-        assert numpy.all(exp_out == f(inp))
+        assert np.all(exp_out == f(inp))

        # This fails if Scan's infer_shape() is unable to remove the Scan
        # node from the graph.
@@ -4070,9 +4070,9 @@ class T_Scan(unittest.TestCase):

        # Compare obtained outputs with expected outputs
        floatX = theano.config.floatX
-        outputs = fct(numpy.arange(9, dtype=floatX).reshape(3,3))
+        outputs = fct(np.arange(9, dtype=floatX).reshape(3,3))

-        states = numpy.array([[0, 1, 2],
+        states = np.array([[0, 1, 2],
                              [3, 4, 5],
                              [6, 7, 8],
                              [9, 12, 15],
@@ -4144,8 +4144,8 @@ class T_Scan(unittest.TestCase):
        f = theano.function([v], gv)

        # Ensure the output of the function is valid
-        output = f(numpy.random.random(5))
-        utt.assert_allclose(output, numpy.ones(5))
+        output = f(np.random.random(5))
+        utt.assert_allclose(output, np.ones(5))

    def test_dot_optimization(self):
        A = tensor.matrix('A')
@@ -4155,10 +4155,10 @@ class T_Scan(unittest.TestCase):
                                        B.dimshuffle(0, 'x', 1)],
                           outputs_info=[tensor.zeros_like(A)])
        f = theano.function([A, B], S.owner.inputs[0][-1])
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        vA = rng.uniform(size=(5, 5)).astype(theano.config.floatX)
        vB = rng.uniform(size=(5, 5)).astype(theano.config.floatX)
-        utt.assert_allclose(f(vA, vB), numpy.dot(vA.T, vB))
+        utt.assert_allclose(f(vA, vB), np.dot(vA.T, vB))

    def test_pregreedy_optimizer(self):
        W = tensor.zeros((5, 4))
@@ -4171,7 +4171,7 @@ class T_Scan(unittest.TestCase):
            lambda x: tensor.dot(tensor.dot(x, W) + bh_t, W.T) + bv_t,
            outputs_info=v,
            n_steps=2)
-        theano.function([v], chain)(numpy.zeros((3, 5),
+        theano.function([v], chain)(np.zeros((3, 5),
                                                dtype=theano.config.floatX))

    def test_savemem_does_not_duplicate_number_of_scan_nodes(self):
@@ -4210,7 +4210,7 @@ class T_Scan(unittest.TestCase):
                            updates=updates,
                            mode=theano.Mode(linker='py'),
                            allow_input_downcast=True)
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_u = asarrayX(rng.uniform(size=(5,)))
        outs = f(v_u, [0, 0, 0], 0)
        utt.assert_allclose(outs[0], v_u + 1)
@@ -4243,7 +4243,7 @@ class T_Scan(unittest.TestCase):
                            updates=updates,
                            mode=theano.Mode(linker='py'),
                            allow_input_downcast=True)
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_w = asarrayX(rng.uniform())
        outs = f(v_w, [0, 0, 0], 0)
        utt.assert_allclose(outs[0], v_w + 1)
@@ -4252,7 +4252,7 @@ class T_Scan(unittest.TestCase):
        utt.assert_allclose(sh.get_value(), v_w + 4)

    def test_grad_bug_disconnected_input(self):
-        W = theano.shared(numpy.zeros((3, 3)), name='W')
+        W = theano.shared(np.zeros((3, 3)), name='W')
        v = theano.tensor.ivector(name='v')
        y, _ = theano.scan(lambda i, W: W[i], sequences=v, outputs_info=None, non_sequences=W)

@@ -4270,14 +4270,14 @@ class T_Scan(unittest.TestCase):
            # theano.printing.debugprint(out)
            return theano.function([], out)()

-        x = theano.shared(numpy.asarray(0., dtype=theano.config.floatX))
+        x = theano.shared(np.asarray(0., dtype=theano.config.floatX))
        utt.assert_allclose(test(x, tensor.sum((x+1)**2), mention_y=False),
                              1.21000003815)
        utt.assert_allclose(test(x, tensor.sum((x+1)**2), mention_y=True),
                              1.21000003815)

    def test_grad_find_input(self):
-        w = theano.shared(numpy.array(0, dtype='float32'), name='w')
+        w = theano.shared(np.array(0, dtype='float32'), name='w')
        init = tensor.fscalar('init')

        out, _ = theano.scan(
@@ -4333,7 +4333,7 @@ class T_Scan(unittest.TestCase):
        for out in [y1, y2, y3, y4, y5, y6]:
            # This used to raise an exception
            f = theano.function([W, v], out, mode=mode_with_opt)
-            f(numpy.zeros((3, 3), dtype=theano.config.floatX), [1, 2])
+            f(np.zeros((3, 3), dtype=theano.config.floatX), [1, 2])

            scan_nodes = scan_nodes_from_fct(f)
            assert len(scan_nodes) == 1
@@ -4375,9 +4375,9 @@ class T_Scan(unittest.TestCase):
            # This used to raise an exception
            f = theano.function([W, v, vv], out, on_unused_input='ignore',
                                mode=mode_with_opt)
-            f(numpy.zeros((3, 3), theano.config.floatX),
+            f(np.zeros((3, 3), theano.config.floatX),
              [1, 2],
-              numpy.zeros((3, 3), theano.config.floatX))
+              np.zeros((3, 3), theano.config.floatX))

            scan_nodes = scan_nodes_from_fct(f)
            assert len(scan_nodes) == 1
@@ -4413,7 +4413,7 @@ class T_Scan(unittest.TestCase):
            result_inner, _ = theano.scan(
                fn=loss_inner,
                outputs_info=tensor.as_tensor_variable(
-                    numpy.asarray(0, dtype=numpy.float32)),
+                    np.asarray(0, dtype=np.float32)),
                non_sequences=[W],
                n_steps=1,
            )
@@ -4422,7 +4422,7 @@ class T_Scan(unittest.TestCase):
        result_outer, _ = theano.scan(
            fn=loss_outer,
            outputs_info=tensor.as_tensor_variable(
-                numpy.asarray(0, dtype=numpy.float32)),
+                np.asarray(0, dtype=np.float32)),
            non_sequences=[W],
            n_steps=n_steps,
            return_list=True,
@@ -4432,14 +4432,14 @@ class T_Scan(unittest.TestCase):
        H = theano.gradient.hessian(cost, W)
        print(".", file=sys.stderr)
        f = theano.function([W, n_steps], H)
-        f(numpy.ones((8,), dtype='float32'), 1)
+        f(np.ones((8,), dtype='float32'), 1)

    def test_strict_mode(self):
        n = 10

-        w = numpy.array([[-1, 2], [3, -4]]).astype(theano.config.floatX)
+        w = np.array([[-1, 2], [3, -4]]).astype(theano.config.floatX)
        w_ = theano.shared(w)
-        x0 = numpy.array([1, 2]).astype(theano.config.floatX)
+        x0 = np.array([1, 2]).astype(theano.config.floatX)
        x0_ = tensor.vector(name='x0', dtype=theano.config.floatX)

        def _scan_loose(x):
@@ -4474,9 +4474,9 @@ class T_Scan(unittest.TestCase):
    def test_strict_mode_ex(self):
        n = 10

-        w = numpy.array([[-1, 2], [3, -4]]).astype(theano.config.floatX)
+        w = np.array([[-1, 2], [3, -4]]).astype(theano.config.floatX)
        w_ = theano.shared(w)
-        x0 = numpy.array([1, 2]).astype(theano.config.floatX)
+        x0 = np.array([1, 2]).astype(theano.config.floatX)
        x0_ = tensor.vector(name='x0', dtype=theano.config.floatX)

        def _scan_loose(x):
@@ -4497,7 +4497,7 @@ class T_Scan(unittest.TestCase):
        # Build a MonitorMode that counts how many values are greater than 10
        def detect_large_outputs(i, node, fn):
            for output in fn.outputs:
-                if isinstance(output[0], numpy.ndarray):
+                if isinstance(output[0], np.ndarray):
                    detect_large_outputs.large_count += (output[0] > 10).sum()
        detect_large_outputs.large_count = 0

@@ -4516,7 +4516,7 @@ class T_Scan(unittest.TestCase):
        f = theano.function(inputs=[A, k],
                            outputs=final_result,
                            updates=updates)
-        f(numpy.asarray([2, 3, .1, 0, 1], dtype=theano.config.floatX), 4)
+        f(np.asarray([2, 3, .1, 0, 1], dtype=theano.config.floatX), 4)

        # There should be 3 outputs greater than 10: prior_result[0] at step 3,
        # and prior_result[1] at steps 2 and 3.
@@ -4574,19 +4574,19 @@ class ScanGpuTests:
                             mode=self.mode_with_gpu)

        # get random initial values
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

-        v_u = numpy.asarray(v_u, dtype='float32')
-        v_x0 = numpy.asarray(v_x0, dtype='float32')
-        W = numpy.asarray(W, dtype='float32')
-        W_in = numpy.asarray(W_in, dtype='float32')
+        v_u = np.asarray(v_u, dtype='float32')
+        v_x0 = np.asarray(v_x0, dtype='float32')
+        W = np.asarray(W, dtype='float32')
+        W_in = np.asarray(W_in, dtype='float32')

        # compute the output in numpy
-        v_out = numpy.zeros((4,))
+        v_out = np.zeros((4,))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in xrange(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
@@ -4646,14 +4646,14 @@ class ScanGpuTests:
                             mode=self.mode_with_gpu)

        # get random initial values
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
-        v_out = numpy.zeros((4,))
+        v_out = np.zeros((4,))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in xrange(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
@@ -4708,20 +4708,20 @@ class ScanGpuTests:
                             mode=self.mode_with_gpu)

        # get random initial values
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
-        v_out1 = numpy.zeros((4,))
-        v_out2 = numpy.zeros((4,), dtype='int64')
+        v_out1 = np.zeros((4,))
+        v_out2 = np.zeros((4,), dtype='int64')
        v_out1[0] = v_u[0] * W_in + v_x0 * W
        v_out2[0] = v_u[0] + v_x0
        for step in xrange(1, 4):
            v_out1[step] = v_u[step] * W_in + v_out1[step - 1] * W
-            v_out2[step] = numpy.int64(v_u[step] + v_out1[step - 1])
+            v_out2[step] = np.int64(v_u[step] + v_out1[step - 1])

        theano_out1, theano_out2 = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(theano_out1, v_out1)
@@ -4735,8 +4735,8 @@ class ScanGpuTests:
        assert self.is_scan_on_gpu(scan_node)

    def test_gibbs_chain(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
-        v_vsample = numpy.array(rng.binomial(1, .5, size=(3, 20),),
+        rng = np.random.RandomState(utt.fetch_seed())
+        v_vsample = np.array(rng.binomial(1, .5, size=(3, 20),),
                                dtype='float32')
        vsample = theano.shared(v_vsample)
        trng = theano.sandbox.rng_mrg.MRG_RandomStreams(
@@ -4788,11 +4788,11 @@ class ScanGpuTests:

        # Initialize the network parameters
        floatX = theano.config.floatX
-        U = theano.shared(numpy.zeros((n_in, n_hid), dtype="float32"),
+        U = theano.shared(np.zeros((n_in, n_hid), dtype="float32"),
                        name='W_xin_to_l1')
-        V = theano.shared(numpy.zeros((n_hid, n_hid), dtype="float32"),
+        V = theano.shared(np.zeros((n_hid, n_hid), dtype="float32"),
                        name='W_l1_to_l1')
-        W = theano.shared(numpy.zeros((n_hid, n_out), dtype="float32"),
+        W = theano.shared(np.zeros((n_hid, n_out), dtype="float32"),
                        name='W_l1_to_l2')
        nparams = [U, V, W]

@@ -4802,7 +4802,7 @@ class ScanGpuTests:
        def scan_l(baseline, last_step):
            return baseline + tensor.dot(last_step, V)

-        zero_output = tensor.alloc(numpy.asarray(0., dtype="float32"),
+        zero_output = tensor.alloc(np.asarray(0., dtype="float32"),
                                   mb_size, n_hid)

        l1_out, _ = theano.scan(scan_l, sequences=[l1_base],
@@ -4833,9 +4833,9 @@ class ScanGpuTests:
        assert len(grad_scan_node.outputs) == 2, len(grad_scan_node.outputs)

        # Call the theano function to ensure the absence of a memory error
-        feval_backprop(numpy.zeros((mb_length, mb_size, n_in),
+        feval_backprop(np.zeros((mb_length, mb_size, n_in),
                                   dtype="float32"),
-                       numpy.zeros((mb_length, mb_size, n_out),
+                       np.zeros((mb_length, mb_size, n_out),
                                   dtype="float32"))

    def test_memory_reuse_gpudimshuffle(self):
@@ -4864,11 +4864,11 @@ class ScanGpuTests:
        fct = theano.function([input1, init], [out1, out2],
                              mode=self.mode_with_gpu)

-        output = fct(numpy.ones((2, 1, 1), dtype="float32"),
-                     numpy.ones((1, 1, 1), dtype="float32"))
+        output = fct(np.ones((2, 1, 1), dtype="float32"),
+                     np.ones((1, 1, 1), dtype="float32"))

-        expected_output = (numpy.array([2, 4], dtype="float32"),
-                           numpy.array([3, 7], dtype="float32"))
+        expected_output = (np.array([2, 4], dtype="float32"),
+                           np.array([3, 7], dtype="float32"))
        utt.assert_allclose(output, expected_output)


@@ -4985,7 +4985,7 @@ def test_speed():
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")

-    r = numpy.arange(10000).astype(theano.config.floatX).reshape(1000, 10)
+    r = np.arange(10000).astype(theano.config.floatX).reshape(1000, 10)

    t0 = time.time()
    for i in xrange(1, 1000):
@@ -4993,7 +4993,7 @@ def test_speed():
    t1 = time.time()
    print('python', t1 - t0)

-    r = numpy.arange(10000).astype(theano.config.floatX).reshape(1000, 10)
+    r = np.arange(10000).astype(theano.config.floatX).reshape(1000, 10)
    t0 = time.time()
    r_i = iter(r[1:])
    r_ii = iter(r[:-1])
@@ -5015,7 +5015,7 @@ def test_speed():
    print('python with builtin iterator', t1 - t0)

    if 1:
-        r = numpy.arange(10000).astype(theano.config.floatX).reshape(1000, 10)
+        r = np.arange(10000).astype(theano.config.floatX).reshape(1000, 10)
        s_r = tensor.matrix()
        s_y, updates = theano.scan(fn=lambda ri, rii: ri + rii,
                sequences=[s_r[1:]],
@@ -5030,9 +5030,9 @@ def test_speed():
        print('theano (scan, cvm)', t3 - t2)

    if 1:
-        r = numpy.arange(10000).astype(theano.config.floatX).reshape(-1, 10)
+        r = np.arange(10000).astype(theano.config.floatX).reshape(-1, 10)
        shared_r = theano.shared(r)
-        s_i = theano.shared(numpy.array(1))
+        s_i = theano.shared(np.array(1))
        s_rinc = tensor.inc_subtensor(shared_r[s_i], shared_r[s_i - 1],
                tolerate_inplace_aliasing=True)
        # theano.printing.debugprint(s_rinc)
@@ -5075,18 +5075,18 @@ def test_speed_rnn():
    L = 10000
    N = 50

-    numpy.random.seed(2523452)
-    r = numpy.arange(L * N).astype(theano.config.floatX).reshape(L, N)
-    w = numpy.random.randn(N, N).astype(theano.config.floatX)
+    np.random.seed(2523452)
+    r = np.arange(L * N).astype(theano.config.floatX).reshape(L, N)
+    w = np.random.randn(N, N).astype(theano.config.floatX)

    t0 = time.time()
    for i in xrange(1, L):
-        r[i] = numpy.tanh(numpy.dot(r[i - 1], w))
+        r[i] = np.tanh(np.dot(r[i - 1], w))
    t1 = time.time()
    print('python', t1 - t0)

    if 1:
-        r = numpy.arange(L * N).astype(theano.config.floatX).reshape(L, N)
+        r = np.arange(L * N).astype(theano.config.floatX).reshape(L, N)
        s_r = tensor.matrix()
        s_y, updates = theano.scan(
                fn=lambda ri, rii: tensor.tanh(tensor.dot(rii, w)),
@@ -5102,7 +5102,7 @@ def test_speed_rnn():
        print('theano (scan, cvm)', t3 - t2)

    if 1:
-        r = numpy.arange(L * N).astype(theano.config.floatX).reshape(L, N)
+        r = np.arange(L * N).astype(theano.config.floatX).reshape(L, N)
        s_w = theano.shared(w)
        shared_r = theano.shared(r)
        s_i = theano.scalar.sharedvar.shared(1)
@@ -5154,18 +5154,18 @@ def test_speed_batchrnn():
    B = 50
    N = 400

-    numpy.random.seed(2523452)
-    r = numpy.arange(B * L * N).astype(theano.config.floatX).reshape(L, B, N)
-    w = numpy.random.randn(N, N).astype(theano.config.floatX)
+    np.random.seed(2523452)
+    r = np.arange(B * L * N).astype(theano.config.floatX).reshape(L, B, N)
+    w = np.random.randn(N, N).astype(theano.config.floatX)

    t0 = time.time()
    for i in xrange(1, L):
-        r[i] = numpy.tanh(numpy.dot(r[i - 1], w))
+        r[i] = np.tanh(np.dot(r[i - 1], w))
    t1 = time.time()
    print('python', t1 - t0)

    if 1:
-        r = numpy.arange(B * L * N).astype(
+        r = np.arange(B * L * N).astype(
            theano.config.floatX).reshape(L, B, N)
        s_w = theano.shared(w)
        shared_r = theano.shared(r)
@@ -5328,9 +5328,9 @@ def test_compute_test_value():
    theano.config.compute_test_value = 'raise'
    try:
        x = tensor.vector('x')
-        xv = numpy.ones(3, dtype=theano.config.floatX)
+        xv = np.ones(3, dtype=theano.config.floatX)
        x.tag.test_value = xv
-        y = theano.shared(numpy.arange(3, dtype=theano.config.floatX),
+        y = theano.shared(np.arange(3, dtype=theano.config.floatX),
                          name='y')
        z, updates = theano.scan(
                fn=lambda u, v: u + v,
@@ -5351,10 +5351,10 @@ def test_compute_test_value_nonseq():
    theano.config.compute_test_value = 'raise'
    try:
        x = tensor.vector('x')
-        xv = numpy.ones(3, dtype=theano.config.floatX)
+        xv = np.ones(3, dtype=theano.config.floatX)
        x.tag.test_value = xv
        y = theano.shared(
-                numpy.arange(9, dtype=theano.config.floatX).reshape(3, 3),
+                np.arange(9, dtype=theano.config.floatX).reshape(3, 3),
                name='y')
        z, updates = theano.scan(
                fn=lambda u, v: u + v,
@@ -5373,7 +5373,7 @@ def test_compute_test_value_nonseq():
 def test_compute_test_value_grad():
    # Test case originally reported by Bitton Tenessi
    # https://groups.google.com/d/msg/theano-users/fAP3i2CbskQ/3OgBf4yjqiQJ
-    WEIGHT = numpy.array([1, 2, 1, 3, 4, 1, 5, 6, 1, 7, 8, 1],
+    WEIGHT = np.array([1, 2, 1, 3, 4, 1, 5, 6, 1, 7, 8, 1],
                         dtype='float32')

    old_compute_test_val = theano.config.compute_test_value
@@ -5387,13 +5387,13 @@ def test_compute_test_value_grad():
        W = W_flat.reshape((2, 2, 3))

        outputs_mi = tensor.as_tensor_variable(
-                numpy.asarray(0, dtype='float32'))
-        outputs_mi.tag.test_value = numpy.asarray(0, dtype='float32')
+                np.asarray(0, dtype='float32'))
+        outputs_mi.tag.test_value = np.asarray(0, dtype='float32')

        def loss_mi(mi, sum_mi, W):
            outputs_ti = tensor.as_tensor_variable(
-                    numpy.asarray(0, dtype='float32'))
-            outputs_ti.tag.test_value = numpy.asarray(0, dtype='float32')
+                    np.asarray(0, dtype='float32'))
+            outputs_ti.tag.test_value = np.asarray(0, dtype='float32')

            def loss_ti(ti, sum_ti, mi, W):
                return W.sum().sum().sum() + sum_ti
@@ -5430,10 +5430,10 @@ def test_compute_test_value_grad_cast():
    theano.config.compute_test_value = 'raise'
    try:
        h = tensor.matrix('h')
-        h.tag.test_value = numpy.array([[1, 2, 3, 4], [5, 6, 7, 8]],
+        h.tag.test_value = np.array([[1, 2, 3, 4], [5, 6, 7, 8]],
                                       dtype=floatX)

-        w = theano.shared(numpy.random.randn(4, 3).astype(floatX), name='w')
+        w = theano.shared(np.random.randn(4, 3).astype(floatX), name='w')

        outputs, _ = theano.scan(lambda i, h, w: (theano.dot(h[i], w), i),
                                 outputs_info=[None, 0], non_sequences=[h, w],
@@ -5473,10 +5473,10 @@ def test_outputs_taps_check():

 def test_default_value_broadcasted():
    def floatx(X):
-        return numpy.asarray(X, dtype=theano.config.floatX)
+        return np.asarray(X, dtype=theano.config.floatX)

    def init_weights(shape, name):
-        return theano.shared(floatx(numpy.random.randn(*shape) * 0.1), name)
+        return theano.shared(floatx(np.random.randn(*shape) * 0.1), name)

    X = theano.tensor.matrix('X')
    in_size = 2
@@ -5494,14 +5494,14 @@ def test_default_value_broadcasted():
    gW_x = theano.tensor.grad(cost, W_x)
    updates = [(W_x, W_x - 0.1 * gW_x)]
    f = theano.function([X], outputs=cost, updates=updates)
-    f(numpy.random.rand(10, in_size).astype(X.dtype))
+    f(np.random.rand(10, in_size).astype(X.dtype))


 class TestInconsistentBroadcast(unittest.TestCase):

    def test_raise_error(self):
        x = tensor.tensor3()
-        initial_x = tensor.constant(numpy.zeros((1, 10)))
+        initial_x = tensor.constant(np.zeros((1, 10)))
        y, updates = theano.scan(fn=lambda x, prev_x: x + prev_x,
                                 sequences=x,
                                 outputs_info=[dict(initial=initial_x)])

--- a/theano/scan_module/tests/test_scan_checkpoints.py
+++ b/theano/scan_module/tests/test_scan_checkpoints.py
 from __future__ import absolute_import, print_function, division

-import numpy
+import numpy as np
 import unittest

 import theano
@@ -39,14 +39,14 @@ class TestScanCheckpoint(unittest.TestCase):
        f = theano.function(inputs=[self.A, self.k],
                            outputs=[self.result, self.result_check])
        out, out_check = f(range(10), 101)
-        assert numpy.allclose(out, out_check)
+        assert np.allclose(out, out_check)

    def test_backward_pass(self):
        """Test gradient computation of A**k."""
        f = theano.function(inputs=[self.A, self.k],
                            outputs=[self.grad_A, self.grad_A_check])
        out, out_check = f(range(10), 101)
-        assert numpy.allclose(out, out_check)
+        assert np.allclose(out, out_check)

    @unittest.skipUnless(PYGPU_AVAILABLE, 'Requires pygpu.')
    def test_memory(self):
@@ -59,7 +59,7 @@ class TestScanCheckpoint(unittest.TestCase):
        f_check = theano.function(inputs=[self.A, self.k],
                                  outputs=self.grad_A_check, mode=mode_with_gpu)
        free_gmem = theano.gpuarray.type._context_reg[None].free_gmem
-        data = numpy.ones(free_gmem // 3000, dtype=numpy.float32)
+        data = np.ones(free_gmem // 3000, dtype=np.float32)
        # Check that it works with the checkpoints
        f_check(data, 1000)
        # Check that the basic scan fails in that case

--- a/theano/scan_module/tests/test_scan_opt.py
+++ b/theano/scan_module/tests/test_scan_opt.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import unittest

 import theano
@@ -18,7 +18,7 @@ class TestGaussNewton(unittest.TestCase):
    This test case is based on code by Sigurd Spieckermann.
    """
    def setUp(self):
-        self.rng = numpy.random.RandomState(utt.fetch_seed())
+        self.rng = np.random.RandomState(utt.fetch_seed())

    def _run(self, num_features, num_timesteps, batch_size, mode):
        # determine shapes of inputs and targets depending on the batch size
@@ -58,8 +58,8 @@ class TestGaussNewton(unittest.TestCase):
        W_hy = theano.shared(
            (0.01 * self.rng.uniform(size=(10, 1))).astype(config.floatX),
            borrow=True)
-        b_h = theano.shared(numpy.zeros(10).astype(config.floatX), borrow=True)
-        b_y = theano.shared(numpy.zeros(1).astype(config.floatX), borrow=True)
+        b_h = theano.shared(np.zeros(10).astype(config.floatX), borrow=True)
+        b_y = theano.shared(np.zeros(1).astype(config.floatX), borrow=True)

        params = [W_xh, W_hh, W_hy, b_h, b_y]

@@ -171,8 +171,8 @@ class TestPushOutScanOutputDot(object):

        # Ensure that the function compiled with the optimization produces
        # the same results as the function compiled without
-        v_value = numpy.random.random((4)).astype(config.floatX)
-        m_value = numpy.random.random((4, 5)).astype(config.floatX)
+        v_value = np.random.random((4)).astype(config.floatX)
+        m_value = np.random.random((4, 5)).astype(config.floatX)

        output_opt = f_opt(v_value, m_value)
        output_no_opt = f_no_opt(v_value, m_value)
@@ -217,8 +217,8 @@ class TestPushOutScanOutputDot(object):

        # Ensure that the function compiled with the optimization produces
        # the same results as the function compiled without
-        a_value = numpy.random.random((3, 4)).astype(config.floatX)
-        b_value = numpy.random.random((4, 5)).astype(config.floatX)
+        a_value = np.random.random((3, 4)).astype(config.floatX)
+        b_value = np.random.random((4, 5)).astype(config.floatX)

        output_opt = f_opt(a_value, b_value)
        output_no_opt = f_no_opt(a_value, b_value)
@@ -263,8 +263,8 @@ class TestPushOutScanOutputDot(object):

        # Ensure that the function compiled with the optimization produces
        # the same results as the function compiled without
-        a_value = numpy.random.random((3, 4)).astype(config.floatX)
-        b_value = numpy.random.random((4, 5)).astype(config.floatX)
+        a_value = np.random.random((3, 4)).astype(config.floatX)
+        b_value = np.random.random((4, 5)).astype(config.floatX)

        output_opt = f_opt(a_value, b_value)
        output_no_opt = f_no_opt(a_value, b_value)
@@ -296,7 +296,7 @@ class TestPushOutSumOfDot():
        dim = 5

        # Weight matrices
-        U = theano.shared(numpy.random.normal(size=(dim, dim),
+        U = theano.shared(np.random.normal(size=(dim, dim),
                                              scale=0.0001).astype(config.floatX))
        U.name = 'U'
        V = theano.shared(U.get_value())
@@ -306,7 +306,7 @@ class TestPushOutSumOfDot():

        # Variables and their values
        x = T.tensor3('x')
-        x_value = numpy.random.normal(size=(seq_len, batch_size, dim),
+        x_value = np.random.normal(size=(seq_len, batch_size, dim),
                                      scale=0.0001).astype(config.floatX)

        ri = T.tensor3('ri')
@@ -315,7 +315,7 @@ class TestPushOutSumOfDot():
        zi = T.tensor3('zi')
        zi_value = x_value

-        init = T.alloc(numpy.cast[config.floatX](0), batch_size, dim)
+        init = T.alloc(np.cast[config.floatX](0), batch_size, dim)
        def rnn_step1(
                # sequences
                x, ri, zi,
@@ -375,8 +375,8 @@ class TestPushOutSumOfDot():
        input2 = T.tensor3()
        input3 = T.tensor3()

-        W = theano.shared(numpy.random.normal(size=(4, 5))).astype(config.floatX)
-        U = theano.shared(numpy.random.normal(size=(6, 7))).astype(config.floatX)
+        W = theano.shared(np.random.normal(size=(4, 5))).astype(config.floatX)
+        U = theano.shared(np.random.normal(size=(6, 7))).astype(config.floatX)

        def inner_fct(seq1, seq2, seq3, previous_output):
            temp1 = T.dot(seq1, W) + seq3
@@ -384,7 +384,7 @@ class TestPushOutSumOfDot():
            dot_output = T.dot(temp1, temp2)
            return previous_output + dot_output

-        init = T.as_tensor_variable(numpy.random.normal(size=(3, 7)))
+        init = T.as_tensor_variable(np.random.normal(size=(3, 7)))

        # Compile the function twice, once with the optimization and once
        # without
@@ -410,9 +410,9 @@ class TestPushOutSumOfDot():
        # TODO

        # Compare the outputs of the 2 functions
-        input1_value = numpy.random.random((2, 3, 4)).astype(config.floatX)
-        input2_value = numpy.random.random((2, 5, 6)).astype(config.floatX)
-        input3_value = numpy.random.random((2, 3, 5)).astype(config.floatX)
+        input1_value = np.random.random((2, 3, 4)).astype(config.floatX)
+        input2_value = np.random.random((2, 5, 6)).astype(config.floatX)
+        input3_value = np.random.random((2, 3, 5)).astype(config.floatX)

        output_opt = f_opt(input1_value, input2_value, input3_value)
        output_no_opt = f_no_opt(input1_value, input2_value, input3_value)

--- a/theano/scan_module/tests/test_scan_utils.py
+++ b/theano/scan_module/tests/test_scan_utils.py
 from __future__ import absolute_import, print_function, division
 import itertools
 import unittest
-import numpy
+import numpy as np
 import theano
 from theano import tensor
 from theano.scan_module.scan_utils import equal_computations, map_variables
@@ -51,8 +51,8 @@ class TestMapVariables(unittest.TestCase):
        s2, = map_variables(self.replacer, [s])

        f = theano.function([x, y, z], [s, s2])
-        rval = f(x=numpy.array([1, 2, 3], dtype=numpy.float32), y=1, z=2)
-        assert numpy.array_equal(rval, [[1, 2, 3], [2, 4, 6]])
+        rval = f(x=np.array([1, 2, 3], dtype=np.float32), y=1, z=2)
+        assert np.array_equal(rval, [[1, 2, 3], [2, 4, 6]])

    def test_scan(self):
        x = tensor.vector('x')
@@ -64,7 +64,7 @@ class TestMapVariables(unittest.TestCase):
        # should do this as well.
        outer = tensor.scalar("outer")
        shared = theano.shared(
-            numpy.array(1., dtype=theano.config.floatX),
+            np.array(1., dtype=theano.config.floatX),
            name="shared")
        constant = tensor.constant(1, name="constant")

@@ -77,7 +77,7 @@ class TestMapVariables(unittest.TestCase):
            return r

        s, _ = theano.scan(step, sequences=x,
-                           outputs_info=[numpy.array(0.)])
+                           outputs_info=[np.array(0.)])
        # ensure z is owned by the outer graph so map_variables() will need to
        # jump through additional hoops to placate FunctionGraph.
        t = z * s
@@ -85,8 +85,8 @@ class TestMapVariables(unittest.TestCase):
        t2 = z * s2

        f = theano.function([x, outer], [t, t2])
-        rval = f(x=numpy.array([1, 2, 3], dtype=numpy.float32), outer=0.5)
-        assert numpy.array_equal(rval, [[1, 3, 6], [-1, -3, -6]])
+        rval = f(x=np.array([1, 2, 3], dtype=np.float32), outer=0.5)
+        assert np.array_equal(rval, [[1, 3, 6], [-1, -3, -6]])

    def test_scan_with_shared_update(self):
        x = tensor.vector('x')
@@ -104,7 +104,7 @@ class TestMapVariables(unittest.TestCase):
            return r

        s, _ = theano.scan(step, sequences=x,
-                           outputs_info=[numpy.array(0.)])
+                           outputs_info=[np.array(0.)])
        self.assertRaises(NotImplementedError,
                          map_variables, self.replacer, [s])

@@ -128,7 +128,7 @@ class TestMapVariables(unittest.TestCase):
            return r + counter

        s, _ = theano.scan(step, sequences=x,
-                           outputs_info=[numpy.array(0.)])
+                           outputs_info=[np.array(0.)])
        self.assertRaises(NotImplementedError,
                          map_variables, self.replacer, [s])

@@ -137,7 +137,7 @@ class TestMapVariables(unittest.TestCase):
        # inner graph.
        outer = tensor.scalar("outer")
        shared = theano.shared(
-            numpy.array(1., dtype=theano.config.floatX),
+            np.array(1., dtype=theano.config.floatX),
            name="shared")
        constant = tensor.constant(1., name="constant")
        z = outer * (shared + constant)

--- a/theano/tensor/blas.py
+++ b/theano/tensor/blas.py
@@ -130,10 +130,10 @@ import logging
 import os
 import time

-import numpy
+import numpy as np
 import numpy.distutils
 try:
-    import numpy.distutils.__config__
+    import numpy.distutils.__config__  # noqa
 except ImportError:
    pass

@@ -166,10 +166,10 @@ try:
        # `scipy.linalg.blas.fblas` with `scipy.linalg.blas`.
        # See http://github.com/scipy/scipy/pull/358
        fblas = scipy.linalg.blas
-    _blas_gemv_fns = {numpy.dtype('float32'): fblas.sgemv,
-                      numpy.dtype('float64'): fblas.dgemv,
-                      numpy.dtype('complex64'): fblas.cgemv,
-                      numpy.dtype('complex128'): fblas.zgemv}
+    _blas_gemv_fns = {np.dtype('float32'): fblas.sgemv,
+                      np.dtype('float64'): fblas.dgemv,
+                      np.dtype('complex64'): fblas.cgemv,
+                      np.dtype('complex128'): fblas.zgemv}
 except ImportError as e:
    have_fblas = False
    # This is used in Gemv and ScipyGer. We use CGemv and CGer
@@ -190,12 +190,12 @@ def check_init_y():
        if not have_fblas:
            check_init_y._result = False

-        y = float('NaN') * numpy.ones((2,))
-        x = numpy.ones((2,))
-        A = numpy.ones((2, 2))
+        y = float('NaN') * np.ones((2,))
+        x = np.ones((2,))
+        A = np.ones((2, 2))
        gemv = _blas_gemv_fns[y.dtype]
        gemv(1.0, A.T, x, 0.0, y, overwrite_y=True, trans=True)
-        check_init_y._result = numpy.isnan(y).any()
+        check_init_y._result = np.isnan(y).any()

    return check_init_y._result

@@ -269,7 +269,7 @@ class Gemv(Op):
            out_storage[0][0] = gemv(alpha, A.T, x, beta, y,
                                     overwrite_y=self.inplace, trans=True)
        else:
-            out = numpy.dot(A, x)
+            out = np.dot(A, x)
            if alpha != 1:
                out *= alpha
            if beta != 0:
@@ -277,7 +277,7 @@ class Gemv(Op):
                    out += beta * y
                else:
                    out += y
-            out_storage[0][0] = numpy.asarray(out, dtype=y.dtype)
+            out_storage[0][0] = np.asarray(out, dtype=y.dtype)

    def infer_shape(self, node, input_shapes):
        return [input_shapes[0]]
@@ -341,9 +341,9 @@ class Ger(Op):
        else:
            A = cA.copy()
        if calpha != 1:
-            A += calpha * numpy.outer(cx, cy)
+            A += calpha * np.outer(cx, cy)
        else:
-            A += numpy.outer(cx, cy)
+            A += np.outer(cx, cy)
        cZ[0] = A

    def infer_shape(self, node, input_shapes):
@@ -900,26 +900,26 @@ class Gemm(GemmRelated):
        if not self.inplace:
            z = z.copy()  # the original z will not be changed
        if z.shape == ():
-            z.itemset(z * a + b * numpy.dot(x, y))
+            z.itemset(z * a + b * np.dot(x, y))
            zout[0] = z
        else:
            if b == 0.0:
                if a == 1.0:
-                    z[:] = numpy.dot(x, y)
+                    z[:] = np.dot(x, y)
                elif a == -1.0:
-                    z[:] = -numpy.dot(x, y)
+                    z[:] = -np.dot(x, y)
                else:
-                    z[:] = a * numpy.dot(x, y)
+                    z[:] = a * np.dot(x, y)
            elif b == 1.0:
                if a == 1.0:
-                    z += numpy.dot(x, y)
+                    z += np.dot(x, y)
                elif a == -1.0:
-                    z -= numpy.dot(x, y)
+                    z -= np.dot(x, y)
                else:
-                    z += a * numpy.dot(x, y)
+                    z += a * np.dot(x, y)
            else:
                z *= b
-                z += a * numpy.dot(x, y)
+                z += a * np.dot(x, y)
            zout[0] = z

    def infer_shape(self, node, input_shapes):
@@ -1066,7 +1066,7 @@ def _as_scalar(res, dtype=None):
    """Return None or a TensorVariable whose type is in T.float_scalar_types"""
    if dtype is None:
        dtype = config.floatX
-    if numpy.all(res.type.broadcastable):
+    if np.all(res.type.broadcastable):
        while res.owner and isinstance(res.owner.op, T.DimShuffle):
            res = res.owner.inputs[0]
        # may still have some number of True's
@@ -1216,7 +1216,7 @@ def _gemm_canonicalize(r, scale, rval, maxclients):
        vectors = []
        matrices = []
        for i in r.owner.inputs:
-            if numpy.all(i.type.broadcastable):
+            if np.all(i.type.broadcastable):
                while i.owner and isinstance(i.owner.op, T.DimShuffle):
                    i = i.owner.inputs[0]
                if i.type.broadcastable:
@@ -1539,7 +1539,7 @@ class Dot22(GemmRelated):
        x, y = inp
        z, = out
        try:
-            z[0] = numpy.asarray(numpy.dot(x, y))
+            z[0] = np.asarray(np.dot(x, y))
        except ValueError as e:
            # The error raised by numpy has no shape information, we mean to
            # add that
@@ -1704,8 +1704,8 @@ def local_dot22_to_ger_or_gemv(node):
        x, y = node.inputs
        xb = x.broadcastable
        yb = y.broadcastable
-        one = T.as_tensor_variable(numpy.asarray(1, dtype=x.dtype))
-        zero = T.as_tensor_variable(numpy.asarray(0, dtype=x.dtype))
+        one = T.as_tensor_variable(np.asarray(1, dtype=x.dtype))
+        zero = T.as_tensor_variable(np.asarray(0, dtype=x.dtype))
        if xb[1] and yb[0]:
            # x and y are both vectors so this might qualifies for a GER
            xv = x.dimshuffle(0)
@@ -1810,7 +1810,7 @@ class Dot22Scalar(GemmRelated):
        x, y, scalar = inp
        z, = out
        try:
-            z[0] = numpy.asarray(scalar * numpy.dot(x, y))
+            z[0] = np.asarray(scalar * np.dot(x, y))
        except ValueError as e:
            # The error raised by numpy has no shape information, we
            # mean to add that
@@ -2034,9 +2034,9 @@ class BatchedDot(Op):

        shape = self.infer_shape(node, [i.shape for i in inp])[0]
        dtype = node.outputs[0].dtype
-        z0 = z[0] = numpy.empty(shape, dtype=dtype)
+        z0 = z[0] = np.empty(shape, dtype=dtype)
        for i in xrange(z0.shape[0]):
-            z0[i] = numpy.dot(x[i], y[i])
+            z0[i] = np.dot(x[i], y[i])

    def c_support_code(self):
        batch_gemm_defn = """