Merge pull request #5458 from bscellier/import_numpy_gpuarray

Import numpy as np throughout theano/gpuarray

Merge pull request #5458 from bscellier/import_numpy_gpuarray
824cb369 · ballasn · GitHub · 2bb065fc · 5159a6b3 · 824cb369
--- a/theano/gpuarray/basic_ops.py
+++ b/theano/gpuarray/basic_ops.py
@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
 import os
 import copy
 import re
-import numpy
+import numpy as np
 from theano import Op, Apply, Type, Variable
 from theano import tensor, config
@@ -206,7 +206,7 @@ class Kernel(object):
    def get_flags(*types):
        def get_dtype(t):
            if isinstance(t, string_types):
-                return numpy.dtype(t)
+                return np.dtype(t)
            elif isinstance(t, Type):
                return t.dtype
            elif isinstance(t, Variable):
@@ -215,13 +215,13 @@ class Kernel(object):
                raise TypeError("can't get a dtype from %s" % (type(t),))
        dtypes = [get_dtype(t) for t in types]
        flags = dict(cluda=True)
-        if any(d == numpy.float64 for d in dtypes):
+        if any(d == np.float64 for d in dtypes):
            flags['have_double'] = True
        if any(d.itemsize < 4 for d in dtypes):
            flags['have_small'] = True
        if any(d.kind == 'c' for d in dtypes):
            flags['have_complex'] = True
-        if any(d == numpy.float16 for d in dtypes):
+        if any(d == np.float16 for d in dtypes):
            flags['have_half'] = True
        return flags
@@ -275,8 +275,8 @@ def get_ctype(dtype):
    elif dtype == gpuarray.SSIZE:
        return "ssize_t"
    else:
-        if not isinstance(dtype, numpy.dtype):
+        if not isinstance(dtype, np.dtype):
-            dtype = numpy.dtype(dtype)
+            dtype = np.dtype(dtype)
        return 'npy_' + dtype.name
@@ -311,7 +311,7 @@ class GpuKernelBase(object):
        except MethodNotDefined:
            o = []
        # We rely on the input types for the directory to gpuarray includes
-        return o + [numpy.get_include()]
+        return o + [np.get_include()]
    def _generate_kernel_bin(self, k, ctx):
        gk = gpuarray.GpuKernel(k.code, k.name, k.params, context=ctx,
@@ -466,7 +466,7 @@ def get_dtype(s):
    if s == 'ssize':
        return gpuarray.SSIZE
    else:
-        return numpy.dtype(s)
+        return np.dtype(s)
 class CGpuKernelBase(COp, GpuKernelBase):
@@ -565,7 +565,7 @@ class HostFromGpu(Op):
    def perform(self, node, inp, out):
        x, = inp
        z, = out
-        z[0] = numpy.asarray(x)
+        z[0] = np.asarray(x)
    def c_code(self, node, name, inputs, outputs, sub):
        return """
@@ -1285,7 +1285,7 @@ class GpuJoin(HideC, Join):
        if axis < 0:
            axis += axis_and_tensors[1].ndim
        # we check these tensors for being empty.
-        if (view != -1) and numpy.all(
+        if (view != -1) and np.all(
                [tensor.shape[axis] == 0 for tensor in
                 tensors[0:view] + tensors[view + 1:]]):
            out[0] = tensors[view]

--- a/theano/gpuarray/blocksparse.py
+++ b/theano/gpuarray/blocksparse.py
@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
 import logging
 import os
-import numpy
+import numpy as np
 from theano import Apply, tensor
 from theano.gof import COp
 from theano.tensor import discrete_dtypes, as_tensor_variable
@@ -121,7 +121,7 @@ class GpuSparseBlockOuter(COp):
    def make_node(self, o, x, y, xIdx, yIdx, alpha=None):
        ctx = infer_context_name(o, x, y)
-        one = tensor.constant(numpy.asarray(1.0, dtype='float32'))
+        one = tensor.constant(np.asarray(1.0, dtype='float32'))
        o = as_gpuarray_variable(o, ctx)
        x = as_gpuarray_variable(x, ctx)
        y = as_gpuarray_variable(y, ctx)

--- a/theano/gpuarray/dnn.py
+++ b/theano/gpuarray/dnn.py
@@ -4,7 +4,7 @@ import os
 import sys
 import warnings
-import numpy
+import numpy as np
 from six import integer_types
 import theano
@@ -457,8 +457,8 @@ gpu_dnn_conv_desc.cache = {}
 # scalar constants
-_zero = constant(numpy.asarray(0.0, dtype='float64'))
+_zero = constant(np.asarray(0.0, dtype='float64'))
-_one = constant(numpy.asarray(1.0, dtype='float64'))
+_one = constant(np.asarray(1.0, dtype='float64'))
 def ensure_dt(val, default, name, dtype):
@@ -2482,8 +2482,8 @@ class RNNBlock(object):
        bytesize = _get_param_size(self.desc, input_size, self.dtype,
                                   self.context_name)
        bytesize = int(bytesize)
-        assert bytesize % numpy.dtype(self.dtype).itemsize == 0
+        assert bytesize % np.dtype(self.dtype).itemsize == 0
-        return bytesize // numpy.dtype(self.dtype).itemsize
+        return bytesize // np.dtype(self.dtype).itemsize
    def split_params(self, w, layer, input_size):
        if not isinstance(w, GpuArraySharedVariable):

--- a/theano/gpuarray/elemwise.py
+++ b/theano/gpuarray/elemwise.py
 from __future__ import absolute_import, print_function, division
 import copy
-import numpy
+import numpy as np
 import theano
 from theano import Apply, scalar, config, Op
@@ -27,7 +27,7 @@ from .fp16_help import load_w, write_w
 def make_argument(v, name):
-    return ArrayArg(numpy.dtype(v.type.dtype), name)
+    return ArrayArg(np.dtype(v.type.dtype), name)
 def as_C_string_const(s):

--- a/theano/gpuarray/linalg.py
+++ b/theano/gpuarray/linalg.py
@@ -7,7 +7,7 @@ import warnings
 from theano import Op
 from theano.gpuarray import basic_ops, GpuArrayType
-import numpy
+import numpy as np
 from numpy.linalg.linalg import LinAlgError
 try:
@@ -107,7 +107,7 @@ class GpuCusolverSolve(Op):
                ctx.cusolver_handle = cusolver.cusolverDnCreate()
    def check_dev_info(self, dev_info):
-        val = numpy.asarray(dev_info)[0]
+        val = np.asarray(dev_info)[0]
        if val > 0:
            raise LinAlgError('A is singular')

--- a/theano/gpuarray/neighbours.py
+++ b/theano/gpuarray/neighbours.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 from theano import Op, Apply, config
 from theano.tensor.nnet.neighbours import Images2Neibs
@@ -256,8 +256,8 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
        dtype_neib_shape = node.inputs[1].dtype
        dtype_neib_step = node.inputs[2].dtype
        dtype_z = node.outputs[0].dtype
-        itemsize_ten4 = numpy.dtype(dtype_ten4).itemsize
+        itemsize_ten4 = np.dtype(dtype_ten4).itemsize
-        itemsize_z = numpy.dtype(dtype_z).itemsize
+        itemsize_z = np.dtype(dtype_z).itemsize
        typecode_z = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
        ten4, neib_shape, neib_step = inp
        z, = out

--- a/theano/gpuarray/nnet.py
+++ b/theano/gpuarray/nnet.py
 from __future__ import absolute_import, print_function, division
 import os
-import numpy
+import numpy as np
 from theano import Op, Apply, config
 from six import StringIO
@@ -195,13 +195,13 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
    def c_code(self, node, nodename, inp, out, sub):
        if node.inputs[0].type.context.kind != b'cuda':
            raise NotImplementedError('cuda only')
-        itemsize_x = numpy.dtype(node.inputs[0].dtype).itemsize
+        itemsize_x = np.dtype(node.inputs[0].dtype).itemsize
-        worksize_x = numpy.dtype(work_dtype(node.inputs[0].dtype)).itemsize
+        worksize_x = np.dtype(work_dtype(node.inputs[0].dtype)).itemsize
-        itemsize_b = numpy.dtype(node.inputs[1].dtype).itemsize
+        itemsize_b = np.dtype(node.inputs[1].dtype).itemsize
-        itemsize_y_idx = numpy.dtype(node.inputs[2].dtype).itemsize
+        itemsize_y_idx = np.dtype(node.inputs[2].dtype).itemsize
-        itemsize_nll = numpy.dtype(node.outputs[0].dtype).itemsize
+        itemsize_nll = np.dtype(node.outputs[0].dtype).itemsize
-        itemsize_sm = numpy.dtype(node.outputs[1].dtype).itemsize
+        itemsize_sm = np.dtype(node.outputs[1].dtype).itemsize
-        itemsize_am = numpy.dtype(node.outputs[2].dtype).itemsize
+        itemsize_am = np.dtype(node.outputs[2].dtype).itemsize
        x, b, y_idx = inp
        nll, sm, am = out
        fail = sub['fail']
@@ -307,15 +307,15 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
        if node.inputs[0].type.context.kind != b'cuda':
            raise NotImplementedError("cuda only")
        typecode_dx = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
-        itemsize_dnll = numpy.dtype(node.inputs[0].dtype).itemsize
+        itemsize_dnll = np.dtype(node.inputs[0].dtype).itemsize
-        itemsize_sm = numpy.dtype(node.inputs[1].dtype).itemsize
+        itemsize_sm = np.dtype(node.inputs[1].dtype).itemsize
-        itemsize_y_idx = numpy.dtype(node.inputs[2].dtype).itemsize
+        itemsize_y_idx = np.dtype(node.inputs[2].dtype).itemsize
-        itemsize_dx = numpy.dtype(node.outputs[0].dtype).itemsize
+        itemsize_dx = np.dtype(node.outputs[0].dtype).itemsize
        dtype_dnll = node.inputs[0].dtype
        dtype_sm = node.inputs[1].dtype
        dtype_y_idx = node.inputs[2].dtype
        dtype_dx = node.outputs[0].dtype
-        type_intp = gpuarray.dtype_to_ctype(numpy.intp)
+        type_intp = gpuarray.dtype_to_ctype(np.intp)
        dnll, sm, y_idx = inp
        dx, = out
        fail = sub['fail']
@@ -519,8 +519,8 @@ class GpuSoftmax(GpuKernelBase, Op):
        dtype_x = node.inputs[0].dtype
        work_x = work_dtype(dtype_x)
        dtype_z = node.outputs[0].dtype
-        itemsize_x = numpy.dtype(dtype_x).itemsize
+        itemsize_x = np.dtype(dtype_x).itemsize
-        itemsize_z = numpy.dtype(dtype_z).itemsize
+        itemsize_z = np.dtype(dtype_z).itemsize
        typecode = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
        x, = inp
        z, = out
@@ -719,9 +719,9 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
        dtype_b = node.inputs[1].dtype
        dtype_z = node.outputs[0].dtype
        work_x = work_dtype(dtype_x)
-        itemsize_x = numpy.dtype(dtype_x).itemsize
+        itemsize_x = np.dtype(dtype_x).itemsize
-        itemsize_b = numpy.dtype(dtype_b).itemsize
+        itemsize_b = np.dtype(dtype_b).itemsize
-        itemsize_z = numpy.dtype(dtype_z).itemsize
+        itemsize_z = np.dtype(dtype_z).itemsize
        typecode = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
        x, b = inp
        z, = out

--- a/theano/gpuarray/opt.py
+++ b/theano/gpuarray/opt.py
 from __future__ import absolute_import, print_function, division
 import copy
-import numpy
+import numpy as np
 import logging
 import pdb
 import time
@@ -622,7 +622,7 @@ def local_gpualloc_memset_0(node):
        inp = node.inputs[0]
        if (isinstance(inp, GpuArrayConstant) and
                inp.data.size == 1 and
-                (numpy.asarray(inp.data) == 0).all()):
+                (np.asarray(inp.data) == 0).all()):
            new_op = gpu_alloc(node.op.context_name, memset_0=True)
            return [new_op(*node.inputs)]
@@ -632,7 +632,7 @@ def local_gpualloc_memset_0(node):
 def local_gpua_alloc_empty_to_zeros(node):
    if isinstance(node.op, GpuAllocEmpty):
        context_name = infer_context_name(*node.inputs)
-        z = numpy.asarray(0, dtype=node.outputs[0].dtype)
+        z = np.asarray(0, dtype=node.outputs[0].dtype)
        return [gpu_alloc(context_name)(as_gpuarray_variable(z, context_name),
                                        *node.inputs)]
 optdb.register('local_gpua_alloc_empty_to_zeros',
@@ -830,7 +830,7 @@ def local_gpua_shape_graph(op, context_name, inputs, outputs):
 def gpu_print_wrapper(op, cnda):
-    op.old_op.global_fn(op.old_op, numpy.asarray(cnda))
+    op.old_op.global_fn(op.old_op, np.asarray(cnda))
 @register_opt('fast_compile')

--- a/theano/gpuarray/opt_util.py
+++ b/theano/gpuarray/opt_util.py
 from __future__ import absolute_import, print_function, division
 from functools import wraps
-import numpy
+import numpy as np
 from theano import tensor, scalar as scal, Constant
 from theano.gof import local_optimizer
@@ -11,7 +11,7 @@ from theano.tensor import (DimShuffle, get_scalar_constant_value,
 from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, GpuReshape, gpu_alloc_empty
 from .elemwise import GpuDimShuffle, GpuElemwise
-_one = scal.constant(numpy.asarray(1.0, dtype='float32'))
+_one = scal.constant(np.asarray(1.0, dtype='float32'))
 def grab_cpu_scalar(v, nd):

--- a/theano/gpuarray/subtensor.py
+++ b/theano/gpuarray/subtensor.py
@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
 import os
-import numpy
+import numpy as np
 from six import integer_types
 from six.moves import StringIO
@@ -128,7 +128,7 @@ class GpuSubtensor(HideC, Subtensor):
        def fix_idx(idx):
            if idx is None:
                return "0", 1
-            elif isinstance(idx, (numpy.integer, integer_types)):
+            elif isinstance(idx, (np.integer, integer_types)):
                return str(idx), 0
            elif isinstance(idx, gof.Type):
                return indices.pop(0), 0
@@ -155,7 +155,7 @@ class GpuSubtensor(HideC, Subtensor):
            else:
                if isinstance(idx, gof.Type):
                    start = indices.pop(0)
-                elif isinstance(idx, (numpy.integer, integer_types)):
+                elif isinstance(idx, (np.integer, integer_types)):
                    start = idx
                else:
                    assert 0, idx
@@ -511,7 +511,7 @@ class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor):
        # if there are more than one (narray > 1) it should be ignored.
        ap = 0
        for k, i in enumerate(list(nidx)):
-            if (isinstance(i, numpy.ndarray) and
+            if (isinstance(i, np.ndarray) and
                    i.ndim != 0):
                transp.remove(k)
                transp.insert(p, k)
@@ -545,7 +545,7 @@ class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor):
        x = x.__getitem__(idx_)
        # flatten the array-indexed dimensions
-        shape = ((numpy.prod(x.shape[0: p]),) +
+        shape = ((np.prod(x.shape[0: p]),) +
                 x.shape[p:])
        input_flat = x.reshape(shape)
@@ -644,7 +644,7 @@ class GpuAdvancedIncSubtensor1(Op):
        # content to index x and y (This is because we serve as
        # fallback for _dev20).
        if isinstance(idx, gpuarray.GpuArray):
-            idx = numpy.asarray(idx)
+            idx = np.asarray(idx)
        # If `y` has as many dimensions as `x`, then we want to iterate
        # jointly on `x` and `y`. Otherwise, it means `y` should be
@@ -877,10 +877,10 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(set_instead_of_inc)s)) {
        dtype_y = node.inputs[1].dtype
        dtype_ind = node.inputs[2].dtype
        dtype_out = node.outputs[0].dtype
-        itemsize_x = numpy.dtype(dtype_x).itemsize
+        itemsize_x = np.dtype(dtype_x).itemsize
-        itemsize_y = numpy.dtype(dtype_y).itemsize
+        itemsize_y = np.dtype(dtype_y).itemsize
-        itemsize_ind = numpy.dtype(dtype_ind).itemsize
+        itemsize_ind = np.dtype(dtype_ind).itemsize
-        itemsize_out = numpy.dtype(dtype_out).itemsize
+        itemsize_out = np.dtype(dtype_out).itemsize
        flags = Kernel.get_flags(dtype_x, dtype_y, dtype_ind)
        type_x = gpuarray.dtype_to_ctype(dtype_x)
        type_y = gpuarray.dtype_to_ctype(dtype_y)
@@ -1007,10 +1007,10 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
        dtype_y = node.inputs[1].dtype
        dtype_ind = node.inputs[2].dtype
        dtype_out = node.outputs[0].dtype
-        itemsize_x = numpy.dtype(dtype_x).itemsize
+        itemsize_x = np.dtype(dtype_x).itemsize
-        itemsize_y = numpy.dtype(dtype_y).itemsize
+        itemsize_y = np.dtype(dtype_y).itemsize
-        itemsize_ind = numpy.dtype(dtype_ind).itemsize
+        itemsize_ind = np.dtype(dtype_ind).itemsize
-        itemsize_out = numpy.dtype(dtype_out).itemsize
+        itemsize_out = np.dtype(dtype_out).itemsize
        k_var = "k_vector_add_fast_" + nodename
        return super(GpuAdvancedIncSubtensor1_dev20, self).c_support_code_struct(node, nodename) + """
@@ -1112,7 +1112,7 @@ class GpuDiagonal(Subtensor):
        if x.size == 0:
            out_shape = [d for i, d in enumerate(x.shape)
                         if i not in (self.axis1, self.axis2)]
-            diag_size = numpy.min((x.shape[self.axis1], x.shape[self.axis2]))
+            diag_size = np.min((x.shape[self.axis1], x.shape[self.axis2]))
            out_shape.append(diag_size)
            z[0] = node.outputs[0].type.value_zeros(tuple(out_shape))
            return
@@ -1128,15 +1128,15 @@ class GpuDiagonal(Subtensor):
        if x.shape[stride_axis] < x.shape[slice_axis]:
            # in the bigger triangle
-            numstride = small_axis - numpy.max((
+            numstride = small_axis - np.max((
-                0, small_axis + numpy.abs(self.offset) - large_axis))
+                0, small_axis + np.abs(self.offset) - large_axis))
        else:
            # in the smaller triangle
-            numstride = small_axis - numpy.abs(self.offset)
+            numstride = small_axis - np.abs(self.offset)
-        slicer = [numpy.s_[:], ] * x.ndim
+        slicer = [np.s_[:], ] * x.ndim
-        slicer[stride_axis] = numpy.s_[:numstride]
+        slicer[stride_axis] = np.s_[:numstride]
-        slicer[slice_axis] = numpy.abs(self.offset)
+        slicer[slice_axis] = np.abs(self.offset)
        slicer = tuple(slicer)
        # step 2) Swap stride_axis to the last dim because we want the dim on

--- a/theano/gpuarray/tests/rnn_support.py
+++ b/theano/gpuarray/tests/rnn_support.py
@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
 import theano
 import theano.tensor as T
-import numpy
+import numpy as np
 class Model(object):
@@ -27,9 +27,9 @@ class Model(object):
 def uniform(stdev, size):
    """uniform distribution with the given stdev and size"""
-    return numpy.random.uniform(
+    return np.random.uniform(
-        low=-stdev * numpy.sqrt(3),
+        low=-stdev * np.sqrt(3),
-        high=stdev * numpy.sqrt(3),
+        high=stdev * np.sqrt(3),
        size=size
    ).astype(theano.config.floatX)
@@ -37,7 +37,7 @@ def uniform(stdev, size):
 def linear_transform_weights(input_dim, output_dim,
                             param_list=None, name=""):
    "theano shared variable given input and output dimension"
-    weight_inialization = uniform(numpy.sqrt(2.0 / input_dim),
+    weight_inialization = uniform(np.sqrt(2.0 / input_dim),
                                  (input_dim, output_dim))
    W = theano.shared(weight_inialization, name=name)
@@ -49,7 +49,7 @@ def linear_transform_weights(input_dim, output_dim,
 def bias_weights(length, param_list=None, name=""):
    "theano shared variable for bias unit, given length"
-    bias_initialization = numpy.zeros(length).astype(theano.config.floatX)
+    bias_initialization = np.zeros(length).astype(theano.config.floatX)
    bias = theano.shared(
        bias_initialization,

--- a/theano/gpuarray/tests/test_abstractconv.py
+++ b/theano/gpuarray/tests/test_abstractconv.py
@@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function, division
 from nose.plugins.skip import SkipTest
 from nose.tools import assert_raises
-import numpy
+import numpy as np
 from theano.tensor.nnet.tests import test_abstract_conv
 from ..type import GpuArrayType, gpuarray_shared_constructor, get_context
@@ -250,7 +250,7 @@ class TestDnnConvTypes(test_abstract_conv.TestConvTypes):
        self.filters = gpu_ftensor4()
        self.topgrad = gpu_ftensor4()
        self.constant_tensor = gpuarray.array(
-            numpy.zeros((3, 5, 7, 11), dtype='float32'),
+            np.zeros((3, 5, 7, 11), dtype='float32'),
            context=get_context(test_ctx_name))

--- a/theano/gpuarray/tests/test_basic_ops.py
+++ b/theano/gpuarray/tests/test_basic_ops.py
@@ -4,7 +4,7 @@ from theano.compat import izip
 from six import iteritems
-import numpy
+import numpy as np
 import theano
 import theano.tensor as T
 from theano.tensor import TensorType
@@ -29,7 +29,7 @@ from .config import mode_with_gpu, mode_without_gpu, test_ctx_name
 from pygpu import gpuarray
 utt.seed_rng()
-rng = numpy.random.RandomState(seed=utt.fetch_seed())
+rng = np.random.RandomState(seed=utt.fetch_seed())
 def inplace_func(inputs, outputs, mode=None, allow_input_downcast=False,
@@ -92,7 +92,7 @@ def makeTester(name, op, gpu_op, cases, checks=None, mode_gpu=mode_with_gpu,
            for testname, inputs in iteritems(cases):
                for _ in range(len(inputs)):
                    if type(inputs[_]) is float:
-                        inputs[_] = numpy.asarray(inputs[_],
+                        inputs[_] = np.asarray(inputs[_],
                                               dtype=theano.config.floatX)
                self.run_case(testname, inputs)
@@ -177,7 +177,7 @@ def test_transfer_cpu_gpu():
    a = T.fmatrix('a')
    g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')
-    av = numpy.asarray(rng.rand(5, 4), dtype='float32')
+    av = np.asarray(rng.rand(5, 4), dtype='float32')
    gv = gpuarray.array(av, context=get_context(test_ctx_name))
    f = theano.function([a], GpuFromHost(test_ctx_name)(a))
@@ -186,14 +186,14 @@ def test_transfer_cpu_gpu():
    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
-    assert numpy.all(fv == av)
+    assert np.all(fv == av)
 def test_transfer_gpu_gpu():
    g = GpuArrayType(dtype='float32', broadcastable=(False, False),
                     context_name=test_ctx_name)()
-    av = numpy.asarray(rng.rand(5, 4), dtype='float32')
+    av = np.asarray(rng.rand(5, 4), dtype='float32')
    gv = gpuarray.array(av, context=get_context(test_ctx_name))
    mode = mode_with_gpu.excluding('cut_gpua_host_transfers', 'local_cut_gpua_host_gpua')
    f = theano.function([g], GpuToGpu(test_ctx_name)(g), mode=mode)
@@ -211,7 +211,7 @@ def test_transfer_strided():
    a = T.fmatrix('a')
    g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')
-    av = numpy.asarray(rng.rand(5, 8), dtype='float32')
+    av = np.asarray(rng.rand(5, 8), dtype='float32')
    gv = gpuarray.array(av, context=get_context(test_ctx_name))
    av = av[:, ::2]
@@ -223,7 +223,7 @@ def test_transfer_strided():
    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
-    assert numpy.all(fv == av)
+    assert np.all(fv == av)
 def gpu_alloc_expected(x, *shp):
@@ -237,16 +237,16 @@ GpuAllocTester = makeTester(
    op=alloc,
    gpu_op=GpuAlloc(test_ctx_name),
    cases=dict(
-        correct01=(rand(), numpy.int32(7)),
+        correct01=(rand(), np.int32(7)),
        # just gives a DeepCopyOp with possibly wrong results on the CPU
-        # correct01_bcast=(rand(1), numpy.int32(7)),
+        # correct01_bcast=(rand(1), np.int32(7)),
-        correct02=(rand(), numpy.int32(4), numpy.int32(7)),
+        correct02=(rand(), np.int32(4), np.int32(7)),
-        correct12=(rand(7), numpy.int32(4), numpy.int32(7)),
+        correct12=(rand(7), np.int32(4), np.int32(7)),
-        correct13=(rand(7), numpy.int32(2), numpy.int32(4),
+        correct13=(rand(7), np.int32(2), np.int32(4),
-                   numpy.int32(7)),
+                   np.int32(7)),
-        correct23=(rand(4, 7), numpy.int32(2), numpy.int32(4),
+        correct23=(rand(4, 7), np.int32(2), np.int32(4),
-                   numpy.int32(7)),
+                   np.int32(7)),
-        bad_shape12=(rand(7), numpy.int32(7), numpy.int32(5)),
+        bad_shape12=(rand(7), np.int32(7), np.int32(5)),
        )
 )
@@ -282,7 +282,7 @@ def test_shape():
    v = gpuarray.zeros((3, 4, 5), dtype='float32', context=get_context(test_ctx_name))
    f = theano.function([x], x.shape)
    topo = f.maker.fgraph.toposort()
-    assert numpy.all(f(v) == (3, 4, 5))
+    assert np.all(f(v) == (3, 4, 5))
    if theano.config.mode != 'FAST_COMPILE':
        assert len(topo) == 4
        assert isinstance(topo[0].op, T.opt.Shape_i)
@@ -292,7 +292,7 @@ def test_shape():
    mode = mode_with_gpu.excluding("local_shape_to_shape_i")
    f = theano.function([x], x.shape, mode=mode)
    topo = f.maker.fgraph.toposort()
-    assert numpy.all(f(v) == (3, 4, 5))
+    assert np.all(f(v) == (3, 4, 5))
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Shape)
@@ -300,7 +300,7 @@ def test_shape():
 def test_gpu_contiguous():
    a = T.fmatrix('a')
    i = T.iscalar('i')
-    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
+    a_val = np.asarray(np.random.rand(4, 5), dtype='float32')
    # The reshape is needed otherwise we make the subtensor on the CPU
    # to transfer less data.
    f = theano.function([a, i], gpu_contiguous(a.reshape((5, 4))[::i]),
@@ -353,22 +353,22 @@ class G_Join_and_Split(test_basic.T_Join_and_Split):
        self.shared = gpuarray_shared_constructor
    def test_gpusplit_opt(self):
-        rng = numpy.random.RandomState(seed=utt.fetch_seed())
+        rng = np.random.RandomState(seed=utt.fetch_seed())
        m = self.shared(rng.rand(4, 6).astype(self.floatX))
        o = T.Split(2)(m, 0, [2, 2])
        f = theano.function([], o, mode=self.mode)
        assert any([isinstance(node.op, self.split_op_class)
                    for node in f.maker.fgraph.toposort()])
        o1, o2 = f()
-        assert numpy.allclose(o1, m.get_value(borrow=True)[:2])
+        assert np.allclose(o1, m.get_value(borrow=True)[:2])
-        assert numpy.allclose(o2, m.get_value(borrow=True)[2:])
+        assert np.allclose(o2, m.get_value(borrow=True)[2:])
 def test_gpujoin_gpualloc():
    a = T.fmatrix('a')
-    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
+    a_val = np.asarray(np.random.rand(4, 5), dtype='float32')
    b = T.fmatrix('b')
-    b_val = numpy.asarray(numpy.random.rand(3, 5), dtype='float32')
+    b_val = np.asarray(np.random.rand(3, 5), dtype='float32')
    f = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                        mode=mode_without_gpu)
@@ -387,7 +387,7 @@ def test_gpujoin_gpualloc():
                for node in f_gpu2.maker.fgraph.toposort()]) == 2
    assert sum([node.op == gpu_join
                for node in f_gpu2.maker.fgraph.toposort()]) == 1
-    assert numpy.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))
+    assert np.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))
 def test_gpueye():
@@ -401,14 +401,14 @@ def test_gpueye():
            M = N
        N_symb = T.iscalar()
        M_symb = T.iscalar()
-        k_symb = numpy.asarray(0)
+        k_symb = np.asarray(0)
        out = T.eye(N_symb, M_symb, k_symb, dtype=dtype)
        f = theano.function([N_symb, M_symb],
                            T.stack(out),
                            mode=mode_with_gpu)
-        result = numpy.asarray(f(N, M))
+        result = np.asarray(f(N, M))
-        assert numpy.allclose(result, numpy.eye(N, M_, dtype=dtype))
+        assert np.allclose(result, np.eye(N, M_, dtype=dtype))
-        assert result.dtype == numpy.dtype(dtype)
+        assert result.dtype == np.dtype(dtype)
        assert any([isinstance(node.op, GpuEye)
                    for node in f.maker.fgraph.toposort()])
@@ -429,8 +429,8 @@ def test_hostfromgpu_shape_i():
                                'specialize')
    a = T.fmatrix('a')
    ca = theano.gpuarray.type.GpuArrayType('float32', (False, False))()
-    av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
+    av = np.asarray(np.random.rand(5, 4), dtype='float32')
-    cv = gpuarray.asarray(numpy.random.rand(5, 4),
+    cv = gpuarray.asarray(np.random.rand(5, 4),
                          dtype='float32',
                          context=get_context(test_ctx_name))
@@ -464,7 +464,7 @@ def test_Gpujoin_inplace():
    non-empty element.
    """
    s = T.lscalar()
-    data = numpy.array([3, 4, 5], dtype=theano.config.floatX)
+    data = np.array([3, 4, 5], dtype=theano.config.floatX)
    x = gpuarray_shared_constructor(data, borrow=True)
    z = T.zeros((s,))
@@ -473,4 +473,4 @@ def test_Gpujoin_inplace():
    f = theano.function([s], theano.Out(c, borrow=True))
    assert x.get_value(borrow=True, return_internal_type=True) is f(0)
-    assert numpy.allclose(f(0), [3, 4, 5])
+    assert np.allclose(f(0), [3, 4, 5])
--- a/theano/gpuarray/tests/test_blas.py
+++ b/theano/gpuarray/tests/test_blas.py
@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
 from unittest import TestCase
 from nose.plugins.skip import SkipTest
 import itertools
-import numpy
+import numpy as np
 import theano
 from theano import tensor
@@ -138,8 +138,8 @@ def test_gemv_zeros():
    # Apply to an empty matrix of shape (dim, 0) and an empty vector of shape (0,)
    dim = 1000
-    A = numpy.zeros((dim, 0), dtype=theano.config.floatX)
+    A = np.zeros((dim, 0), dtype=theano.config.floatX)
-    b = numpy.zeros((0,), dtype=theano.config.floatX)
+    b = np.zeros((0,), dtype=theano.config.floatX)
    tmp = f(A, b)
-    assert numpy.allclose(tmp,
+    assert np.allclose(tmp,
-                          numpy.zeros((dim,)))
+                       np.zeros((dim,)))
--- a/theano/gpuarray/tests/test_blocksparse.py
+++ b/theano/gpuarray/tests/test_blocksparse.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import theano
 from theano import tensor
@@ -39,7 +39,7 @@ class BlockSparse_Gemv_and_Outer(test_blocksparse.BlockSparse_Gemv_and_Outer):
        o = gpu_sparse_block_gemv(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
        gW = theano.grad(o.sum(), W)
-        lr = numpy.asarray(0.05, dtype='float32')
+        lr = np.asarray(0.05, dtype='float32')
        upd = W - lr * gW

--- a/theano/gpuarray/tests/test_cgpukernelbase.py
+++ b/theano/gpuarray/tests/test_cgpukernelbase.py
 from __future__ import division, absolute_import, print_function
-import numpy
+import numpy as np
 from six.moves import xrange
 import theano
@@ -69,4 +69,4 @@ def test_cgpukernelbase():
    r = f()
-    assert (numpy.asarray(r) == numpy.eye(4, 5, dtype='int32')).all()
+    assert (np.asarray(r) == np.eye(4, 5, dtype='int32')).all()
--- a/theano/gpuarray/tests/test_dnn.py
+++ b/theano/gpuarray/tests/test_dnn.py
--- a/theano/gpuarray/tests/test_elemwise.py
+++ b/theano/gpuarray/tests/test_elemwise.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import scipy.special
 import theano
@@ -45,8 +45,8 @@ def test_elemwise_pow():
            output = base ** exp
            f = theano.function([base, exp], output)
-            base_val = numpy.random.randint(0, 5, size=10).astype(dtype_base)
+            base_val = np.random.randint(0, 5, size=10).astype(dtype_base)
-            exp_val = numpy.random.randint(0, 3, size=10).astype(dtype_exp)
+            exp_val = np.random.randint(0, 3, size=10).astype(dtype_exp)
            # Call the function to make sure the output is valid
            out = f(base_val, exp_val)
@@ -68,7 +68,7 @@ class TestMathErrorFunctions(TestCase):
        # to have the GPU ops run on large data.
        default_array = [x / 10.0 for x in range(-50, 50)] * 1000
        for dtype in self.dtypes:
-            numpy_array = numpy.asarray(default_array, dtype=dtype)
+            numpy_array = np.asarray(default_array, dtype=dtype)
            self.default_arrays[dtype] = numpy_array
            self.expected_erfinv_outputs[dtype] = scipy.special.erfinv(numpy_array)
            self.expected_erfcinv_outputs[dtype] = scipy.special.erfcinv(numpy_array)
@@ -127,7 +127,7 @@ class test_float16():
        o = (cz - cz**2 +
             tensor.cast(x, 'int16') + tensor.cast(x, 'float32') +
             tensor.cast(w, 'float16') -
-             tensor.constant(numpy.float16(1.0)))
+             tensor.constant(np.float16(1.0)))
        theano.function([w, x, y], o, mode=mode_with_gpu)
@@ -154,9 +154,9 @@ class test_float16():
                             i8.astype('float32')],
                            mode=mode_with_gpu)
-        d1 = (numpy.random.rand(4) * 10).astype('float16')
+        d1 = (np.random.rand(4) * 10).astype('float16')
-        d2 = (numpy.random.rand(5) * 10).astype('float32')
+        d2 = (np.random.rand(5) * 10).astype('float32')
-        d3 = (numpy.random.rand(6) * 10).astype('int8')
+        d3 = (np.random.rand(6) * 10).astype('int8')
        res = f(d1, d2, d3)
        for i, out in enumerate(f.outputs):
@@ -337,9 +337,7 @@ class T_gpureduce_dtype(test_elemwise.T_reduce_dtype):
 def speed_reduce10():
-    import numpy
+    data = np.random.rand(1000, 1000).astype("float32")
-    import theano
-    data = numpy.random.rand(1000, 1000).astype("float32")
    m = theano.tensor.fmatrix()
    f = theano.function([m], [m.sum(axis=0), m.T.sum(axis=0)],
                        mode=mode_with_gpu)

--- a/theano/gpuarray/tests/test_fft.py
+++ b/theano/gpuarray/tests/test_fft.py
@@ -7,7 +7,6 @@ import theano.tensor as T
 from theano.tests import unittest_tools as utt
 import theano.gpuarray.fft
-import numpy.fft
 from .config import mode_with_gpu
@@ -37,7 +36,7 @@ class TestFFT(unittest.TestCase):
        res_rfft_comp = (np.asarray(res_rfft[:, :, 0]) +
                         1j * np.asarray(res_rfft[:, :, 1]))
-        rfft_ref = numpy.fft.rfft(inputs_val, axis=1)
+        rfft_ref = np.fft.rfft(inputs_val, axis=1)
        utt.assert_allclose(rfft_ref, res_rfft_comp)
@@ -72,7 +71,7 @@ class TestFFT(unittest.TestCase):
        res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
                         1j * np.asarray(res_rfft[:, :, :, 1]))
-        rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1, 2))
+        rfft_ref = np.fft.rfftn(inputs_val, axes=(1, 2))
        utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
@@ -91,7 +90,7 @@ class TestFFT(unittest.TestCase):
        utt.assert_allclose(inputs_val, np.asarray(res_ifft))
-        inputs_val = numpy.random.random((1, N, N, 2)).astype('float32')
+        inputs_val = np.random.random((1, N, N, 2)).astype('float32')
        inputs = theano.shared(inputs_val)
        irfft = theano.gpuarray.fft.cuirfft(inputs)
@@ -123,7 +122,7 @@ class TestFFT(unittest.TestCase):
        res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
                         1j * np.asarray(res_rfft[:, :, :, 1]))
-        rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1, 2))
+        rfft_ref = np.fft.rfftn(inputs_val, axes=(1, 2))
        utt.assert_allclose(rfft_ref / N, res_rfft_comp, atol=1e-4, rtol=1e-4)
@@ -146,7 +145,7 @@ class TestFFT(unittest.TestCase):
        f_irfft = theano.function([], irfft, mode=mode_with_gpu)
        res_irfft = f_irfft()
-        irfft_ref = numpy.fft.irfftn(inputs_ref, axes=(1, 2))
+        irfft_ref = np.fft.irfftn(inputs_ref, axes=(1, 2))
        utt.assert_allclose(irfft_ref * N, res_irfft, atol=1e-4, rtol=1e-4)
@@ -195,7 +194,7 @@ class TestFFT(unittest.TestCase):
        res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
                         1j * np.asarray(res_rfft[:, :, :, 1]))
-        rfft_ref = numpy.fft.rfftn(inputs_val, s=(M, M), axes=(1, 2))
+        rfft_ref = np.fft.rfftn(inputs_val, s=(M, M), axes=(1, 2))
        utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
@@ -214,7 +213,7 @@ class TestFFT(unittest.TestCase):
        res_irfft = f_irfft()
        inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1]
-        irfft_ref = numpy.fft.irfftn(inputs_ref, s=(M, M), axes=(1, 2)) * M
+        irfft_ref = np.fft.irfftn(inputs_ref, s=(M, M), axes=(1, 2)) * M
        utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4)
@@ -243,12 +242,12 @@ class TestFFT(unittest.TestCase):
        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
    def test_params(self):
-        inputs_val = numpy.random.random((1, N)).astype('float32')
+        inputs_val = np.random.random((1, N)).astype('float32')
        inputs = theano.shared(inputs_val)
        self.assertRaises(ValueError, theano.gpuarray.fft.curfft, inputs, norm=123)
-        inputs_val = numpy.random.random((1, N // 2 + 1, 2)).astype('float32')
+        inputs_val = np.random.random((1, N // 2 + 1, 2)).astype('float32')
        inputs = theano.shared(inputs_val)
        self.assertRaises(ValueError, theano.gpuarray.fft.cuirfft, inputs, norm=123)

--- a/theano/gpuarray/tests/test_gemmcorr.py
+++ b/theano/gpuarray/tests/test_gemmcorr.py
 from __future__ import absolute_import, print_function, division
 import unittest
-import numpy
+import numpy as np
 import theano
 from theano import config
@@ -23,8 +23,8 @@ class TestCorrMM(unittest.TestCase):
        inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)]
        filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)]
-        inputs_val = numpy.random.random(inputs_shape).astype(config.floatX)
+        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
-        filters_val = numpy.random.random(filters_shape).astype(config.floatX)
+        filters_val = np.random.random(filters_shape).astype(config.floatX)
        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)
@@ -122,11 +122,11 @@ class TestCorrMM(unittest.TestCase):
        filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)]
        dCdH_shape = [dCdH_shape[i] for i in (0, 3, 1, 2)]
-        inputs_val = numpy.random.random(inputs_shape).astype(config.floatX)
+        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
-        dCdH_val = numpy.random.random(dCdH_shape).astype(config.floatX)
+        dCdH_val = np.random.random(dCdH_shape).astype(config.floatX)
        inputs = gpuarray_shared_constructor(inputs_val)
        dCdH = gpuarray_shared_constructor(dCdH_val)
-        shape = gpuarray_shared_constructor(numpy.array(filters_shape[2:]))
+        shape = gpuarray_shared_constructor(np.array(filters_shape[2:]))
        if (subsample == (1, 1)):
            conv_ref = CorrMM_gradWeights(subsample=subsample)(
@@ -169,14 +169,14 @@ class TestCorrMM(unittest.TestCase):
        inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)]
        filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)]
-        inputs_val = numpy.random.random(inputs_shape).astype(config.floatX)
+        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
-        filters_val = numpy.random.random(filters_shape).astype(config.floatX)
+        filters_val = np.random.random(filters_shape).astype(config.floatX)
        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)
        bottom_height = (inputs_shape[2] - 1) * subsample[0] + filters_shape[2]
        bottom_width = (inputs_shape[3] - 1) * subsample[1] + filters_shape[3]
-        bottom_shape = gpuarray_shared_constructor(numpy.array([bottom_height, bottom_width]))
+        bottom_shape = gpuarray_shared_constructor(np.array([bottom_height, bottom_width]))
        if (subsample == (1, 1)):
            conv_ref = CorrMM_gradInputs(subsample=subsample)(

--- a/theano/gpuarray/tests/test_gemmcorr3d.py
+++ b/theano/gpuarray/tests/test_gemmcorr3d.py
 from __future__ import absolute_import, print_function, division
 import unittest
-import numpy
+import numpy as np
 import theano
 from theano import config
@@ -23,8 +23,8 @@ class TestCorr3dMM(unittest.TestCase):
        inputs_shape = [inputs_shape[i] for i in (0, 4, 1, 2, 3)]
        filters_shape = [filters_shape[i] for i in (0, 4, 1, 2, 3)]
-        inputs_val = numpy.random.random(inputs_shape).astype(config.floatX)
+        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
-        filters_val = numpy.random.random(filters_shape).astype(config.floatX)
+        filters_val = np.random.random(filters_shape).astype(config.floatX)
        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)
@@ -121,11 +121,11 @@ class TestCorr3dMM(unittest.TestCase):
        filters_shape = [filters_shape[i] for i in (0, 4, 1, 2, 3)]
        dCdH_shape = [dCdH_shape[i] for i in (0, 4, 1, 2, 3)]
-        inputs_val = numpy.random.random(inputs_shape).astype(config.floatX)
+        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
-        dCdH_val = numpy.random.random(dCdH_shape).astype(config.floatX)
+        dCdH_val = np.random.random(dCdH_shape).astype(config.floatX)
        inputs = gpuarray_shared_constructor(inputs_val)
        dCdH = gpuarray_shared_constructor(dCdH_val)
-        shape = gpuarray_shared_constructor(numpy.array(filters_shape[2:]))
+        shape = gpuarray_shared_constructor(np.array(filters_shape[2:]))
        if (subsample == (1, 1, 1)):
            conv_ref = Corr3dMM_gradWeights(subsample=subsample)(
@@ -168,15 +168,15 @@ class TestCorr3dMM(unittest.TestCase):
        inputs_shape = [inputs_shape[i] for i in (0, 4, 1, 2, 3)]
        filters_shape = [filters_shape[i] for i in (0, 4, 1, 2, 3)]
-        inputs_val = numpy.random.random(inputs_shape).astype(config.floatX)
+        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
-        filters_val = numpy.random.random(filters_shape).astype(config.floatX)
+        filters_val = np.random.random(filters_shape).astype(config.floatX)
        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)
        bottom_height = (inputs_shape[2] - 1) * subsample[0] + filters_shape[2]
        bottom_width = (inputs_shape[3] - 1) * subsample[1] + filters_shape[3]
        bottom_depth = (inputs_shape[4] - 1) * subsample[2] + filters_shape[4]
-        bottom_shape = gpuarray_shared_constructor(numpy.array([bottom_height, bottom_width, bottom_depth]))
+        bottom_shape = gpuarray_shared_constructor(np.array([bottom_height, bottom_width, bottom_depth]))
        if (subsample == (1, 1, 1)):
            conv_ref = Corr3dMM_gradInputs(subsample=subsample)(

--- a/theano/gpuarray/tests/test_linalg.py
+++ b/theano/gpuarray/tests/test_linalg.py
 from __future__ import absolute_import, division, print_function
 import unittest
-import numpy
+import numpy as np
 import theano
 from theano.tests import unittest_tools as utt
@@ -19,8 +19,8 @@ if not cusolver_available:
 class TestCusolver(unittest.TestCase):
    def run_gpu_solve(self, A_val, x_val, A_struct=None):
-        b_val = numpy.dot(A_val, x_val)
+        b_val = np.dot(A_val, x_val)
-        b_val_trans = numpy.dot(A_val.T, x_val)
+        b_val_trans = np.dot(A_val.T, x_val)
        A = theano.tensor.matrix("A", dtype="float32")
        b = theano.tensor.matrix("b", dtype="float32")
@@ -35,16 +35,16 @@ class TestCusolver(unittest.TestCase):
        fn = theano.function([A, b, b_trans], [solver, solver_trans], mode=mode_with_gpu)
        res = fn(A_val, b_val, b_val_trans)
-        x_res = numpy.array(res[0])
+        x_res = np.array(res[0])
-        x_res_trans = numpy.array(res[1])
+        x_res_trans = np.array(res[1])
        utt.assert_allclose(x_val, x_res)
        utt.assert_allclose(x_val, x_res_trans)
    def test_diag_solve(self):
-        numpy.random.seed(1)
+        np.random.seed(1)
-        A_val = numpy.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]],
+        A_val = np.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]],
                           dtype="float32")
-        x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1],
+        x_val = np.random.uniform(-0.4, 0.4, (A_val.shape[1],
                                  1)).astype("float32")
        self.run_gpu_solve(A_val, x_val)
@@ -52,42 +52,42 @@ class TestCusolver(unittest.TestCase):
        """
        Test when shape of b (k, m) is such as m > k
        """
-        numpy.random.seed(1)
+        np.random.seed(1)
-        A_val = numpy.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]],
+        A_val = np.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]],
                           dtype="float32")
-        x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1],
+        x_val = np.random.uniform(-0.4, 0.4, (A_val.shape[1],
                                  A_val.shape[1] + 1)).astype("float32")
        self.run_gpu_solve(A_val, x_val)
    def test_sym_solve(self):
-        numpy.random.seed(1)
+        np.random.seed(1)
-        A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
+        A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
-        A_sym = numpy.dot(A_val, A_val.T)
+        A_sym = np.dot(A_val, A_val.T)
-        x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1],
+        x_val = np.random.uniform(-0.4, 0.4, (A_val.shape[1],
                                  1)).astype("float32")
        self.run_gpu_solve(A_sym, x_val, 'symmetric')
    def test_orth_solve(self):
-        numpy.random.seed(1)
+        np.random.seed(1)
-        A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
+        A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
-        A_orth = numpy.linalg.svd(A_val)[0]
+        A_orth = np.linalg.svd(A_val)[0]
-        x_val = numpy.random.uniform(-0.4, 0.4, (A_orth.shape[1],
+        x_val = np.random.uniform(-0.4, 0.4, (A_orth.shape[1],
                                  1)).astype("float32")
        self.run_gpu_solve(A_orth, x_val)
    def test_uni_rand_solve(self):
-        numpy.random.seed(1)
+        np.random.seed(1)
-        A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
+        A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
-        x_val = numpy.random.uniform(-0.4, 0.4,
+        x_val = np.random.uniform(-0.4, 0.4,
                                  (A_val.shape[1], 4)).astype("float32")
        self.run_gpu_solve(A_val, x_val)
    def test_linalgerrsym_solve(self):
-        numpy.random.seed(1)
+        np.random.seed(1)
-        A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
+        A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
-        x_val = numpy.random.uniform(-0.4, 0.4,
+        x_val = np.random.uniform(-0.4, 0.4,
                                  (A_val.shape[1], 4)).astype("float32")
-        A_val = numpy.dot(A_val.T, A_val)
+        A_val = np.dot(A_val.T, A_val)
        # make A singular
        A_val[:, 2] = A_val[:, 1] + A_val[:, 3]
@@ -99,9 +99,9 @@ class TestCusolver(unittest.TestCase):
        self.assertRaises(LinAlgError, fn, A_val, x_val)
    def test_linalgerr_solve(self):
-        numpy.random.seed(1)
+        np.random.seed(1)
-        A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
+        A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
-        x_val = numpy.random.uniform(-0.4, 0.4,
+        x_val = np.random.uniform(-0.4, 0.4,
                                  (A_val.shape[1], 4)).astype("float32")
        # make A singular
        A_val[:, 2] = 0

--- a/theano/gpuarray/tests/test_multinomial.py
+++ b/theano/gpuarray/tests/test_multinomial.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import unittest
@@ -62,10 +62,10 @@ def test_multinomial_large():
    assert any([type(node.op) is GPUAMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
-    pval = numpy.arange(10000 * 4,
+    pval = np.arange(10000 * 4,
                     dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
-    uval = numpy.ones_like(pval[:, 0]) * 0.5
+    uval = np.ones_like(pval[:, 0]) * 0.5
    mval = f(pval, uval)
    assert mval.shape == pval.shape
@@ -78,7 +78,7 @@ def test_multinomial_large():
    else:
        raise NotImplementedError(config.cast_policy)
    utt.assert_allclose(mval.sum(axis=1), 2)
-    asdf = numpy.asarray([0, 0, 2, 0]) + 0 * pval
+    asdf = np.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, asdf)  # broadcast over all rows
@@ -92,9 +92,9 @@ def test_gpu_opt_dtypes():
        f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert any([type(node.op) is GPUAMultinomialFromUniform
                    for node in f.maker.fgraph.toposort()])
-        pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
+        pval = np.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
-        uval = numpy.ones_like(pval[:, 0]) * 0.5
+        uval = np.ones_like(pval[:, 0]) * 0.5
        samples = f(pval, uval)
        assert samples.dtype == dtype, "%s != %s" % (samples.dtype, dtype)
@@ -112,9 +112,9 @@ def test_gpu_opt():
    f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([type(node.op) is GPUAMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
-    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
+    pval = np.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
-    uval = numpy.ones_like(pval[:, 0]) * 0.5
+    uval = np.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)
    # Test with a row, it was failing in the past.
@@ -125,9 +125,9 @@ def test_gpu_opt():
    f = function([r, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([type(node.op) is GPUAMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
-    pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4)) + 0.1
+    pval = np.arange(1 * 4, dtype='float32').reshape((1, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
-    uval = numpy.ones_like(pval[:, 0]) * 0.5
+    uval = np.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)
@@ -146,15 +146,15 @@ class test_OP_wor(unittest.TestCase):
        n_elements = 1000
        all_indices = range(n_elements)
-        numpy.random.seed(12345)
+        np.random.seed(12345)
        for i in [5, 10, 50, 100, 500, n_elements]:
-            uni = numpy.random.rand(i).astype(config.floatX)
+            uni = np.random.rand(i).astype(config.floatX)
-            pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+            pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
            pvals /= pvals.sum(1)
            res = f(pvals, uni, i)
-            res = numpy.squeeze(res)
+            res = np.squeeze(res)
            assert len(res) == i, res
-            assert numpy.all(numpy.in1d(numpy.unique(res), all_indices)), res
+            assert np.all(np.in1d(np.unique(res), all_indices)), res
    def test_fail_select_alot(self):
        """
@@ -170,9 +170,9 @@ class test_OP_wor(unittest.TestCase):
        n_elements = 100
        n_selected = 200
-        numpy.random.seed(12345)
+        np.random.seed(12345)
-        uni = numpy.random.rand(n_selected).astype(config.floatX)
+        uni = np.random.rand(n_selected).astype(config.floatX)
-        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        self.assertRaises(ValueError, f, pvals, uni, n_selected)
@@ -191,18 +191,18 @@ class test_OP_wor(unittest.TestCase):
        n_elements = 100
        n_selected = 10
        mean_rtol = 0.0005
-        numpy.random.seed(12345)
+        np.random.seed(12345)
-        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
-        avg_pvals = numpy.zeros((n_elements,), dtype=config.floatX)
+        avg_pvals = np.zeros((n_elements,), dtype=config.floatX)
        for rep in range(10000):
-            uni = numpy.random.rand(n_selected).astype(config.floatX)
+            uni = np.random.rand(n_selected).astype(config.floatX)
            res = f(pvals, uni, n_selected)
-            res = numpy.squeeze(res)
+            res = np.squeeze(res)
            avg_pvals[res] += 1
        avg_pvals /= avg_pvals.sum()
-        avg_diff = numpy.mean(abs(avg_pvals - pvals))
+        avg_diff = np.mean(abs(avg_pvals - pvals))
        assert avg_diff < mean_rtol, avg_diff
@@ -222,14 +222,14 @@ class test_function_wor(unittest.TestCase):
        n_elements = 1000
        all_indices = range(n_elements)
-        numpy.random.seed(12345)
+        np.random.seed(12345)
        for i in [5, 10, 50, 100, 500, n_elements]:
-            pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+            pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
            pvals /= pvals.sum(1)
            res = f(pvals, i)
-            res = numpy.squeeze(res)
+            res = np.squeeze(res)
            assert len(res) == i
-            assert numpy.all(numpy.in1d(numpy.unique(res), all_indices)), res
+            assert np.all(np.in1d(np.unique(res), all_indices)), res
    def test_fail_select_alot(self):
        """
@@ -246,8 +246,8 @@ class test_function_wor(unittest.TestCase):
        n_elements = 100
        n_selected = 200
-        numpy.random.seed(12345)
+        np.random.seed(12345)
-        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        self.assertRaises(ValueError, f, pvals, n_selected)
@@ -267,17 +267,17 @@ class test_function_wor(unittest.TestCase):
        n_elements = 100
        n_selected = 10
        mean_rtol = 0.0005
-        numpy.random.seed(12345)
+        np.random.seed(12345)
-        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
+        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
-        avg_pvals = numpy.zeros((n_elements,), dtype=config.floatX)
+        avg_pvals = np.zeros((n_elements,), dtype=config.floatX)
        for rep in range(10000):
            res = f(pvals, n_selected)
-            res = numpy.squeeze(res)
+            res = np.squeeze(res)
            avg_pvals[res] += 1
        avg_pvals /= avg_pvals.sum()
-        avg_diff = numpy.mean(abs(avg_pvals - pvals))
+        avg_diff = np.mean(abs(avg_pvals - pvals))
        assert avg_diff < mean_rtol
@@ -294,9 +294,9 @@ def test_gpu_opt_wor():
    assert any([type(node.op) is GPUAMultinomialWOReplacementFromUniform
                for node in f.maker.fgraph.toposort()])
    n_samples = 3
-    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
+    pval = np.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
-    uval = numpy.ones(pval.shape[0] * n_samples) * 0.5
+    uval = np.ones(pval.shape[0] * n_samples) * 0.5
    f(pval, uval, n_samples)
    # Test with a row, it was failing in the past.
@@ -307,7 +307,7 @@ def test_gpu_opt_wor():
    f = function([r, u, n], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([type(node.op) is GPUAMultinomialWOReplacementFromUniform
                for node in f.maker.fgraph.toposort()])
-    pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4)) + 0.1
+    pval = np.arange(1 * 4, dtype='float32').reshape((1, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
-    uval = numpy.ones_like(pval[:, 0]) * 0.5
+    uval = np.ones_like(pval[:, 0]) * 0.5
    f(pval, uval, 1)
--- a/theano/gpuarray/tests/test_nerv.py
+++ b/theano/gpuarray/tests/test_nerv.py
 from __future__ import absolute_import, print_function, division
 from nose.plugins.skip import SkipTest
-import numpy
+import numpy as np
 from theano import function
 from theano.tests import unittest_tools as utt
@@ -40,10 +40,10 @@ def test_gemm16_value():
    f = function([m, m2], dot(m, m2), mode=mode_with_gpu)
-    v1 = numpy.random.random((3, 4)).astype('float16')
+    v1 = np.random.random((3, 4)).astype('float16')
-    v2 = numpy.random.random((4, 2)).astype('float16')
+    v2 = np.random.random((4, 2)).astype('float16')
    of = f(v1, v2)
-    on = numpy.dot(v1, v2)
+    on = np.dot(v1, v2)
    utt.assert_allclose(of, on)
--- a/theano/gpuarray/tests/test_nnet.py
+++ b/theano/gpuarray/tests/test_nnet.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import unittest
 import theano
@@ -46,13 +46,13 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()
-    xx = numpy.asarray(numpy.random.rand(batch_size, n_in),
+    xx = np.asarray(np.random.rand(batch_size, n_in),
-                       dtype=numpy.float32)
+                    dtype=np.float32)
-    yy = numpy.ones((batch_size,), dtype='int32')
+    yy = np.ones((batch_size,), dtype='int32')
-    b_values = numpy.zeros((n_out,), dtype='float32')
+    b_values = np.zeros((n_out,), dtype='float32')
-    W_values = numpy.asarray(numpy.random.rand(n_in, n_out), dtype='float32')
+    W_values = np.asarray(np.random.rand(n_in, n_out), dtype='float32')
-    dot_value = numpy.asarray(numpy.dot(xx, W_values), dtype='float32')
+    dot_value = np.asarray(np.dot(xx, W_values), dtype='float32')
    del W_values
    p_y_given_x = T.nnet.softmax(dot_result + b)
    y_pred = T.argmax(p_y_given_x, axis=-1)
@@ -97,10 +97,10 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()
-    softmax_output_value = numpy.random.rand(batch_size,
+    softmax_output_value = np.random.rand(batch_size,
                                          n_out).astype('float32')
-    dnll_value = numpy.asarray(numpy.random.rand(batch_size), dtype='float32')
+    dnll_value = np.asarray(np.random.rand(batch_size), dtype='float32')
-    y_idx_value = numpy.random.randint(low=0, high=5, size=batch_size)
+    y_idx_value = np.random.randint(low=0, high=5, size=batch_size)
    softmax_output = T.fmatrix()
    softmax_output /= softmax_output.sum(axis=1).reshape(
@@ -174,8 +174,8 @@ def softmax_with_bias_unittest_template(dtypeInput, dtypeBias):
                      GpuSoftmaxWithBias)
    def cmp(n, m):
-        data = numpy.random.uniform(1e-7, 1, (n, m)).astype(dtype=dtypeInput)
+        data = np.random.uniform(1e-7, 1, (n, m)).astype(dtype=dtypeInput)
-        b_data = numpy.random.uniform(1e-7, 1, (m,)).astype(dtype=dtypeBias)
+        b_data = np.random.uniform(1e-7, 1, (m,)).astype(dtype=dtypeBias)
        out = f(data, b_data)
        gout = f_gpu(data, b_data)
@@ -227,7 +227,7 @@ def softmax_unittest_template(dtypeInput):
                      GpuSoftmax)
    def cmp(n, m):
-        data = numpy.random.uniform(0, 1, (n, m)).astype(dtype=dtypeInput)
+        data = np.random.uniform(0, 1, (n, m)).astype(dtype=dtypeInput)
        out = f(data)
        gout = f_gpu(data)
@@ -301,7 +301,7 @@ class test_SoftMax(unittest.TestCase):
        return f, f_gpu
    def _cmp(self, n, m, f, f_gpu):
-        data = numpy.arange(n * m, dtype='float32').reshape(n, m)
+        data = np.arange(n * m, dtype='float32').reshape(n, m)
        out = f(data)
        gout = f_gpu(data)
        utt.assert_allclose(out, gout)

--- a/theano/gpuarray/tests/test_opt.py
+++ b/theano/gpuarray/tests/test_opt.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 from nose.tools import assert_raises
 import theano
@@ -79,13 +79,13 @@ def test_local_gpu_contiguous():
 def test_flatten():
    m = theano.tensor.fmatrix()
    f = theano.function([m], m.flatten(), mode=mode_with_gpu)
-    val = numpy.random.rand(10, 11).astype("float32")
+    val = np.random.rand(10, 11).astype("float32")
    res = f(val)
    utt.assert_allclose(res, val.flatten())
    assert res.shape == val.flatten().shape
    assert GpuReshape in [type(node.op)
                          for node in f.maker.fgraph.toposort()]
-    val = numpy.random.rand(10, 11).astype("float32")
+    val = np.random.rand(10, 11).astype("float32")
    res = f(val)
    utt.assert_allclose(res, val.flatten())
    assert res.shape == val.flatten().shape
@@ -93,7 +93,7 @@ def test_flatten():
                          for node in f.maker.fgraph.toposort()]
    f = theano.function([m], m.flatten(ndim=2), mode=mode_with_gpu)
-    val = numpy.random.rand(10, 11).astype("float32")
+    val = np.random.rand(10, 11).astype("float32")
    res = f(val)
    utt.assert_allclose(res, val)
    assert res.shape == val.shape
@@ -102,7 +102,7 @@ def test_flatten():
    m = theano.tensor.tensor3()
    f = theano.function([m], m.flatten(ndim=2), mode=mode_with_gpu)
-    val = numpy.random.rand(10, 11, 12).astype("float32")
+    val = np.random.rand(10, 11, 12).astype("float32")
    res = f(val)
    utt.assert_allclose(res, val.reshape(10, -1))
    assert res.shape == val.reshape(10, -1).shape
@@ -120,7 +120,7 @@ def test_reduce():
        f = theano.function([m], getattr(m, method)(axis=0,
                                                    **param),
                            mode=mode_with_gpu)
-        val = numpy.random.rand(10, 11).astype("float32")
+        val = np.random.rand(10, 11).astype("float32")
        res = f(val)
        utt.assert_allclose(res, getattr(val, method)(axis=0))
        assert res.shape == (11,)
@@ -135,9 +135,9 @@ def test_reduce():
 def test_local_gpualloc_memset_0():
    i = theano.tensor.iscalar()
-    z = numpy.zeros((1,), dtype='float32')
+    z = np.zeros((1,), dtype='float32')
-    o = numpy.ones((1,), dtype='float32')
+    o = np.ones((1,), dtype='float32')
-    ones = numpy.ones((2,), dtype='float32')
+    ones = np.ones((2,), dtype='float32')
    # Test with 0 from CPU op.
    # Should not be transfered as the only client is the output
@@ -146,7 +146,7 @@ def test_local_gpualloc_memset_0():
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, theano.tensor.Alloc)
-    assert (numpy.asarray(f(6)) == 0).all()
+    assert (np.asarray(f(6)) == 0).all()
    # Test with 0 from CPU op.
    # Should be transfered as it is used by another op.
@@ -155,7 +155,7 @@ def test_local_gpualloc_memset_0():
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 3
    assert isinstance(topo[0].op, GpuAlloc)
-    assert (numpy.asarray(f(6)) == 0).all()
+    assert (np.asarray(f(6)) == 0).all()
    # Test with 0
    a = GpuAlloc(test_ctx_name)(z, i)
@@ -163,7 +163,7 @@ def test_local_gpualloc_memset_0():
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0
-    assert (numpy.asarray(f(6)) == 0).all()
+    assert (np.asarray(f(6)) == 0).all()
    # Test with 1
    a = GpuAlloc(test_ctx_name)(o, i)
@@ -172,7 +172,7 @@ def test_local_gpualloc_memset_0():
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc)
    assert not topo[0].op.memset_0
-    assert (numpy.asarray(f(6)) == 1).all()
+    assert (np.asarray(f(6)) == 1).all()
    # Test with 1, 1
    a = GpuAlloc(test_ctx_name)(ones, i)
@@ -181,7 +181,7 @@ def test_local_gpualloc_memset_0():
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc)
    assert not topo[0].op.memset_0
-    assert (numpy.asarray(f(2)) == 1).all()
+    assert (np.asarray(f(2)) == 1).all()
 def test_local_gpualloc_empty():
@@ -219,7 +219,7 @@ def test_local_gpualloc_empty():
 def test_rebroadcast():
-    d = numpy.random.rand(10, 10).astype('float32')
+    d = np.random.rand(10, 10).astype('float32')
    v = theano.tensor.fmatrix()
    up = tensor.unbroadcast(v.sum().dimshuffle('x', 'x'), 0, 1)
    f = theano.function([v], [up], mode=mode_with_gpu)
@@ -257,14 +257,14 @@ class test_gpu_ifelse(test_ifelse.test_ifelse):
        f = theano.function([x, cond],
                            theano.ifelse.ifelse(cond, x.mean(), x.sum()),
                            mode=mode_with_gpu)
-        assert f(numpy.float32([1, 2, 3]), 0) == 6
+        assert f(np.float32([1, 2, 3]), 0) == 6
        x = tensor.vector()
        cond = tensor.scalar()
        f = theano.function([x, cond],
                            theano.ifelse.ifelse(cond, x.mean(), x.sum()),
                            mode=mode_with_gpu)
-        assert f(numpy.float32([1, 2, 3]), 0) == 6
+        assert f(np.float32([1, 2, 3]), 0) == 6
 def test_print_op():
@@ -277,7 +277,7 @@ def test_print_op():
    assert isinstance(topo[1].op, theano.printing.Print)
    assert isinstance(topo[2].op, GpuElemwise)
    assert topo[3].op == host_from_gpu
-    f(numpy.random.random((5, 5)).astype('float32'))
+    f(np.random.random((5, 5)).astype('float32'))
 def test_pdbbreakpoint_op():
@@ -306,7 +306,7 @@ def test_local_gpu_elemwise_careduce():
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 3
    assert topo[1].op.pre_scalar_op == theano.scalar.sqr
-    data = numpy.random.rand(3, 4).astype(theano.config.floatX)
+    data = np.random.rand(3, 4).astype(theano.config.floatX)
    utt.assert_allclose(f(data), (data * data).sum())
    o = (x * x).sum(axis=1)
@@ -328,15 +328,15 @@ def test_local_lift_dot22scalar():
                   for n in f_gpu.maker.fgraph.apply_nodes)
    assert any(isinstance(n.op, GpuGemm)
               for n in f_gpu.maker.fgraph.apply_nodes)
-    x_val = numpy.random.random((2, 3)).astype(theano.config.floatX)
+    x_val = np.random.random((2, 3)).astype(theano.config.floatX)
-    y_val = numpy.random.random((3, 4)).astype(theano.config.floatX)
+    y_val = np.random.random((3, 4)).astype(theano.config.floatX)
    a_val = 0.5
    utt.assert_allclose(f_cpu(x_val, y_val, a_val), f_gpu(x_val, y_val, a_val))
 def test_local_gpu_subtensor():
    # Test shared forced on CPU.
-    t = tensor._shared(numpy.zeros(20, "float32"))
+    t = tensor._shared(np.zeros(20, "float32"))
    f = theano.function([], t[3:4], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
@@ -367,7 +367,7 @@ def test_local_gpu_subtensor():
    # Test shared forced on CPU and we do computation on the output of
    # the subtensor.
-    t = tensor._shared(numpy.zeros(20, "float32"))
+    t = tensor._shared(np.zeros(20, "float32"))
    f = theano.function([], t[3:4] + 1, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
@@ -386,9 +386,9 @@ def test_local_gpu_elemwise():
    b = tensor.fmatrix()
    c = tensor.fmatrix()
-    a_v = (numpy.random.rand(4, 5) * 10).astype("int8")
+    a_v = (np.random.rand(4, 5) * 10).astype("int8")
-    b_v = (numpy.random.rand(4, 5) * 10).astype("float32")
+    b_v = (np.random.rand(4, 5) * 10).astype("float32")
-    c_v = (numpy.random.rand(4, 5) * 10).astype("float32")
+    c_v = (np.random.rand(4, 5) * 10).astype("float32")
    # Due to optimization order, this composite is created when all
    # the op are on the gpu.
@@ -440,7 +440,7 @@ def test_local_gpu_elemwise():
    utt.assert_allclose(out[1], a_v * c_v)
    # Test non-contiguous input
-    c = gpuarray_shared_constructor(numpy.asarray(c_v, dtype='float32'))
+    c = gpuarray_shared_constructor(np.asarray(c_v, dtype='float32'))
    f = theano.function([a, b], outs_op(a[::2], b[::2], c[::2]),
                        mode=mode_with_gpu)
    out = f(a_v, b_v)
@@ -462,7 +462,7 @@ def test_local_lift_abstractconv_gpu_shape():
 def test_local_assert_no_cpu_op():
-    rng = numpy.random.RandomState(utt.fetch_seed())
+    rng = np.random.RandomState(utt.fetch_seed())
    m = rng.uniform(-1, 1, (10, 10)).astype("float32")
    ms = gpuarray_shared_constructor(m, name="m_shared")
    out = theano.tensor.tanh(ms).dot(ms.T)
@@ -512,6 +512,6 @@ def test_local_lift_solve():
                   for n in f_gpu.maker.fgraph.apply_nodes)
    assert any(isinstance(n.op, GpuCusolverSolve)
               for n in f_gpu.maker.fgraph.apply_nodes)
-    A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
+    A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
-    b_val = numpy.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
+    b_val = np.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
    utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
--- a/theano/gpuarray/tests/test_pickle.py
+++ b/theano/gpuarray/tests/test_pickle.py
@@ -12,7 +12,7 @@ from six import reraise
 from nose.plugins.skip import SkipTest
 from nose.tools import assert_raises
-import numpy
+import numpy as np
 from theano.compat import PY3
 from theano import config
@@ -75,7 +75,7 @@ def test_unpickle_gpuarray_as_numpy_ndarray_flag2():
                    reraise(SkipTest, exc_value, exc_trace)
                raise
-        assert isinstance(mat, numpy.ndarray)
+        assert isinstance(mat, np.ndarray)
        assert mat[0] == -42.0
    finally:

--- a/theano/gpuarray/tests/test_pool.py
+++ b/theano/gpuarray/tests/test_pool.py
@@ -4,7 +4,7 @@ import unittest
 import copy
 import itertools
-import numpy
+import numpy as np
 import theano
 from theano import gradient
 from theano import tensor
@@ -81,7 +81,7 @@ def test_pool2d():
            (3, 2, 6, 6, 6, 5),
            (3, 2, 6, 6, 6, 5, 7), ]
-    numpy.random.RandomState(utt.fetch_seed()).shuffle(shps)
+    np.random.RandomState(utt.fetch_seed()).shuffle(shps)
    test_ws = (2, 2), (3, 2), (1, 1)
    test_st = (2, 2), (3, 2), (1, 1)
    test_mode = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']
@@ -113,7 +113,7 @@ def test_pool2d():
                            for node in f.maker.fgraph.toposort()])
                assert any([isinstance(node.op, Pool)
                            for node in f2.maker.fgraph.toposort()])
-                assert numpy.allclose(f(), f2()), (shp, ws, st, pad, mode, ignore_border)
+                assert np.allclose(f(), f2()), (shp, ws, st, pad, mode, ignore_border)
                a_pooled_grad = tensor.grad(a_pooled.sum(), a)
@@ -131,7 +131,7 @@ def test_pool2d():
                assert any([isinstance(node.op, gop2)
                            for node in g2.maker.fgraph.toposort()])
-                assert numpy.allclose(g(), g2()), (shp, ws, st, pad, mode, ignore_border)
+                assert np.allclose(g(), g2()), (shp, ws, st, pad, mode, ignore_border)
                # test rop and grad grad for max pooling
                # for average pooling grad grad is just average pooling grad
@@ -151,7 +151,7 @@ def test_pool2d():
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gr2.maker.fgraph.toposort()
                ])
-                assert numpy.allclose(gr(), gr2()), (shp, ws, st, pad, mode, ignore_border)
+                assert np.allclose(gr(), gr2()), (shp, ws, st, pad, mode, ignore_border)
                ggf = gradient.Lop(tensor.grad((a_pooled**2).sum(), a), a, a)
@@ -166,7 +166,7 @@ def test_pool2d():
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gg2.maker.fgraph.toposort()
                ])
-                assert numpy.allclose(gg(), gg2()), (shp, ws, st, pad, mode, ignore_border)
+                assert np.allclose(gg(), gg2()), (shp, ws, st, pad, mode, ignore_border)
 def test_pool3d():
@@ -191,7 +191,7 @@ def test_pool3d():
            (3, 2, 6, 6, 6, 5),
            (3, 2, 6, 6, 6, 5, 7), ]
-    numpy.random.RandomState(utt.fetch_seed()).shuffle(shps)
+    np.random.RandomState(utt.fetch_seed()).shuffle(shps)
    test_ws = (2, 2, 2), (3, 2, 3), (1, 1, 1)
    test_st = (2, 2, 2), (2, 3, 2), (1, 1, 1)
    test_mode = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']
@@ -223,7 +223,7 @@ def test_pool3d():
                            for node in f.maker.fgraph.toposort()])
                assert any([isinstance(node.op, Pool)
                            for node in f2.maker.fgraph.toposort()])
-                assert numpy.allclose(f(), f2()), (shp, ws, st, pad, mode, ignore_border)
+                assert np.allclose(f(), f2()), (shp, ws, st, pad, mode, ignore_border)
                a_pooled_grad = tensor.grad(a_pooled.sum(), a)
@@ -241,7 +241,7 @@ def test_pool3d():
                assert any([isinstance(node.op, gop2)
                            for node in g2.maker.fgraph.toposort()])
-                assert numpy.allclose(g(), g2()), (shp, ws, st, pad, mode, ignore_border)
+                assert np.allclose(g(), g2()), (shp, ws, st, pad, mode, ignore_border)
                # test rop and grad grad for max pooling
                # for average pooling grad grad is just average pooling grad
@@ -261,7 +261,7 @@ def test_pool3d():
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gr2.maker.fgraph.toposort()
                ])
-                assert numpy.allclose(gr(), gr2()), (shp, ws, st, pad, mode, ignore_border)
+                assert np.allclose(gr(), gr2()), (shp, ws, st, pad, mode, ignore_border)
                ggf = gradient.Lop(tensor.grad((a_pooled**2).sum(), a), a, a)
@@ -276,4 +276,4 @@ def test_pool3d():
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gg2.maker.fgraph.toposort()
                ])
-                assert numpy.allclose(gg(), gg2()), (shp, ws, st, pad, mode, ignore_border)
+                assert np.allclose(gg(), gg2()), (shp, ws, st, pad, mode, ignore_border)
--- a/theano/gpuarray/tests/test_scan.py
+++ b/theano/gpuarray/tests/test_scan.py
 from __future__ import absolute_import, print_function, division
 from unittest import TestCase
-import numpy
+import numpy as np
 from six.moves import xrange
 import theano
@@ -43,19 +43,19 @@ class T_Scan(TestCase):
                             allow_input_downcast=True,
                             mode=mode)
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()
-        v_u = numpy.asarray(v_u, dtype='float32')
+        v_u = np.asarray(v_u, dtype='float32')
-        v_x0 = numpy.asarray(v_x0, dtype='float32')
+        v_x0 = np.asarray(v_x0, dtype='float32')
-        W = numpy.asarray(W, dtype='float32')
+        W = np.asarray(W, dtype='float32')
-        W_in = numpy.asarray(W_in, dtype='float32')
+        W_in = np.asarray(W_in, dtype='float32')
        # compute the output in numpy
-        v_out = numpy.zeros((4,))
+        v_out = np.zeros((4,))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in xrange(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
@@ -115,14 +115,14 @@ class T_Scan(TestCase):
                             mode=mode_with_gpu)
        # get random initial values
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()
        # compute the output in numpy
-        v_out = numpy.zeros((4,))
+        v_out = np.zeros((4,))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in xrange(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
@@ -177,20 +177,20 @@ class T_Scan(TestCase):
                             mode=mode_with_gpu)
        # get random initial values
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()
        # compute the output in numpy
-        v_out1 = numpy.zeros((4,))
+        v_out1 = np.zeros((4,))
-        v_out2 = numpy.zeros((4,), dtype='int64')
+        v_out2 = np.zeros((4,), dtype='int64')
        v_out1[0] = v_u[0] * W_in + v_x0 * W
        v_out2[0] = v_u[0] + v_x0
        for step in xrange(1, 4):
            v_out1[step] = v_u[step] * W_in + v_out1[step - 1] * W
-            v_out2[step] = numpy.int64(v_u[step] + v_out1[step - 1])
+            v_out2[step] = np.int64(v_u[step] + v_out1[step - 1])
        theano_out1, theano_out2 = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(theano_out1, v_out1)
@@ -212,8 +212,8 @@ class T_Scan(TestCase):
                        for node in scan_node_topo])
    def test_gpu4_gibbs_chain(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
+        rng = np.random.RandomState(utt.fetch_seed())
-        v_vsample = numpy.array(rng.binomial(1, .5, size=(3, 20),),
+        v_vsample = np.array(rng.binomial(1, .5, size=(3, 20),),
                             dtype='float32')
        vsample = theano.shared(v_vsample)
        trng = theano.sandbox.rng_mrg.MRG_RandomStreams(

--- a/theano/gpuarray/tests/test_subtensor.py
+++ b/theano/gpuarray/tests/test_subtensor.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import unittest
 import theano
@@ -47,8 +47,8 @@ def test_advinc_subtensor1():
    # Test the second case in the opt local_gpu_advanced_incsubtensor1
    for shp in [(3, 3), (3, 3, 3)]:
        shared = gpuarray_shared_constructor
-        xval = numpy.arange(numpy.prod(shp), dtype='float32').reshape(shp) + 1
+        xval = np.arange(np.prod(shp), dtype='float32').reshape(shp) + 1
-        yval = numpy.empty((2,) + shp[1:], dtype='float32')
+        yval = np.empty((2,) + shp[1:], dtype='float32')
        yval[:] = 10
        x = shared(xval, name='x')
        y = tensor.tensor(dtype='float32',
@@ -61,7 +61,7 @@ def test_advinc_subtensor1():
        rval = f(yval)
        rep = xval.copy()
        rep[[0, 2]] += yval
-        assert numpy.allclose(rval, rep)
+        assert np.allclose(rval, rep)
 def test_advinc_subtensor1_dtype():
@@ -69,8 +69,8 @@ def test_advinc_subtensor1_dtype():
    shp = (3, 4)
    for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64')]:
        shared = gpuarray_shared_constructor
-        xval = numpy.arange(numpy.prod(shp), dtype=dtype1).reshape(shp) + 1
+        xval = np.arange(np.prod(shp), dtype=dtype1).reshape(shp) + 1
-        yval = numpy.empty((2,) + shp[1:], dtype=dtype2)
+        yval = np.empty((2,) + shp[1:], dtype=dtype2)
        yval[:] = 10
        x = shared(xval, name='x')
        y = tensor.tensor(dtype=yval.dtype,
@@ -83,7 +83,7 @@ def test_advinc_subtensor1_dtype():
        rval = f(yval)
        rep = xval.copy()
        rep[[0, 2]] += yval
-        assert numpy.allclose(rval, rep)
+        assert np.allclose(rval, rep)
 def test_advinc_subtensor1_vector_scalar():
@@ -91,8 +91,8 @@ def test_advinc_subtensor1_vector_scalar():
    shp = (3,)
    for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64')]:
        shared = gpuarray_shared_constructor
-        xval = numpy.arange(numpy.prod(shp), dtype=dtype1).reshape(shp) + 1
+        xval = np.arange(np.prod(shp), dtype=dtype1).reshape(shp) + 1
-        yval = numpy.asarray(10, dtype=dtype2)
+        yval = np.asarray(10, dtype=dtype2)
        x = shared(xval, name='x')
        y = tensor.tensor(dtype=yval.dtype,
                          broadcastable=(False,) * len(yval.shape),
@@ -104,14 +104,14 @@ def test_advinc_subtensor1_vector_scalar():
        rval = f(yval)
        rep = xval.copy()
        rep[[0, 2]] += yval
-        assert numpy.allclose(rval, rep)
+        assert np.allclose(rval, rep)
 def test_incsub_f16():
    shp = (3, 3)
    shared = gpuarray_shared_constructor
-    xval = numpy.arange(numpy.prod(shp), dtype='float16').reshape(shp) + 1
+    xval = np.arange(np.prod(shp), dtype='float16').reshape(shp) + 1
-    yval = numpy.empty((2,) + shp[1:], dtype='float16')
+    yval = np.empty((2,) + shp[1:], dtype='float16')
    yval[:] = 2
    x = shared(xval, name='x')
    y = tensor.tensor(dtype='float16',
@@ -124,7 +124,7 @@ def test_incsub_f16():
    rval = f(yval)
    rep = xval.copy()
    rep[[0, 2]] += yval
-    assert numpy.allclose(rval, rep)
+    assert np.allclose(rval, rep)
    expr = tensor.inc_subtensor(x[1:], y)
    f = theano.function([y], expr, mode=mode_with_gpu)
@@ -133,7 +133,7 @@ def test_incsub_f16():
    rval = f(yval)
    rep = xval.copy()
    rep[1:] += yval
-    assert numpy.allclose(rval, rep)
+    assert np.allclose(rval, rep)
 class G_advancedsubtensor(test_subtensor.TestAdvancedSubtensor):
@@ -159,7 +159,7 @@ def test_adv_subtensor():
    # Test the advancedsubtensor on gpu.
    shp = (2, 3, 4)
    shared = gpuarray_shared_constructor
-    xval = numpy.arange(numpy.prod(shp), dtype=theano.config.floatX).reshape(shp)
+    xval = np.arange(np.prod(shp), dtype=theano.config.floatX).reshape(shp)
    idx1, idx2 = tensor.ivectors('idx1', 'idx2')
    idxs = [idx1, None, slice(0, 2, 1), idx2, None]
    x = shared(xval, name='x')
@@ -171,27 +171,27 @@ def test_adv_subtensor():
    idx2_val = [0, 1]
    rval = f(idx1_val, idx2_val)
    rep = xval[idx1_val, None, slice(0, 2, 1), idx2_val, None]
-    assert numpy.allclose(rval, rep)
+    assert np.allclose(rval, rep)
 class test_gpudiagonal(unittest.TestCase):
    def test_matrix(self):
        x = tensor.matrix()
-        np_x = numpy.arange(77).reshape(7, 11).astype(theano.config.floatX)
+        np_x = np.arange(77).reshape(7, 11).astype(theano.config.floatX)
        fn = theano.function([x], GpuDiagonal()(x), mode=mode_with_gpu)
-        assert numpy.allclose(fn(np_x), np_x.diagonal())
+        assert np.allclose(fn(np_x), np_x.diagonal())
        fn = theano.function([x], GpuDiagonal(2)(x), mode=mode_with_gpu)
-        assert numpy.allclose(fn(np_x), np_x.diagonal(2))
+        assert np.allclose(fn(np_x), np_x.diagonal(2))
        fn = theano.function([x], GpuDiagonal(-3)(x), mode=mode_with_gpu)
-        assert numpy.allclose(fn(np_x), np_x.diagonal(-3))
+        assert np.allclose(fn(np_x), np_x.diagonal(-3))
    def test_tensor(self):
        x = tensor.tensor4()
-        np_x = numpy.arange(30107).reshape(7, 11, 17, 23).astype(theano.config.floatX)
+        np_x = np.arange(30107).reshape(7, 11, 17, 23).astype(theano.config.floatX)
        for offset, axis1, axis2 in [
                (1, 0, 1), (-1, 0, 1), (0, 1, 0), (-2, 1, 0),
                (-3, 1, 0), (-2, 2, 0), (3, 3, 0), (-1, 3, 2),
                (2, 2, 3), (-1, 2, 1), (1, 3, 1), (-1, 1, 3)]:
-            assert numpy.allclose(
+            assert np.allclose(
                GpuDiagonal(offset, axis1, axis2)(x).eval({x: np_x}),
                np_x.diagonal(offset, axis1, axis2))
--- a/theano/gpuarray/tests/test_type.py
+++ b/theano/gpuarray/tests/test_type.py
 from __future__ import absolute_import, print_function, division
 import os
-import numpy
+import numpy as np
 import theano
 from theano.compat import PY3
@@ -33,10 +33,10 @@ def test_values_eq_approx():
    a = rand_gpuarray(20, dtype='float32')
    assert GpuArrayType.values_eq_approx(a, a)
    b = a.copy()
-    b[0] = numpy.asarray(b[0]) + 1.
+    b[0] = np.asarray(b[0]) + 1.
    assert not GpuArrayType.values_eq_approx(a, b)
    b = a.copy()
-    b[0] = -numpy.asarray(b[0])
+    b[0] = -np.asarray(b[0])
    assert not GpuArrayType.values_eq_approx(a, b)
@@ -50,7 +50,7 @@ def test_specify_shape():
 def test_filter_float():
    theano.compile.shared_constructor(gpuarray_shared_constructor)
    try:
-        s = theano.shared(numpy.array(0.0, dtype='float32'),
+        s = theano.shared(np.array(0.0, dtype='float32'),
                          target=test_ctx_name)
        theano.function([], updates=[(s, 0.0)])
    finally:
@@ -73,6 +73,6 @@ def test_unpickle_gpuarray_as_numpy_ndarray_flag0():
                u = CompatUnpickler(fp)
            mat = u.load()
            assert isinstance(mat, pygpu.gpuarray.GpuArray)
-            assert numpy.asarray(mat)[0] == -42.0
+            assert np.asarray(mat)[0] == -42.0
    finally:
        config.experimental.unpickle_gpu_on_cpu = oldflag
--- a/theano/gpuarray/type.py
+++ b/theano/gpuarray/type.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import six.moves.copyreg as copyreg
 from six import iteritems
 import warnings
@@ -226,7 +226,7 @@ class GpuArrayType(Type):
                converted_data = theano._asarray(data, self.dtype)
                # We use the `values_eq` static function from TensorType
                # to handle NaN values.
-                if TensorType.values_eq(numpy.asarray(data),
+                if TensorType.values_eq(np.asarray(data),
                                        converted_data,
                                        force_same_dtype=False):
                    data = converted_data
@@ -293,18 +293,18 @@ class GpuArrayType(Type):
            return False
        if force_same_dtype and a.typecode != b.typecode:
            return False
-        a_eq_b = numpy.asarray(compare(a, '==', b))
+        a_eq_b = np.asarray(compare(a, '==', b))
        if a_eq_b.all():
            return True
        # maybe the trouble is that there are NaNs
-        a = numpy.asarray(a)
+        a = np.asarray(a)
-        b = numpy.asarray(b)
+        b = np.asarray(b)
-        a_missing = numpy.isnan(a)
+        a_missing = np.isnan(a)
        if a_missing.any():
-            b_missing = numpy.isnan(b)
+            b_missing = np.isnan(b)
-            return numpy.all(a_eq_b + (a_missing == b_missing))
+            return np.all(a_eq_b + (a_missing == b_missing))
        else:
            return False
@@ -326,16 +326,16 @@ class GpuArrayType(Type):
                rtol_ = rtol
            if atol is not None:
                atol_ = atol
-            res = elemwise2(a, '', b, a, odtype=numpy.dtype('bool'),
+            res = elemwise2(a, '', b, a, odtype=np.dtype('bool'),
                            op_tmpl="res = (fabs(a - b) <"
                            "(%(atol_)s + %(rtol_)s * fabs(b)))" %
                            locals())
-            ret = numpy.asarray(res).all()
+            ret = np.asarray(res).all()
            if ret:
                return True
            # maybe the trouble is that there are NaNs
-            an = numpy.asarray(a)
+            an = np.asarray(a)
-            bn = numpy.asarray(b)
+            bn = np.asarray(b)
            return tensor.TensorType.values_eq_approx(
                an, bn, allow_remove_inf=allow_remove_inf,
                allow_remove_nan=allow_remove_nan, rtol=rtol, atol=atol)
@@ -408,9 +408,9 @@ class GpuArrayType(Type):
    def get_size(self, shape_info):
        if shape_info:
-            return numpy.prod(shape_info) * numpy.dtype(self.dtype).itemsize
+            return np.prod(shape_info) * np.dtype(self.dtype).itemsize
        else:
-            return numpy.dtype(self.dtype).itemsize
+            return np.dtype(self.dtype).itemsize
    def c_declare(self, name, sub, check_input=True):
        return """
@@ -470,7 +470,7 @@ class GpuArrayType(Type):
                '<gpuarray_api.h>']
    def c_header_dirs(self):
-        return [pygpu.get_include(), numpy.get_include()]
+        return [pygpu.get_include(), np.get_include()]
    def c_libraries(self):
        return ['gpuarray']
@@ -509,7 +509,7 @@ class GpuArrayVariable(_operators, Variable):
    # override the default
    def __repr_test_value__(self):
-        return repr(numpy.array(theano.gof.op.get_test_value(self)))
+        return repr(np.array(theano.gof.op.get_test_value(self)))
 GpuArrayType.Variable = GpuArrayVariable
@@ -534,13 +534,13 @@ class GpuArrayConstant(_operators, Constant):
    """
    def signature(self):
-        return GpuArraySignature((self.type, numpy.asarray(self.data)))
+        return GpuArraySignature((self.type, np.asarray(self.data)))
    def __str__(self):
        if self.name is not None:
            return self.name
        try:
-            np_data = numpy.asarray(self.data)
+            np_data = np.asarray(self.data)
        except gpuarray.GpuArrayException:
            np_data = self.data
        return "GpuArrayConstant{%s}" % np_data
@@ -568,7 +568,7 @@ class GpuArraySharedVariable(_operators, SharedVariable):
            else:
                return self.container.value.copy()
        else:
-            return numpy.asarray(self.container.value)
+            return np.asarray(self.container.value)
    def set_value(self, value, borrow=False):
        if isinstance(value, pygpu.gpuarray.GpuArray):
@@ -601,7 +601,7 @@ def gpuarray_shared_constructor(value, name=None, strict=False,
    if target == 'gpu' or target == 'cpu':
        raise TypeError('not for me')
-    if not isinstance(value, (numpy.ndarray, pygpu.gpuarray.GpuArray)):
+    if not isinstance(value, (np.ndarray, pygpu.gpuarray.GpuArray)):
        raise TypeError('ndarray or GpuArray required')
    if target is notset:
@@ -809,7 +809,7 @@ copyreg.constructor(GpuArray_unpickler)
 def GpuArray_pickler(cnda):
    ctx_name = _name_for_ctx(cnda.context)
-    return (GpuArray_unpickler, (numpy.asarray(cnda), ctx_name))
+    return (GpuArray_unpickler, (np.asarray(cnda), ctx_name))
 # In case pygpu is not imported.
 if pygpu is not None: