Merge pull request #5835 from Amrithasuresh/master

Use `import numpy as np` consistently in theano/tensor/nnet (#4218)

Merge pull request #5835 from Amrithasuresh/master
e79c4e4c · Frédéric Bastien · GitHub · d0524fe5 · 7168c81b · e79c4e4c
--- a/theano/tensor/nnet/Conv3D.py
+++ b/theano/tensor/nnet/Conv3D.py
 from __future__ import absolute_import, print_function, division
-import numpy as N
+import numpy as np
 from six.moves import xrange
 import theano
@@ -407,7 +407,7 @@ class Conv3D(theano.Op):
                                    long long Vposl = Vpos;
                                    for (int m = 0; m < filterDur; m++) {
-                                      //H[i,r,c,t,:] += N.dot(W[:,k,l,m,:],V[i,dr*r+k,dc*c+l,dt*t+m,:])
+                                      //H[i,r,c,t,:] += np.dot(W[:,k,l,m,:],V[i,dr*r+k,dc*c+l,dt*t+m,:])
                                      //note: changing the weights so that outputChannels and inputChannels were the last two rather than
@@ -619,8 +619,8 @@ def computeH(V, W, b, d):
    outputWidth = int((vidWidth - filterWidth) / dy) + 1
    outputDur = int((vidDur - filterDur) / dt) + 1
-    H = N.zeros((batchSize, outputHeight,
+    H = np.zeros((batchSize, outputHeight,
-                outputWidth, outputDur, outputChannels), dtype=V.dtype)
+                 outputWidth, outputDur, outputChannels), dtype=V.dtype)
    # H[i,j,x,y,t] = b_j + sum_k sum_l sum_m sum_z W[j,z,k,l,m] V[i,z, dx*x+k,dy*y+l,dt*t+m]
    for i in xrange(0, H.shape[0]):

--- a/theano/tensor/nnet/ConvGrad3D.py
+++ b/theano/tensor/nnet/ConvGrad3D.py
 from __future__ import absolute_import, print_function, division
 from six.moves import xrange
-import numpy as N
+import numpy as np
 import theano
 from theano.tensor import basic as T
@@ -71,7 +71,7 @@ class ConvGrad3D(theano.Op):
        assert V.shape[0] == batchSize
        dr, dc, dt = d
-        dCdW = N.zeros(WShape, dtype=V.dtype)
+        dCdW = np.zeros(WShape, dtype=V.dtype)
        # print 'computing output of shape '+str(WShape)

--- a/theano/tensor/nnet/ConvTransp3D.py
+++ b/theano/tensor/nnet/ConvTransp3D.py
 from __future__ import absolute_import, print_function, division
-import numpy as N
+import numpy as np
 from six.moves import xrange
 import theano
@@ -385,8 +385,8 @@ def computeR(W, b, d, H, Rshape=None):
    # print "video size: "+str((videoHeight, videoWidth, videoDur))
-    R = N.zeros((batchSize, videoHeight,
+    R = np.zeros((batchSize, videoHeight,
-                videoWidth, videoDur, inputChannels), dtype=H.dtype)
+                 videoWidth, videoDur, inputChannels), dtype=H.dtype)
    # R[i,j,r,c,t] = b_j + sum_{rc,rk | d \circ rc + rk = r} sum_{cc,ck | ...} sum_{tc,tk | ...} sum_k W[k, j, rk, ck, tk] * H[i,k,rc,cc,tc]
    for i in xrange(0, batchSize):
@@ -399,12 +399,12 @@ def computeR(W, b, d, H, Rshape=None):
                    for t in xrange(0, videoDur):
                        R[i, r, c, t, j] = b[j]
-                        ftc = max([0, int(N.ceil(
+                        ftc = max([0, int(np.ceil(
                            float(t - filterDur + 1) / float(dt)))])
-                        fcc = max([0, int(N.ceil(
+                        fcc = max([0, int(np.ceil(
                            float(c - filterWidth + 1) / float(dc)))])
-                        rc = max([0, int(N.ceil(
+                        rc = max([0, int(np.ceil(
                            float(r - filterHeight + 1) / float(dr)))])
                        while rc < outputHeight:
                            rk = r - rc * dr
@@ -423,7 +423,7 @@ def computeR(W, b, d, H, Rshape=None):
                                    if tk < 0:
                                        break
-                                    R[i, r, c, t, j] += N.dot(
+                                    R[i, r, c, t, j] += np.dot(
                                        W[:, rk, ck, tk, j], H[i, rc, cc, tc, :])
                                    tc += 1

--- a/theano/tensor/nnet/abstract_conv.py
+++ b/theano/tensor/nnet/abstract_conv.py
@@ -17,7 +17,6 @@ from theano.gof import Apply, Op
 from six.moves import xrange
 import warnings
-import numpy
 import numpy as np
 try:
@@ -69,7 +68,7 @@ def get_conv_output_shape(image_shape, kernel_shape,
    nkern, kshp = kernel_shape[0], kernel_shape[2:]
    if filter_dilation is None:
-        filter_dilation = numpy.ones(len(subsample), dtype='int')
+        filter_dilation = np.ones(len(subsample), dtype='int')
    if isinstance(border_mode, tuple):
        out_shp = tuple(get_conv_shape_1axis(
@@ -181,7 +180,7 @@ def get_conv_gradweights_shape(image_shape, top_shape,
    nchan, topshp = top_shape[1], top_shape[2:]
    if filter_dilation is None:
-        filter_dilation = numpy.ones(len(subsample), dtype='int')
+        filter_dilation = np.ones(len(subsample), dtype='int')
    if isinstance(border_mode, tuple):
        out_shp = tuple(get_conv_gradweights_shape_1axis(
@@ -286,7 +285,7 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape,
    nkern, kshp = kernel_shape[1], kernel_shape[2:]
    if filter_dilation is None:
-        filter_dilation = numpy.ones(len(subsample), dtype='int')
+        filter_dilation = np.ones(len(subsample), dtype='int')
    if isinstance(border_mode, tuple):
        out_shp = tuple(get_conv_gradinputs_shape_1axis(
@@ -1508,11 +1507,11 @@ class BaseAbstractConv(Op):
        out_shape = get_conv_output_shape(img.shape, kern.shape,
                                          mode, [1] * self.convdim, dilation)
-        out = numpy.zeros(out_shape, dtype=img.dtype)
+        out = np.zeros(out_shape, dtype=img.dtype)
        dil_kern_shp = kern.shape[:-self.convdim] + tuple(
            (kern.shape[-self.convdim + i] - 1) * dilation[i] + 1
            for i in range(self.convdim))
-        dilated_kern = numpy.zeros(dil_kern_shp, dtype=kern.dtype)
+        dilated_kern = np.zeros(dil_kern_shp, dtype=kern.dtype)
        dilated_kern[(slice(None), slice(None)) +
                     tuple(slice(None, None, dilation[i]) for i in range(self.convdim))
                     ] = kern
@@ -1522,7 +1521,7 @@ class BaseAbstractConv(Op):
            bval = _bvalfromboundary('fill')
            with warnings.catch_warnings():
-                warnings.simplefilter('ignore', numpy.ComplexWarning)
+                warnings.simplefilter('ignore', np.ComplexWarning)
                for b in xrange(img.shape[0]):
                    for n in xrange(kern.shape[0]):
                        for im0 in xrange(img.shape[1]):
@@ -1592,8 +1591,8 @@ class AbstractConv(BaseAbstractConv):
    def perform(self, node, inp, out_):
        img, kern = inp
-        img = numpy.asarray(img)
+        img = np.asarray(img)
-        kern = numpy.asarray(kern)
+        kern = np.asarray(kern)
        dil_kernshp = tuple((kern.shape[2 + i] - 1) * self.filter_dilation[i] + 1
                            for i in range(self.convdim))
        o, = out_
@@ -1613,10 +1612,10 @@ class AbstractConv(BaseAbstractConv):
        if isinstance(mode, tuple):
            pad = tuple(int(mode[i]) for i in range(self.convdim))
            mode = "valid"
-            new_img = numpy.zeros((img.shape[0], img.shape[1]) +
+            new_img = np.zeros((img.shape[0], img.shape[1]) +
-                                  tuple(img.shape[i + 2] + 2 * pad[i]
+                               tuple(img.shape[i + 2] + 2 * pad[i]
-                                        for i in range(self.convdim)),
+                                     for i in range(self.convdim)),
-                                  dtype=img.dtype)
+                               dtype=img.dtype)
            new_img[(slice(None), slice(None)) +
                    tuple(slice(pad[i], img.shape[i + 2] + pad[i])
                          for i in range(self.convdim))] = img
@@ -1809,8 +1808,8 @@ class AbstractConv_gradWeights(BaseAbstractConv):
    def perform(self, node, inp, out_):
        img, topgrad, shape = inp
-        img = numpy.asarray(img)
+        img = np.asarray(img)
-        topgrad = numpy.asarray(topgrad)
+        topgrad = np.asarray(topgrad)
        o, = out_
@@ -1833,10 +1832,10 @@ class AbstractConv_gradWeights(BaseAbstractConv):
            pad = tuple(int(mode[i]) for i in range(self.convdim))
            mode = "valid"
-            new_img = numpy.zeros((img.shape[0], img.shape[1]) +
+            new_img = np.zeros((img.shape[0], img.shape[1]) +
-                                  tuple(img.shape[i + 2] + 2 * pad[i]
+                               tuple(img.shape[i + 2] + 2 * pad[i]
-                                        for i in range(self.convdim)),
+                                     for i in range(self.convdim)),
-                                  dtype=img.dtype)
+                               dtype=img.dtype)
            new_img[(slice(None), slice(None)) +
                    tuple(slice(pad[i], img.shape[i + 2] + pad[i])
                          for i in range(self.convdim))] = img
@@ -1846,7 +1845,7 @@ class AbstractConv_gradWeights(BaseAbstractConv):
            new_shape = ((topgrad.shape[0], topgrad.shape[1]) +
                         tuple(img.shape[i + 2] - dil_shape[i] + 1
                               for i in range(self.convdim)))
-            new_topgrad = numpy.zeros((new_shape), dtype=topgrad.dtype)
+            new_topgrad = np.zeros((new_shape), dtype=topgrad.dtype)
            new_topgrad[(slice(None), slice(None)) +
                        tuple(slice(None, None, self.subsample[i])
                              for i in range(self.convdim))] = topgrad
@@ -2049,8 +2048,8 @@ class AbstractConv_gradInputs(BaseAbstractConv):
    def perform(self, node, inp, out_):
        kern, topgrad, shape = inp
-        kern = numpy.asarray(kern)
+        kern = np.asarray(kern)
-        topgrad = numpy.asarray(topgrad)
+        topgrad = np.asarray(topgrad)
        o, = out_
        mode = self.border_mode
@@ -2089,7 +2088,7 @@ class AbstractConv_gradInputs(BaseAbstractConv):
            new_shape = ((topgrad.shape[0], topgrad.shape[1]) +
                         tuple(shape[i] + 2 * pad[i] - dil_kernshp[i] + 1
                               for i in range(self.convdim)))
-            new_topgrad = numpy.zeros((new_shape), dtype=topgrad.dtype)
+            new_topgrad = np.zeros((new_shape), dtype=topgrad.dtype)
            new_topgrad[(slice(None), slice(None)) +
                        tuple(slice(None, None, self.subsample[i])
                              for i in range(self.convdim))] = topgrad

--- a/theano/tensor/nnet/blocksparse.py
+++ b/theano/tensor/nnet/blocksparse.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import theano
 from theano import Op, Apply
@@ -106,7 +106,7 @@ class SparseBlockGemv(Op):
                for i in range(h.shape[1]):
                    inputIdx = iIdx[b, i]
                    w = W[inputIdx, outputIdx]
-                    o[b, j, :] += numpy.dot(h[b, i], w)
+                    o[b, j, :] += np.dot(h[b, i], w)
        out_[0][0] = o
    def infer_shape(self, node, input_shapes):
@@ -185,7 +185,7 @@ class SparseBlockOuter(Op):
          Which blocks will be computed is specified in `yIdx`.
        """
-        one = theano.tensor.constant(numpy.asarray(1.0, dtype='float32'))
+        one = theano.tensor.constant(np.asarray(1.0, dtype='float32'))
        o = theano.tensor.as_tensor_variable(o)
        x = theano.tensor.as_tensor_variable(x)
        y = theano.tensor.as_tensor_variable(y)
@@ -208,8 +208,8 @@ class SparseBlockOuter(Op):
        for b in range(x.shape[0]):
            for i in range(xIdx.shape[1]):
                for j in range(yIdx.shape[1]):
-                    o[xIdx[b, i], yIdx[b, j]] += numpy.outer(x[b, i],
+                    o[xIdx[b, i], yIdx[b, j]] += np.outer(x[b, i],
-                                                             y[b, j, :])
+                                                          y[b, j, :])
        out_[0][0] = o

--- a/theano/tensor/nnet/bn.py
+++ b/theano/tensor/nnet/bn.py
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import theano
 from theano import Apply, Op
 from theano.gof import local_optimizer
@@ -89,7 +89,7 @@ def _prepare_batch_normalization_axes(axes, ndim):
        axes = (0,)
    elif axes == 'spatial':
        axes = (0,) + tuple(range(2, ndim))
-    elif isinstance(axes, (tuple, list, numpy.ndarray)):
+    elif isinstance(axes, (tuple, list, np.ndarray)):
        axes = tuple(int(a) for a in axes)
    else:
        raise ValueError('invalid axes: %s', str(axes))
@@ -215,7 +215,7 @@ def batch_normalization_train(inputs, gamma, beta, axes='per-activation',
    # epsilon will be converted to floatX later. we need to check
    # for rounding errors now, since numpy.float32(1e-5) < 1e-5.
-    epsilon = numpy.cast[theano.config.floatX](epsilon)
+    epsilon = np.cast[theano.config.floatX](epsilon)
    if epsilon < 1e-5:
        raise ValueError("epsilon must be at least 1e-5, got %s" % str(epsilon))
@@ -337,7 +337,7 @@ def batch_normalization_test(inputs, gamma, beta, mean, var,
    # epsilon will be converted to floatX later. we need to check
    # for rounding errors now, since numpy.float32(1e-5) < 1e-5.
-    epsilon = numpy.cast[theano.config.floatX](epsilon)
+    epsilon = np.cast[theano.config.floatX](epsilon)
    if epsilon < 1e-5:
        raise ValueError("epsilon must be at least 1e-5, got %s" % str(epsilon))
@@ -480,7 +480,7 @@ class AbstractBatchNormTrain(Op):
        mean = x.mean(axes, keepdims=True)
        var = x.var(axes, keepdims=True)
-        invstd = 1.0 / numpy.sqrt(var + epsilon)
+        invstd = 1.0 / np.sqrt(var + epsilon)
        out = (x - mean) * (scale * invstd) + bias
        output_storage[0][0] = out
@@ -493,7 +493,7 @@ class AbstractBatchNormTrain(Op):
                mean * running_average_factor
            output_storage[3][0] = running_mean
        if len(inputs) > 6:
-            m = float(numpy.prod(x.shape) / numpy.prod(scale.shape))
+            m = float(np.prod(x.shape) / np.prod(scale.shape))
            running_var = inputs[6]
            running_var = running_var * (1.0 - running_average_factor) + \
                (m / (m - 1)) * var * running_average_factor
@@ -568,7 +568,7 @@ class AbstractBatchNormInference(Op):
    def perform(self, node, inputs, output_storage):
        x, scale, bias, estimated_mean, estimated_variance, epsilon = inputs
-        out = (x - estimated_mean) * (scale / numpy.sqrt(estimated_variance + epsilon)) + bias
+        out = (x - estimated_mean) * (scale / np.sqrt(estimated_variance + epsilon)) + bias
        output_storage[0][0] = out
@@ -607,12 +607,12 @@ class AbstractBatchNormTrainGrad(Op):
            raise ValueError('axes should be less than ndim (<%d), but %s given' % (x.ndim, str(axes)))
        x_diff = x - x_mean
-        mean_dy_x_diff = numpy.mean(dy * x_diff, axis=axes, keepdims=True)
+        mean_dy_x_diff = np.mean(dy * x_diff, axis=axes, keepdims=True)
        c = (dy * x_invstd) - (x_diff * mean_dy_x_diff * (x_invstd ** 3))
-        g_wrt_inputs = scale * (c - numpy.mean(c, axis=axes, keepdims=True))
+        g_wrt_inputs = scale * (c - np.mean(c, axis=axes, keepdims=True))
-        g_wrt_scale = numpy.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
+        g_wrt_scale = np.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
-        g_wrt_bias = numpy.sum(dy, axis=axes, keepdims=True)
+        g_wrt_bias = np.sum(dy, axis=axes, keepdims=True)
        output_storage[0][0] = g_wrt_inputs
        output_storage[1][0] = g_wrt_scale

--- a/theano/tensor/nnet/conv.py
+++ b/theano/tensor/nnet/conv.py
@@ -12,7 +12,7 @@ from __future__ import absolute_import, print_function, division
 import logging
-import numpy
+import numpy as np
 from six.moves import xrange
 import warnings
@@ -756,8 +756,8 @@ class ConvOp(OpenMPOp):
                (1, 1))[2:]
        if z[0] is None or z[0].shape != (bsize, nkern,) + fulloutshp:
-            z[0] = numpy.zeros((bsize, nkern,) + fulloutshp,
+            z[0] = np.zeros((bsize, nkern,) + fulloutshp,
-                               dtype=img2d.dtype)
+                            dtype=img2d.dtype)
        zz = z[0]
        stacklen = imshp[0]
@@ -767,18 +767,18 @@ class ConvOp(OpenMPOp):
        if self.imshp != self.imshp_logical:
            # assuming that to get from imshp to imshp logical we insert zeros in missing spots
-            rstride = int(numpy.ceil(imshp_logical[1] / float(imshp[1])))
+            rstride = int(np.ceil(imshp_logical[1] / float(imshp[1])))
-            cstride = int(numpy.ceil(imshp_logical[2] / float(imshp[2])))
+            cstride = int(np.ceil(imshp_logical[2] / float(imshp[2])))
-            buf = numpy.zeros((bsize,) + imshp_logical, dtype=img2d.dtype)
+            buf = np.zeros((bsize,) + imshp_logical, dtype=img2d.dtype)
            buf[:, :, ::rstride, ::cstride] = img2d
            img2d = buf
            del buf, rstride, cstride
        if kshp != kshp_logical:
-            rstride = int(numpy.ceil(kshp_logical[0] / float(kshp[0])))
+            rstride = int(np.ceil(kshp_logical[0] / float(kshp[0])))
-            cstride = int(numpy.ceil(kshp_logical[1] / float(kshp[1])))
+            cstride = int(np.ceil(kshp_logical[1] / float(kshp[1])))
-            buf = numpy.zeros((nkern, stacklen) +
+            buf = np.zeros((nkern, stacklen) +
-                              self.kshp_logical, dtype=filtersflipped.dtype)
+                           self.kshp_logical, dtype=filtersflipped.dtype)
            if self.kshp_logical_top_aligned:
                roffset = coffset = 0
            else:
@@ -796,7 +796,7 @@ class ConvOp(OpenMPOp):
        bval = _bvalfromboundary('fill')
        with warnings.catch_warnings():
-            warnings.simplefilter('ignore', numpy.ComplexWarning)
+            warnings.simplefilter('ignore', np.ComplexWarning)
            for b in xrange(bsize):
                for n in xrange(nkern):
                    zz[b, n, ...].fill(0)
@@ -808,9 +808,9 @@ class ConvOp(OpenMPOp):
        if False:
            if False and self.out_mode == "full":
-                img2d2 = numpy.zeros((bsize, stacklen,
+                img2d2 = np.zeros((bsize, stacklen,
-                                      imshp[1] + 2 * kshp[0] - 2,
+                                   imshp[1] + 2 * kshp[0] - 2,
-                                      imshp[2] + 2 * kshp[1] - 2))
+                                   imshp[2] + 2 * kshp[1] - 2))
                img2d2[:, :, kshp[0] - 1:kshp[0] - 1 + imshp[1],
                       kshp[1] - 1:kshp[1] - 1 + imshp[2]] = img2d
                img2d = img2d2
@@ -873,7 +873,7 @@ class ConvOp(OpenMPOp):
            tmp_node = theano.tensor.nnet.conv3D(
                V=shuffled_inputs,
                W=shuffled_kerns,
-                b=theano.tensor.alloc(numpy.asarray(0, dtype=kerns.dtype),
+                b=theano.tensor.alloc(np.asarray(0, dtype=kerns.dtype),
                                      kerns.shape[0]),
                d=(self.dx, self.dy, 1))
            node = theano.tensor.addbroadcast(
@@ -1260,17 +1260,17 @@ if(%(value)s != %(expected)s){
        if all_shape:
            d["self_kshp_logical_r"] = self.kshp_logical[0]
            d["self_kshp_logical_c"] = self.kshp_logical[1]
-            d["self_kshp_logical_stride_r"] = int(numpy.ceil(
+            d["self_kshp_logical_stride_r"] = int(np.ceil(
                self.kshp_logical[0] / float(self.kshp[0])))
-            d["self_kshp_logical_stride_c"] = int(numpy.ceil(
+            d["self_kshp_logical_stride_c"] = int(np.ceil(
                self.kshp_logical[1] / float(self.kshp[1])))
            d["self_imshp_logical_r"] = self.imshp_logical[1]
            # N.B. index 1, not 0
            d["self_imshp_logical_c"] = self.imshp_logical[2]
            # N.B. index 2, not 1
-            d["self_imshp_logical_stride_r"] = int(numpy.ceil(
+            d["self_imshp_logical_stride_r"] = int(np.ceil(
                self.imshp_logical[1] / float(self.imshp[1])))
-            d["self_imshp_logical_stride_c"] = int(numpy.ceil(
+            d["self_imshp_logical_stride_c"] = int(np.ceil(
                self.imshp_logical[2] / float(self.imshp[2])))
            if not self.imshp[0] == 1:
                d["affectation"] = "+="

--- a/theano/tensor/nnet/neighbours.py
+++ b/theano/tensor/nnet/neighbours.py
@@ -4,7 +4,7 @@ TODO: implement Images2Neibs.infer_shape() methods
 """
 from __future__ import absolute_import, print_function, division
-import numpy
+import numpy as np
 import theano
 from theano import Op, Apply
@@ -224,7 +224,7 @@ class Images2Neibs(Op):
        z_dim0 = grid_c * grid_d * ten4.shape[1] * ten4.shape[0]
        z_dim1 = c * d
-        z[0] = numpy.empty((z_dim0, z_dim1), dtype=node.outputs[0].dtype)
+        z[0] = np.empty((z_dim0, z_dim1), dtype=node.outputs[0].dtype)
        nb_batch = ten4.shape[0]
        nb_stack = ten4.shape[1]

--- a/theano/tensor/nnet/nnet.py
+++ b/theano/tensor/nnet/nnet.py
@@ -15,7 +15,7 @@ revisited later when all the intermediate part are on the GPU.
 from __future__ import absolute_import, print_function, division
 import logging
 import warnings
-import numpy
+import numpy as np
 from six.moves import xrange
 import theano
@@ -85,7 +85,7 @@ class SoftmaxWithBias(gof.Op):
        if x.size == 0:
            # Numpy doesn't like the max of a zero-sized object.
-            output_storage[0][0] = numpy.zeros(x.shape, dtype=x.dtype)
+            output_storage[0][0] = np.zeros(x.shape, dtype=x.dtype)
            return
        x_dtype = x.dtype
@@ -94,7 +94,7 @@ class SoftmaxWithBias(gof.Op):
            x = x.astype('float32')
        x_plus_b = x + b[None, :]
-        e_x = numpy.exp(x_plus_b - x_plus_b.max(axis=1)[:, None])
+        e_x = np.exp(x_plus_b - x_plus_b.max(axis=1)[:, None])
        e_x *= 1.0 / e_x.sum(axis=1)[:, None]
        # default for copy is True and we don't need a copy if the
        # data type matches.
@@ -314,7 +314,7 @@ class SoftmaxGrad(gof.Op):
    def perform(self, node, input_storage, output_storage):
        dy, sm = input_storage
-        dx = numpy.zeros_like(sm)
+        dx = np.zeros_like(sm)
        # dx[i,j] = - (\sum_k dy[i,k] sm[i,k]) sm[i,j] + dy[i,j] sm[i,j]
        for i in xrange(sm.shape[0]):
            dy_times_sm_i = dy[i] * sm[i]
@@ -435,7 +435,7 @@ class Softmax(gof.Op):
    def perform(self, node, input_storage, output_storage):
        x, = input_storage
-        e_x = numpy.exp(x - x.max(axis=1)[:, None])
+        e_x = np.exp(x - x.max(axis=1)[:, None])
        sm = e_x / e_x.sum(axis=1)[:, None]
        output_storage[0][0] = sm
@@ -620,8 +620,8 @@ class LogSoftmax(gof.Op):
    def perform(self, node, input_storage, output_storage):
        x, = input_storage
        xdev = x - x.max(axis=1)[:, None]
-        lsm = xdev - numpy.log(numpy.sum(numpy.exp(xdev), axis=1,
+        lsm = xdev - np.log(np.sum(np.exp(xdev), axis=1,
-                               keepdims=True))
+                            keepdims=True))
        output_storage[0][0] = lsm
    def grad(self, inp, grads):
@@ -1003,27 +1003,27 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
            raise ValueError('y_idx must have same number of rows as x')
        if any(y_idx < 0):
            raise ValueError("y_i value out of bounds")
-        sm = numpy.zeros_like(x)  # softmax
+        sm = np.zeros_like(x)  # softmax
-        nll = numpy.zeros(x.shape[0], dtype=node.outputs[0].type.dtype)  # nll(y | softmax(x))
+        nll = np.zeros(x.shape[0], dtype=node.outputs[0].type.dtype)  # nll(y | softmax(x))
-        am = numpy.zeros_like(y_idx)
+        am = np.zeros_like(y_idx)
        for i in xrange(sm.shape[0]):
            # add the bias vector to the i'th row of x
            row = x[i] + b
            # get the maximum value of i'th row for numerically safe
            # softmax / nll
-            am[i] = numpy.argmax(row)
+            am[i] = np.argmax(row)
            m = row[am[i]]
            # compute the unnormalized softmax, and normalization constant
-            sm[i] = numpy.exp(row - m)
+            sm[i] = np.exp(row - m)
-            sum_j = numpy.sum(sm[i])  # sum_j(exp(x[j] - m))
+            sum_j = np.sum(sm[i])  # sum_j(exp(x[j] - m))
            # normalized our softmax
            sm[i] *= 1.0 / sum_j
            # store the nll
-            nll[i] = -row[y_idx[i]] + m + numpy.log(sum_j)
+            nll[i] = -row[y_idx[i]] + m + np.log(sum_j)
        output_storage[0][0] = nll
        output_storage[1][0] = sm
@@ -1200,7 +1200,7 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op):
        dy, sm, y_idx = input_storage
        if any(y_idx < 0):
            raise ValueError("y_i value out of bounds")
-        dx = numpy.zeros_like(sm)
+        dx = np.zeros_like(sm)
        if dy.ndim == 0:
            dy = dy[None]
        incr = int(dy.shape[0] > 1)
@@ -1391,7 +1391,7 @@ class CrossentropyCategorical1HotGrad(gof.Op):
    def perform(self, node, inp, out):
        g_y, coding_dist, true_one_of_n = inp
        g_coding_strg, = out
-        g_coding = numpy.zeros_like(coding_dist)
+        g_coding = np.zeros_like(coding_dist)
        for i in xrange(len(g_y)):
            g_coding[i, true_one_of_n[i]] = (-g_y[i] /
                                             coding_dist[i, true_one_of_n[i]])
@@ -1450,9 +1450,9 @@ class CrossentropyCategorical1Hot(gof.Op):
    def perform(self, node, inp, out):
        coding, one_of_n = inp
        y_out, = out
-        y = numpy.zeros_like(coding[:, 0])
+        y = np.zeros_like(coding[:, 0])
        for i in xrange(len(y)):
-            y[i] = -numpy.log(coding[i, one_of_n[i]])
+            y[i] = -np.log(coding[i, one_of_n[i]])
        y_out[0] = y
    def infer_shape(self, node, in_shapes):
@@ -1659,9 +1659,9 @@ def _is_const(z, val, approx=False):
    except tensor.NotScalarConstantError:
        return False
    if approx:
-        return numpy.allclose(maybe, val)
+        return np.allclose(maybe, val)
    else:
-        return numpy.all(maybe == val)
+        return np.all(maybe == val)
 @opt.register_specialize('fast_compile_gpu')
@@ -1792,7 +1792,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
            # set out_grad according to the numerator, it may be divided later
            # num should be a vector or a scalar
-            if num.ndim == 1 or numpy.all(num.broadcastable):
+            if num.ndim == 1 or np.all(num.broadcastable):
                out_grad *= -num
            else:
                return
@@ -1818,7 +1818,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
                            rest = tensor.mul(*[other_inputs])
                        # Check that rest is a vector or a scalar
-                        if rest.ndim == 1 or numpy.all(rest.broadcastable):
+                        if rest.ndim == 1 or np.all(rest.broadcastable):
                            adv_subtensor = input
                            out_grad /= rest
                            break
@@ -2099,14 +2099,14 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
        output, = out
        new_shape = (mat.shape[0], mat.shape[1] + 1)
        if output[0] is None:
-            output[0] = numpy.empty(new_shape, dtype=mat.dtype)
+            output[0] = np.empty(new_shape, dtype=mat.dtype)
            out = output[0]
        else:
            if output[0].shape != new_shape:
                try:
                    output[0].resize(new_shape)
                except Exception:
-                    output[0] = numpy.empty(new_shape, dtype=mat.dtype)
+                    output[0] = np.empty(new_shape, dtype=mat.dtype)
            out = output[0]
        out[:, 0].fill(self.val.data)
@@ -2147,14 +2147,14 @@ class Prepend_scalar_to_each_row(gof.Op):
        output, = out
        new_shape = (mat.shape[0], mat.shape[1] + 1)
        if output[0] is None:
-            output[0] = numpy.empty(new_shape, dtype=mat.dtype)
+            output[0] = np.empty(new_shape, dtype=mat.dtype)
            out = output[0]
        else:
            if output[0].shape != new_shape:
                try:
                    output[0].resize(new_shape)
                except Exception:
-                    output[0] = numpy.empty(new_shape, dtype=mat.dtype)
+                    output[0] = np.empty(new_shape, dtype=mat.dtype)
            out = output[0]
        out[:, 0].fill(val)
        out[:, 1:] = mat

--- a/theano/tensor/nnet/sigm.py
+++ b/theano/tensor/nnet/sigm.py
@@ -9,7 +9,7 @@ from __future__ import absolute_import, print_function, division
 import warnings
-import numpy
+import numpy as np
 import theano
 from theano import config, gof, printing, scalar
@@ -41,8 +41,8 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
        # half-precision (float16), where we want float32.
        x_dtype = str(getattr(x, 'dtype', ''))
        if x_dtype in ('int8', 'uint8'):
-            return 1.0 / (1.0 + numpy.exp(-x, sig='f'))
+            return 1.0 / (1.0 + np.exp(-x, sig='f'))
-        return 1.0 / (1.0 + numpy.exp(-x))
+        return 1.0 / (1.0 + np.exp(-x))
    def impl(self, x):
        return ScalarSigmoid.st_impl(x)
@@ -134,8 +134,8 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
        This method was used to generate the graph: sigmoid_prec.png in the doc.
        """
-        data = numpy.arange(-15, 15, .1)
+        data = np.arange(-15, 15, .1)
-        val = 1 / (1 + numpy.exp(-data))
+        val = 1 / (1 + np.exp(-data))
        def hard_sigmoid(x):
            return theano.tensor.nnet.hard_sigmoid(x)
@@ -330,8 +330,8 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
        # half-precision (float16), where we want float32.
        x_dtype = str(getattr(x, 'dtype', ''))
        if x_dtype in ('int8', 'uint8'):
-            return numpy.log1p(numpy.exp(x, sig='f'))
+            return np.log1p(np.exp(x, sig='f'))
-        return numpy.log1p(numpy.exp(x))
+        return np.log1p(np.exp(x))
    def impl(self, x):
        return ScalarSoftplus.static_impl(x)
@@ -399,7 +399,7 @@ def _is_1(expr):
    """
    try:
        v = opt.get_scalar_constant_value(expr)
-        return numpy.allclose(v, 1)
+        return np.allclose(v, 1)
    except tensor.NotScalarConstantError:
        return False
@@ -457,7 +457,7 @@ def is_1pexp(t, only_process_constants=True):
                    scal_sum = scalars[0]
                    for s in scalars[1:]:
                        scal_sum = scal_sum + s
-                    if numpy.allclose(scal_sum, 1):
+                    if np.allclose(scal_sum, 1):
                        return False, maybe_exp.owner.inputs[0]
                # Before 7987b51 there used to be a bug where *any* constant
                # was considered as if it was equal to 1, and thus this
@@ -569,7 +569,7 @@ def is_neg(var):
        for idx, mul_input in enumerate(apply.inputs):
            try:
                constant = opt.get_scalar_constant_value(mul_input)
-                is_minus_1 = numpy.allclose(constant, -1)
+                is_minus_1 = np.allclose(constant, -1)
            except NotScalarConstantError:
                is_minus_1 = False
            if is_minus_1:
@@ -968,7 +968,7 @@ def local_inv_1_plus_exp(node):
            # scalar_inputs are potentially dimshuffled and fill'd scalars
            if len(nonconsts) == 1:
                if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp:
-                    if scalars and numpy.allclose(numpy.sum(scalars), 1):
+                    if scalars and np.allclose(np.sum(scalars), 1):
                        out = opt._fill_chain(
                            sigmoid(
                                tensor.neg(nonconsts[0].owner.inputs[0])),
@@ -999,7 +999,7 @@ def local_1msigmoid(node):
                val_l = opt.get_scalar_constant_value(sub_l)
            except Exception:
                return
-            if numpy.allclose(numpy.sum(val_l), 1):
+            if np.allclose(np.sum(val_l), 1):
                out = sigmoid(-sub_r.owner.inputs[0])
                copy_stack_trace([sub_r, node.outputs[0]], out)
                return [out]