Merge pull request #1580 from nouiz/deprecated

[MRG]Deprecated

Merge pull request #1580 from nouiz/deprecated
9950ce08 · Pascal Lamblin · 1d639d66 · 7c42bebe · 9950ce08 · 9950ce08
--- a/MANIFEST.in
+++ b/MANIFEST.in
 global-include *.txt
+global-include *.c
 global-include *.cu
 global-include *.cuh
 global-include *.sh

--- a/doc/extending/fibby.txt
+++ b/doc/extending/fibby.txt
@@ -67,9 +67,9 @@ you should check the strides and alignment.
        if (!%(y)s)
            %(fail)s;
        {//New scope needed to make compilation work
-            dtype_%(y)s * y = (dtype_%(y)s*)%(y)s->data;
+            dtype_%(y)s * y = (dtype_%(y)s*)PyArray_DATA(%(y)s);
-            dtype_%(x)s * x = (dtype_%(x)s*)%(x)s->data;
+            dtype_%(x)s * x = (dtype_%(x)s*)PyArray_DATA(%(x)s);
-            for (int i = 2; i < %(x)s->dimensions[0]; ++i)
+            for (int i = 2; i < PyArray_DIMS(%(x)s)[0]; ++i)
                y[i] = y[i-1]*y[i-2] + x[i];
        }
      """ % locals()

--- a/doc/library/tensor/basic.txt
+++ b/doc/library/tensor/basic.txt
@@ -420,7 +420,9 @@ TensorVariable
 .. class:: _tensor_py_operators(object)
-    This mix-in class adds convenient attributes, methods, and support for Python operators (see :ref:`tensor_operator_support`).
+    This mix-in class adds convenient attributes, methods, and support
+    to TensorVariable, TensorConstant and TensorSharedVariable for
+    Python operators (see :ref:`tensor_operator_support`).
    .. attribute:: type
@@ -472,6 +474,10 @@ TensorVariable
        See :func:`flatten`.
+    .. method:: ravel()
+        Return ``self.flatten()``. Provided for NumPy compatibility.
    .. attribute:: T
        Transpose of this tensor.
@@ -485,8 +491,31 @@ TensorVariable
            same vector!  Use `reshape` or `dimshuffle` to turn your vector
            into a row or column matrix.
+    .. method:: {any,all}(axis=None, keepdims=False)
+    .. method:: {sum,prod,mean}(axis=None, dtype=None, keepdims=False, acc_dtype=None)
+    .. method:: {var,std,min,max,argmin,argmax}(axis=None, keepdims=False)
+    .. method:: diagonal(offset=0, axis1=0, axis2=1)
+    .. method:: astype(dtype)
+    .. method:: take(indices, axis=None, mode='raise')
+    .. method:: copy()
+    .. method:: norm(L, axis=None)
+    .. method:: nonzero(self, return_matrix=False)
+    .. method:: nonzero_values(self)
+    .. method:: sort(self, axis=-1, kind='quicksort', order=None)
+    .. method:: argsort(self, axis=-1, kind='quicksort', order=None)
+    .. method:: clip(self, a_min, a_max)
+    .. method:: conj()
+    .. method:: repeat(repeats, axis=None)
+    .. method:: round(mode="half_away_from_zero")
+    .. method:: trace()
+    .. method:: get_scalar_constant_value()
+    .. method:: zeros_like(model, dtype=None)
+       All of the above methods behave like their NumPy counterparts, applied to the current Theano tensor.
+    .. method:: __{abs,neg,lt,le,gt,ge,invert,and,or,add,sub,mul,div,truediv,floordiv}__
+       These elementwise operations are supported via Python syntax.
 Shaping and Shuffling
 =====================

--- a/theano/compile/tests/test_debugmode.py
+++ b/theano/compile/tests/test_debugmode.py
@@ -155,11 +155,11 @@ class WeirdBrokenOp(gof.Op):
        prep_vars = """
            //the output array has size M x N
            npy_intp M = PyArray_DIMS(%(a)s)[0];
-            npy_intp Sa = %(a)s->strides[0] / PyArray_DESCR(%(a)s)->elsize;
+            npy_intp Sa = PyArray_STRIDES(%(a)s)[0] / PyArray_DESCR(%(a)s)->elsize;
-            npy_intp Sz = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize;
+            npy_intp Sz = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
-            npy_double * Da = (npy_double*)%(a)s->data;
+            npy_double * Da = (npy_double*)PyArray_BYTES(%(a)s);
-            npy_double * Dz = (npy_double*)%(z)s->data;
+            npy_double * Dz = (npy_double*)PyArray_BYTES(%(z)s);
            //clear the output array
            for (npy_intp m = 0; m < M; ++m)

--- a/theano/gof/cmodule.py
+++ b/theano/gof/cmodule.py
@@ -1693,7 +1693,7 @@ class GCC_compiler(object):
        #to use the new API, but not everywhere. When finished, enable
        #the following macro to assert that we don't bring new code
        #that use the old API.
-        #cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
+        cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
        numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
        # numpy 1.7 deprecated the following macro but the new one didn't

--- a/theano/gof/lazylinker_c.c.txt
+++ b/theano/gof/lazylinker_c.c.txt
--- a/theano/gof/lazylinker_c.py
+++ b/theano/gof/lazylinker_c.py
@@ -76,10 +76,7 @@ except ImportError:
        except ImportError:
            _logger.info("Compiling new CVM")
            dirname = 'lazylinker_ext'
-            # We use a .txt extensions as otherwise it don't get
+            cfile = os.path.join(theano.__path__[0], 'gof', 'lazylinker_c.c')
-            # included when we create a package to send to pypi
-            # This happen even if we tell to include *.c files
-            cfile = os.path.join(theano.__path__[0], 'gof', 'lazylinker_c.c.txt')
            code = open(cfile).read()
            loc = os.path.join(config.compiledir, dirname)
            if not os.path.exists(loc):

--- a/theano/misc/check_blas.py
+++ b/theano/misc/check_blas.py
@@ -220,6 +220,7 @@ if __name__ == "__main__":
        GTX 650 Ti               0.27s
        GTX 460                  0.37s                0.45s
        GTX 285           0.42s         0.452s        0.452s        0.40s # cuda 3.0 seems faster? driver version?
+        750M                     0.49s
        GTX 550 Ti                                    0.57s
        GT 520                          2.68s                3.06s
        520M                     2.44s                       3.19s        # with bumblebee on Ubuntu 12.04

--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -2223,12 +2223,6 @@ class GpuReshape(tensor.Reshape, GpuOp):
        out[0] = x.reshape(tuple(shp))
-# C Code shared by GpuSubtensor and GpuIncSubtensor
-_define_set_data = """
-    #define CudaNdarray_set_device_data2(obj, ptr, base) \
-            CudaNdarray_set_device_data(obj, (float *)ptr, base)
-"""
 class GpuSubtensor(GpuOp, tensor.Subtensor):
    """
    Implement subtensor on the gpu.
@@ -2276,16 +2270,27 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
        view_ndim = node.outputs[0].ndim
        fail = sub['fail']
+        decl = "CudaNdarray* xview = NULL;"
+        get_xview = self.helper_c_code(node, name, inputs, outputs, sub,
+                                       self.idx_list,
+                                       view_ndim=view_ndim,
+                                       c_prefix='CudaNdarray',
+                                       strides_mul=4,
+                                       )
        build_view = """
        //TODO: give this Op a second output so that this view can be cached
        //TODO: alternatively, fix the memory leak on failure
-        CudaNdarray* xview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
+        xview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
        if (!xview)
        {
            %(fail)s;
        }
-        if (CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(%(x)s),
-                                       (PyObject*) NULL))
+        if (CudaNdarray_set_device_data(
+                xview,
+                CudaNdarray_DEV_DATA(%(x)s) + xview_offset/4,
+                (PyObject*) %(x)s))
        {
            PyErr_Format(PyExc_RuntimeError,
                         "GpuSubtensor is not able to set the"
@@ -2294,43 +2299,24 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
            %(fail)s;
        }
        cnda_mark_dev_structure_dirty(xview);
-        """ % locals()
+        for(int idx=0;idx <%(view_ndim)s; idx++){
+        //For broadcasted dimensions, set the strides to 0
-        get_xview = _define_set_data + \
+        //We can't do that only for broadcasted dimensions as this can happen
-                    self.helper_c_code(node, name, inputs, outputs, sub,
+        //for dimensions of size 0. That are rebroadcated later.
-                                       self.idx_list,
+            if(xview_dims[idx]==1)
-                                       c_prefix='CudaNdarray',
+                CudaNdarray_set_stride(xview, idx, 0);
-                                       set_data='CudaNdarray_set_device_data2',
+            else
-                                       set_dim='CudaNdarray_set_dim',
+                CudaNdarray_set_stride(xview, idx, xview_strides[idx]);
-                                       set_stride='CudaNdarray_set_stride',
+            CudaNdarray_set_dim(xview, idx, xview_dims[idx]);
-                                       update_flags="", strides_mul=4)
-        finish_view = ""
-        #For broadcasted dimensions, set the strides to 0
-        #We can't do that only for broadcasted dimensions as this can happen for dimensions of size 0,
-        #That are rebroadcated later.
-        for idx in range(node.outputs[0].ndim):
-            finish_view += """
-            if(CudaNdarray_HOST_DIMS(xview)[%(idx)s]==1)
-            CudaNdarray_set_stride(xview, %(idx)s, 0);
-            """ % locals()
-        finish_view += """
-        //Set the base only now
-        if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
-                                    %(x)s)){
-            PyErr_Format(PyExc_RuntimeError,
-                         "GpuSubtensor is not able to set"
-                         " the base of the view array");
-            Py_XDECREF(xview);
-            %(fail)s;
        }
+        """ % locals()
+        finish_view = """
        Py_XDECREF(%(z)s);
        %(z)s = xview;
        """ % locals()
-        return build_view + "{" + get_xview + "}" + finish_view
+        return decl + get_xview + build_view + finish_view
    def c_code_cache_version(self):
        hv = self.helper_c_code_cache_version()
@@ -2719,6 +2705,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
        """ %locals()
 class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
    """
    Implement IncSubtensor on the gpu.
@@ -2756,6 +2743,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
        """
        return """(CudaNdarray*) CudaNdarray_Copy(%(x)s)""" % locals()
+    def decl_view(self):
+        return "CudaNdarray* zview = NULL;"
    def make_view_array(self, x, view_ndim):
        """
            :param x: a string identifying an array to be viewed
@@ -2765,17 +2755,32 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
            This doesn't need to actually set up the view with the
            right indexing; we'll do that manually later.
        """
-        return """CudaNdarray* zview = (CudaNdarray*)
+        ret = """zview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
-                CudaNdarray_New(%(view_ndim)s)""" % locals()
+        if (CudaNdarray_set_device_data(
+                zview,
+                CudaNdarray_DEV_DATA(%(x)s) + xview_offset/4,
+                (PyObject*) %(x)s))
+        {
+            zview = NULL;
+            PyErr_Format(PyExc_RuntimeError,
+                         "GpuSubtensor is not able to set the"
+                         " devdata field of the view");
+        }else{
+            cnda_mark_dev_structure_dirty(zview);
+            for(int idx=0;idx <%(view_ndim)s; idx++){
+                if(xview_dims[idx]==1)
+                    CudaNdarray_set_stride(zview, idx, 0);
+                else
+                    CudaNdarray_set_stride(zview, idx, xview_strides[idx]);
+                CudaNdarray_set_dim(zview, idx, xview_dims[idx]);
+            }
+        }
+        """ % locals()
+        return ret
    def get_helper_c_code_args(self):
        """ Return a dictionary of arguments to use with helper_c_code"""
-        return { 'update_flags' : "",
+        return {'c_prefix': 'CudaNdarray',
-                'c_prefix' : 'CudaNdarray',
-                'set_data' :'CudaNdarray_set_device_data2',
-                'set_dim' : 'CudaNdarray_set_dim',
-                'set_stride' : 'CudaNdarray_set_stride',
-                'update_flags' : "",
                'strides_mul': 4
                }
@@ -2789,24 +2794,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
        """
        return """CudaNdarray_CopyFromCudaNdarray(%(view)s, %(source)s)""" % locals()
-    def define_set_data(self):
-        return _define_set_data
-    def link_view_array(self, x, fail):
-        return """
-        if (CudaNdarray_set_device_data(zview, CudaNdarray_DEV_DATA(%(x)s),
-                                       (PyObject*) NULL))
-        {
-            PyErr_Format(PyExc_RuntimeError,
-                         "GpuSubtensor is not able to set the"
-                         " devdata field of the view");
-            Py_XDECREF(zview);
-            %(fail)s;
-        }
-        cnda_mark_dev_structure_dirty(zview);
-        """ % locals()
    def set_view_base(self, x, fail):
        return """
        //Set the base only now
@@ -2823,7 +2810,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
    def add_to_zview(self, x, fail):
        return """
        PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
                                                        (PyObject *) py_%(x)s);
@@ -2839,7 +2825,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
        """ % locals()
    def c_code_cache_version(self):
        parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
        if parent_version:
            return parent_version + (0,)

--- a/theano/sandbox/rng_mrg.py
+++ b/theano/sandbox/rng_mrg.py
@@ -5,13 +5,14 @@ Generator code in SSJ package (L'Ecuyer & Simard)
 http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
 """
-import sys, warnings
+import warnings
 import numpy
 from theano import Op, Apply, shared, config, Variable
 from theano.tensor import (raw_random, TensorType, as_tensor_variable,
                           get_vector_length, cast, opt, scal)
-from theano.tensor import zeros_like, sqrt, log, sin, cos, join, prod
+from theano.tensor import sqrt, log, sin, cos, join, prod
 from theano.compile import optdb
 from theano.gof import local_optimizer
 from theano.gof.python25 import all, any
@@ -36,6 +37,7 @@ def matVecModM(A, s, m):
                x[i] = r + m
    return x
 def multMatVect(v, A, m1, B, m2):
    #multiply the first half of v by A with a modulo of m1
    #and the second half by B with a modulo of m2
@@ -79,9 +81,11 @@ A2p134 = numpy.asarray(
     [1401213391, 1178684362, 1431130166]])
 np_int32_vals = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
 def ff_2p134(rstate):
    return multMatVect(rstate, A1p134, M1, A2p134, M2)
 def ff_2p72(rstate):
    return multMatVect(rstate, A1p72, M1, A2p72, M2)
@@ -93,8 +97,8 @@ def mrg_next_value(rstate, new_rstate):
    #i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
    i0, i7, i9, i15, i16, i22, i24 = np_int32_vals
    #first component
-    y1 = (((x12 & MASK12) << i22) + (x12 >> i9)
+    y1 = (((x12 & MASK12) << i22) + (x12 >> i9) +
-        + ((x13 & MASK13) << i7) + (x13 >> i24))
+          ((x13 & MASK13) << i7) + (x13 >> i24))
    assert type(y1) == numpy.int32
    if (y1 < 0 or y1 >= M1):     #must also check overflow
@@ -135,6 +139,7 @@ def mrg_next_value(rstate, new_rstate):
    else:
        return (x11 - x21) * NORM
 class mrg_uniform_base(Op):
    def __init__(self, output_type, inplace=False):
        Op.__init__(self)
@@ -145,17 +150,19 @@ class mrg_uniform_base(Op):
        self.warned_numpy_version = False
    def __eq__(self, other):
-        return type(self) == type(other) \
+        return (type(self) == type(other) and
-                and self.output_type == other.output_type \
+                self.output_type == other.output_type and
-                and self.inplace == other.inplace
+                self.inplace == other.inplace)
    def __hash__(self):
        return hash(type(self)) ^ hash(self.output_type) ^ hash(self.inplace)
    def __str__(self):
        if self.inplace:
            s = "inplace"
-        else: s = "no_inplace"
+        else:
-        return self.__class__.__name__+"{%s,%s}"%(self.output_type,s)
+            s = "no_inplace"
+        return self.__class__.__name__ + "{%s,%s}" % (self.output_type, s)
    def make_node(self, rstate, size):
        # error checking slightly redundant here, since
@@ -166,7 +173,7 @@ class mrg_uniform_base(Op):
                     [rstate, size],
                     [rstate.type(), self.output_type()])
-    def grad(self,inputs,ograd):
+    def grad(self, inputs, ograd):
        return [None for i in inputs]
    def R_op(self, inputs, eval_points):
@@ -187,8 +194,8 @@ class mrg_uniform(mrg_uniform_base):
    def perform(self, node, inp, out):
        rstate, size = inp
        o_rstate, o_sample = out
-        numpy_version=numpy.__version__.split('.')
+        numpy_version = numpy.__version__.split('.')
-        if not self.warned_numpy_version and int(numpy_version[0])<=1 and int(numpy_version[1])<3:
+        if not self.warned_numpy_version and int(numpy_version[0]) <= 1 and int(numpy_version[1]) <3 :
            print "Warning: you must use numpy version 1.3.0 or higher with the python version of this op. Otherwise numpy leak memory. and numpy"
            self.warned_numpy_version = True
@@ -201,20 +208,21 @@ class mrg_uniform(mrg_uniform_base):
        for s in size:
            n_elements *= s
-        n_streams,_ = rstate.shape
+        n_streams, _ = rstate.shape
        rval = numpy.zeros(n_elements, dtype=self.output_type.dtype)
        err_orig = numpy.seterr(over='ignore')
        try:
            for i in xrange(n_elements):
-                sample = mrg_next_value(rstate[i%n_streams], rstate[i%n_streams])
+                sample = mrg_next_value(rstate[i % n_streams],
+                                        rstate[i % n_streams])
                rval[i] = sample
        finally:
            numpy.seterr(**err_orig)
        o_rstate[0] = node.outputs[0].type.filter(rstate)  # send to GPU if necessary
-        o_sample[0] = node.outputs[1].type.filter(rval.reshape(size))# send to GPU if necessary
+        o_sample[0] = node.outputs[1].type.filter(rval.reshape(size))  # send to GPU if necessary
    def c_code(self, node, name, inp, out, sub):
        rstate, size = inp
@@ -228,7 +236,7 @@ class mrg_uniform(mrg_uniform_base):
        fail = sub['fail']
        if self.output_type.dtype == 'float32':
            otype = 'float'
-            NORM = '4.6566126e-10f' #numpy.float32(1.0/(2**31+65))
+            NORM = '4.6566126e-10f'  # numpy.float32(1.0/(2**31+65))
            # this was determined by finding the biggest number such that
            # numpy.float32(number * M1) < 1.0
        else:
@@ -279,7 +287,7 @@ class mrg_uniform(mrg_uniform_base):
        }
        for (int i = 0; i < %(ndim)s; ++i)
        {
-            odims[i] = ((npy_int32*)(%(size)s->data + %(size)s->strides[0] * i))[0];
+            odims[i] = ((npy_int32*)(PyArray_BYTES(%(size)s) + PyArray_STRIDES(%(size)s)[0] * i))[0];
            n_elements *= odims[i];
            must_alloc_sample = must_alloc_sample || (PyArray_DIMS(%(o_sample)s)[i] != odims[i]);
            //fprintf(stderr, "size %%i %%i\\n", i, (int)odims[i]);
@@ -313,8 +321,8 @@ class mrg_uniform(mrg_uniform_base):
        }
        n_streams = PyArray_DIMS(%(o_rstate)s)[0];
-        sample_data = (%(otype)s *) %(o_sample)s->data;
+        sample_data = (%(otype)s *) PyArray_DATA(%(o_sample)s);
-        state_data = (npy_int32 *) %(o_rstate)s->data;
+        state_data = (npy_int32 *) PyArray_DATA(%(o_rstate)s);
        for (int i = 0; i < n_elements; ++i)
        {
            npy_int32 * state_data_i = state_data + (i%%n_streams)*6;
@@ -392,7 +400,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
    def c_support_code_apply(self, node, nodename):
        if self.output_type.dtype == 'float32':
            otype = 'float'
-            NORM = '4.6566126e-10f' #numpy.float32(1.0/(2**31+65))
+            NORM = '4.6566126e-10f'  # numpy.float32(1.0/(2**31+65))
            # this was determined by finding the biggest number such that
            # numpy.float32(number * M1) < 1.0
        else:
@@ -476,7 +484,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
            }
        }
-        """ %locals()
+        """ % locals()
    def c_code(self, node, nodename, inp, out, sub):
        rstate, size = inp
@@ -491,7 +499,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
        else:
            otype = 'double'
-        SYNC="CNDA_THREAD_SYNC";
+        SYNC = "CNDA_THREAD_SYNC"
        return """
        //////// <code generated by mrg_uniform>
@@ -521,7 +529,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
        }
        for (int i = 0; i < %(ndim)s; ++i)
        {
-            odims[i] = ((npy_int32*)(%(size)s->data + %(size)s->strides[0] * i))[0];
+            odims[i] = ((npy_int32*)(PyArray_BYTES(%(size)s) + PyArray_STRIDES(%(size)s)[0] * i))[0];
            n_elements *= odims[i];
            must_alloc_sample = (must_alloc_sample
                    || CudaNdarray_HOST_DIMS(%(o_sample)s)[i] != odims[i]);
@@ -593,7 +601,8 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
        }
        //////// </ code generated by mrg_uniform>
-        """ %locals()
+        """ % locals()
    def c_code_cache_version(self):
        return (7,)
@@ -662,7 +671,7 @@ class MRG_RandomStreams(object):
            elif seed >= M2:
                raise ValueError('seed should be less than %i' % M2, seed)
            self.rstate = numpy.asarray([seed]*6, dtype='int32')
-        elif len(seed)==6:
+        elif len(seed) == 6:
            if seed[0] == 0 and seed[1] == 0 and seed[2] == 0:
                raise ValueError('The first 3 values of seed should not be all 0', seed)
            if seed[3] == 0 and seed[4] == 0 and seed[5] == 0:
@@ -690,7 +699,7 @@ class MRG_RandomStreams(object):
        """
        assert n_streams < 2**72
        assert n_streams > 0
-        rval = numpy.zeros((n_streams,6), dtype='int32')
+        rval = numpy.zeros((n_streams, 6), dtype='int32')
        rval[0] = self.rstate
        for i in xrange(1, n_streams):
            rval[i] = ff_2p72(rval[i - 1])
@@ -776,11 +785,13 @@ class MRG_RandomStreams(object):
            # currently no Theano node that will do a frombuffer
            # reinterpretation.
            u = self.pretty_return(node_rstate,
-                    *GPU_mrg_uniform.new(node_rstate, ndim, dtype, size))
+                                   *GPU_mrg_uniform.new(node_rstate,
+                                                        ndim, dtype, size))
        else:
            node_rstate = shared(self.get_substream_rstates(nstreams))
            u = self.pretty_return(node_rstate,
-                    *mrg_uniform.new(node_rstate, ndim, dtype, size))
+                                   *mrg_uniform.new(node_rstate,
+                                                    ndim, dtype, size))
        r = u * (high - low) + low
        if u.type.broadcastable != r.type.broadcastable:
@@ -934,4 +945,6 @@ def mrg_random_make_inplace(node):
        new_op = op.__class__(op.output_type, inplace=True)
        return new_op.make_node(*node.inputs).outputs
    return False
-optdb.register('random_make_inplace_mrg', opt.in2out(mrg_random_make_inplace, ignore_newtrees=True), 99, 'fast_run', 'inplace')
+optdb.register('random_make_inplace_mrg',
+               opt.in2out(mrg_random_make_inplace, ignore_newtrees=True),
+               99, 'fast_run', 'inplace')
--- a/theano/scan_module/scan_perform.c.txt
+++ b/theano/scan_module/scan_perform.c.txt
--- a/theano/scan_module/scan_perform.pyx
+++ b/theano/scan_module/scan_perform.pyx
--- a/theano/scan_module/scan_perform_ext.py
+++ b/theano/scan_module/scan_perform_ext.py
--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
--- a/theano/sparse/opt.py
+++ b/theano/sparse/opt.py
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
--- a/theano/tensor/elemwise.py
+++ b/theano/tensor/elemwise.py
--- a/theano/tensor/nnet/nnet.py
+++ b/theano/tensor/nnet/nnet.py
--- a/theano/tensor/subtensor.py
+++ b/theano/tensor/subtensor.py
--- a/theano/tensor/tests/test_blas.py
+++ b/theano/tensor/tests/test_blas.py
--- a/theano/tensor/tests/test_inc_subtensor.py
+++ b/theano/tensor/tests/test_inc_subtensor.py
--- a/theano/tensor/type.py
+++ b/theano/tensor/type.py
--- a/theano/tests/test_tutorial.py
+++ b/theano/tests/test_tutorial.py