range fixes.

069a9912 · David Warde-Farley · Arnaud Bergeron · 34d96ebd · 069a9912 · 069a9912
--- a/doc/crei2013/scan_pow.py
+++ b/doc/crei2013/scan_pow.py
@@ -22,5 +22,5 @@ power = theano.function(inputs=[A, k],
                        outputs=final_result,
                        updates=updates)
-print(power(range(10), 2))
+print(power(list(range(10)), 2))
 #[  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]
--- a/doc/hpcs2011_tutorial/scan_pow.py
+++ b/doc/hpcs2011_tutorial/scan_pow.py
@@ -18,4 +18,4 @@ final_result = result[-1]
 power = theano.function(inputs=[A,k], outputs=final_result,
                        updates=updates)
-print(power(range(10),2))
+print(power(list(range(10)), 2))
--- a/doc/tutorial/loop_solution_1.py
+++ b/doc/tutorial/loop_solution_1.py
@@ -31,7 +31,7 @@ final_result = result[-1]
 power = theano.function(inputs=[A, k], outputs=final_result,
                        updates=updates)
-print(power(range(10), 2))
+print(power(list(range(10)), 2))
 # [  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]

--- a/theano/compile/debugmode.py
+++ b/theano/compile/debugmode.py
@@ -1223,8 +1223,8 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
                    # When the CudaNdarray is built, the underlying memory
                    # is c-contiguous, so we transpose it before and after.
                    new_buf = CudaNdarray(new_buf.T)
-                    new_buf = cuda_dimshuffle(new_buf,
+                    new_buf = cuda_dimshuffle(
-                                              range(new_buf.ndim)[::-1])
+                        new_buf, list(range(new_buf.ndim))[::-1])
                f_cont_outputs[r] = new_buf

--- a/theano/compile/function_module.py
+++ b/theano/compile/function_module.py
@@ -1015,7 +1015,7 @@ class FunctionMaker(object):
                else:
                    flags = []
                    for output_new, output_old, i in zip(
-                            outputs_new, outputs_old, range(len(outputs_new))):
+                            outputs_new, outputs_old, xrange(len(outputs_new))):
                        print('loop through outputs node for both graphs')
                        graph_old.variables = set(gof.graph.variables(
                            graph_old.inputs, graph_old.outputs))

--- a/theano/gof/sched.py
+++ b/theano/gof/sched.py
@@ -140,7 +140,7 @@ def posort(l, *cmps):
    >>> lower_tens = lambda a, b: a/10 - b/10 # prefer lower numbers div 10
    >>> prefer evens = lambda a, b: a%2 - b%2 # prefer even numbers
-    >>> posort(range(20), lower_tens, prefer_evens)
+    >>> posort(list(range(20)), lower_tens, prefer_evens)
    [0, 8, 2, 4, 6, 1, 3, 5, 7, 9, 16, 18, 10, 12, 14, 17, 19, 11, 13, 15]
    implemented with _toposort """

--- a/theano/gof/tests/test_op.py
+++ b/theano/gof/tests/test_op.py
@@ -248,7 +248,7 @@ class TestMakeThunk(unittest.TestCase):
 def test_test_value_python_objects():
-    for x in (range(3), 0, 0.5, 1):
+    for x in (list(range(3)), 0, 0.5, 1):
        assert (op.get_test_value(x) == x).all()

--- a/theano/gof/tests/test_sched.py
+++ b/theano/gof/tests/test_sched.py
@@ -60,7 +60,7 @@ def test_posort_easy():
 def test_posort():
-    l = range(1, 20)
+    l = list(range(1, 20))
    cmps = [lambda a, b: a % 10 - b % 10,
            lambda a, b: (a / 10) % 2 - (b / 10) % 2,
            lambda a, b: a - b]

--- a/theano/itertools.txt
+++ b/theano/itertools.txt
+compat/python2x.py
+compat/__init__.py
+compile/debugmode.py
+compile/function_module.py
+gof/cc.py
+gradient.py
+ifelse.py
+sandbox/cuda/tests/test_mlp.py
+sandbox/gpuarray/elemwise.py
+sandbox/gpuarray/tests/test_basic_ops.py
+sandbox/scan.py
+sandbox/scan_module/scan.py
+sandbox/scan_module/scan_op.py
+sandbox/scan_module/scan_utils.py
+scalar/basic.py
+scan_module/scan.py
+scan_module/scan_op.py
+scan_module/scan_utils.py
+sparse/opt.py
+tensor/basic.py
+tensor/elemwise.py
+tensor/nnet/sigm.py
+tensor/nnet/tests/test_sigm.py
+tensor/opt.py
+tensor/subtensor.py
+tensor/tests/test_basic.py
+tensor/tests/test_elemwise.py
+tensor/tests/test_subtensor.py
--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -275,7 +275,7 @@ class GpuElemwise(GpuOp):
                # TODO: use LComplete instead
                args.append(GpuDimShuffle(
                    input.type.broadcastable,
-                    ['x'] * difference + range(length)
+                    ['x'] * difference + list(range(length))
                    )(input))
        _inputs = args
@@ -813,8 +813,8 @@ class GpuCAReduce(GpuOp):
        ndim = len(self.reduce_mask)
        nd_out = ndim - sum(self.reduce_mask)
        shapes_format = "shape=(%s)" % ",".join(["%d"] * node.inputs[0].ndim)
-        shapes_data = ",".join(["CudaNdarray_HOST_DIMS(%s)[%d]" % (x, i)
+        shapes_data = ",".join("CudaNdarray_HOST_DIMS(%s)[%d]" % (x, i)
-                                for i in range(node.inputs[0].ndim)])
+                               for i in xrange(node.inputs[0].ndim))
        print("""
            if (verbose)
@@ -3209,7 +3209,7 @@ class GpuJoin(tensor.Join, GpuOp):
        def construct_slices(curlen):
            slices = [slice(None, None, None) for i in \
-                            range(len(template_shape))]
+                            xrange(len(template_shape))]
            slices[axis] = slice(curpos, curpos + curlen, None)
            return tuple(slices)

--- a/theano/sandbox/minimal.py
+++ b/theano/sandbox/minimal.py
@@ -65,6 +65,6 @@ class T_minimal(unittest.TestCase):
        print('built')
        Aval = self.rng.randn(5, 5)
-        bval = numpy.array(range(5), dtype=float)
+        bval = numpy.arange(5, dtype=float)
        f(Aval, bval)
        print('done')
--- a/theano/sandbox/neighbourhoods.py
+++ b/theano/sandbox/neighbourhoods.py
@@ -258,9 +258,9 @@ class NeighbourhoodsFromImages(Op):
                    (i, i) for i in xrange(len(self.strides))])
        out_idx = "".join(\
                ["outer_idx_%d," % (i,) for i in \
-                        range(self.n_dims_before)] + \
+                        xrange(self.n_dims_before)] + \
                ["stride_idx_%d," % (i,) for i in \
-                        range(len(self.strides))])
+                        xrange(len(self.strides))])
        out_idx += self._py_flattened_idx()
        #return_val = '\t' * (self.n_dims_before + len(self.strides)*2)

--- a/theano/sandbox/solve.py
+++ b/theano/sandbox/solve.py
@@ -58,7 +58,7 @@ class T_solve(unittest.TestCase):
    def test0(self):
        A = self.rng.randn(5, 5)
-        b = numpy.array(range(5), dtype=float)
+        b = numpy.arange(5, dtype=float)
        x = scipy.linalg.solve(A, b)
        Ax = numpy.dot(A, x)
        are = tensor.numeric_grad.abs_rel_err(Ax, b)

--- a/theano/scalar/tests/test_basic.py
+++ b/theano/scalar/tests/test_basic.py
@@ -237,32 +237,32 @@ class test_upgrade_to_float(object):
    # at least float32, not float16.
    unary_ops_vals = [
-        (inv, range(-127, 0) + range(1, 127)),
+        (inv, list(range(-127, 0)) + list(range(1, 127))),
-        (sqrt, range(0, 128)),
+        (sqrt, list(range(0, 128))),
-        (log, range(1, 128)),
+        (log, list(range(1, 128))),
-        (log2, range(1, 128)),
+        (log2, list(range(1, 128))),
-        (log10, range(1, 128)),
+        (log10, list(range(1, 128))),
-        (log1p, range(0, 128)),
+        (log1p, list(range(0, 128))),
-        (exp, range(-127, 89)),
+        (exp, list(range(-127, 89))),
-        (exp2, range(-127, 89)),
+        (exp2, list(range(-127, 89))),
-        (expm1, range(-127, 89)),
+        (expm1, list(range(-127, 89))),
-        (deg2rad, range(-127, 128)),
+        (deg2rad, list(range(-127, 128))),
-        (rad2deg, range(-127, 128)),
+        (rad2deg, list(range(-127, 128))),
-        (cos, range(-127, 128)),
+        (cos, list(range(-127, 128))),
-        (arccos, range(-1, 2)),
+        (arccos, list(range(-1, 2))),
-        (cosh, range(-89, 90)),
+        (cosh, list(range(-89, 90))),
-        (arccosh, range(1, 128)),
+        (arccosh, list(range(1, 128))),
-        (sin, range(-127, 128)),
+        (sin, list(range(-127, 128))),
-        (arcsin, range(-1, 2)),
+        (arcsin, list(range(-1, 2))),
-        (sinh, range(-89, 90)),
+        (sinh, list(range(-89, 90))),
-        (arcsinh, range(-127, 128)),
+        (arcsinh, list(range(-127, 128))),
-        (tan, range(-3, 4)),
+        (tan, list(range(-3, 4))),
-        (arctan, range(-127, 128)),
+        (arctan, list(range(-127, 128))),
-        (tanh, range(-127, 128)),
+        (tanh, list(range(-127, 128))),
        (arctanh, [0])]
    binary_ops_vals = [
-        (arctan2, range(-127, 128), range(-127, 128))]
+        (arctan2, list(range(-127, 128)), list(range(-127, 128)))]
    @staticmethod
    def _test_unary(unary_op, x_range):

--- a/theano/scan_module/scan_op.py
+++ b/theano/scan_module/scan_op.py
@@ -245,7 +245,7 @@ class Scan(PureOp):
        # inputs and output and ensure that they have the same dtype
        nb_recurr_outputs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot
-        for outer_oidx in range(nb_recurr_outputs):
+        for outer_oidx in xrange(nb_recurr_outputs):
            inner_iidxs = self.var_mappings['inner_inp_from_outer_out'][outer_oidx]
            inner_oidxs = self.var_mappings['inner_out_from_outer_out'][outer_oidx]
@@ -776,8 +776,8 @@ class Scan(PureOp):
                d1 = numpy.max(cython_tap_array_len)
            d0 = len(self.tap_array)
            cython_tap_array = numpy.zeros((d0, d1), dtype='int32')
-            for _d0 in range(d0):
+            for _d0 in xrange(d0):
-                for _d1 in range(cython_tap_array_len[_d0]):
+                for _d1 in xrange(cython_tap_array_len[_d0]):
                    cython_tap_array[_d0, _d1] = self.tap_array[_d0][_d1]
            cython_mit_mot_out_nslices = \
                numpy.asarray([len(x) for x in self.mit_mot_out_slices],
@@ -789,8 +789,8 @@ class Scan(PureOp):
            d0 = len(self.mit_mot_out_slices)
            cython_mit_mot_out_slices = numpy.zeros((d0, d1),
                                                      dtype='int32')
-            for _d0 in range(d0):
+            for _d0 in xrange(d0):
-                for _d1 in range(cython_mit_mot_out_nslices[_d0]):
+                for _d1 in xrange(cython_mit_mot_out_nslices[_d0]):
                    cython_mit_mot_out_slices[_d0, _d1] = \
                        self.mit_mot_out_slices[_d0][_d1]
@@ -1492,9 +1492,9 @@ class Scan(PureOp):
        nb_inputs = len(self.inputs)
        nb_outputs = len(self.outputs)
-        for i in range(nb_inputs):
+        for i in xrange(nb_inputs):
            input = self.inputs[i]
-            inp_connection_pattern = [i == j for j in range(nb_inputs)]
+            inp_connection_pattern = [i == j for j in xrange(nb_inputs)]
            connect_pattern_by_var[input] = inp_connection_pattern
        # Iterate through the nodes used to produce the outputs from the
@@ -1514,11 +1514,11 @@ class Scan(PureOp):
            # For every output of the inner node, figure out which inputs it
            # is connected to by combining the connection pattern of the inner
            # node and the connection patterns of the inner node's inputs.
-            for out_idx in range(len(n.outputs)):
+            for out_idx in xrange(len(n.outputs)):
                out = n.outputs[out_idx]
                out_connection_pattern = [False] * nb_inputs
-                for inp_idx in range(len(n.inputs)):
+                for inp_idx in xrange(len(n.inputs)):
                    inp = n.inputs[inp_idx]
                    if inp in connect_pattern_by_var:
@@ -1530,17 +1530,17 @@ class Scan(PureOp):
                        if op_connection_pattern[inp_idx][out_idx]:
                            out_connection_pattern = [out_connection_pattern[i] or
                                                    inp_connection_pattern[i]
-                                                    for i in range(nb_inputs)]
+                                                    for i in xrange(nb_inputs)]
                # Store the connection pattern of the node output
                connect_pattern_by_var[out] = out_connection_pattern
        # Obtain the global connection pattern by combining the
        # connnection patterns of the individual outputs
-        global_connection_pattern = [[] for o in range(len(self.inputs))]
+        global_connection_pattern = [[] for o in xrange(len(self.inputs))]
        for out in self.outputs:
            out_connection_pattern = connect_pattern_by_var[out]
-            for i in range(len(self.inputs)):
+            for i in xrange(len(self.inputs)):
                global_connection_pattern[i].append(out_connection_pattern[i])
        return global_connection_pattern
@@ -1565,10 +1565,10 @@ class Scan(PureOp):
        # over every possible pairing of their corresponding inner inputs
        # and inner outputs and, if one such pair of inner variables is
        # connected than the pair of outer variables is connected.
-        for outer_oidx in range(len(node.outputs)):
+        for outer_oidx in xrange(len(node.outputs)):
            inner_oidxs = self.var_mappings['inner_out_from_outer_out'][outer_oidx]
-            for outer_iidx in range(len(node.inputs)):
+            for outer_iidx in xrange(len(node.inputs)):
                inner_iidxs = self.var_mappings['inner_inp_from_outer_inp'][outer_iidx]
                for inner_oidx in inner_oidxs:
@@ -1617,7 +1617,6 @@ class Scan(PureOp):
        indices because multiple inner variables can be associated with the
        same state
        """
        # Lists for outer variables contain individual indices, lists for
        # inner variables contain sequences of indices because many inner
        # variables can be associated with the same outer variable. The list
@@ -1634,7 +1633,7 @@ class Scan(PureOp):
        outer_oidx = 0
        # Handle sequences inputs
-        for i in range(self.info['n_seqs']):
+        for i in xrange(self.info['n_seqs']):
            outer_input_indices.append(outer_iidx)
            inner_input_indices.append([inner_iidx])
            inner_output_indices.append([])
@@ -1646,7 +1645,7 @@ class Scan(PureOp):
            outer_oidx += 0
        # Handle mitmots, mitsots and sitsots variables
-        for i in range(len(self.info['tap_array'])):
+        for i in xrange(len(self.info['tap_array'])):
            nb_input_taps = len(self.info['tap_array'][i])
            if i < self.n_mit_mot:
@@ -1655,10 +1654,10 @@ class Scan(PureOp):
                nb_output_taps = 1
            outer_input_indices.append(outer_iidx)
-            inner_input_indices.append(range(inner_iidx,
+            inner_input_indices.append(list(range(inner_iidx,
-                                             inner_iidx + nb_input_taps))
+                                                  inner_iidx + nb_input_taps)))
-            inner_output_indices.append(range(inner_oidx,
+            inner_output_indices.append(list(range(inner_oidx,
-                                              inner_oidx + nb_output_taps))
+                                                   inner_oidx + nb_output_taps)))
            outer_output_indices.append(outer_oidx)
            outer_iidx += 1
@@ -1671,7 +1670,7 @@ class Scan(PureOp):
        outer_iidx += self.info['n_shared_outs']
        # Handle nitsots variables
-        for i in range(self.n_nit_sot):
+        for i in xrange(self.n_nit_sot):
            outer_input_indices.append(outer_iidx)
            inner_input_indices.append([])
            inner_output_indices.append([inner_oidx])
@@ -1687,7 +1686,7 @@ class Scan(PureOp):
        outer_iidx -= (self.info['n_shared_outs'] + self.n_nit_sot)
        # Handle shared states
-        for i in range(self.info['n_shared_outs']):
+        for i in xrange(self.info['n_shared_outs']):
            outer_input_indices.append(outer_iidx)
            inner_input_indices.append([inner_iidx])
            inner_output_indices.append([inner_oidx])
@@ -1706,7 +1705,7 @@ class Scan(PureOp):
        # Note : the number of non-sequence inputs is not stored in self.info
        # so it has to be inferred from the number of inner inputs that remain
        # to be handled
-        for i in range(len(self.inputs) - inner_iidx):
+        for i in xrange(len(self.inputs) - inner_iidx):
            outer_input_indices.append(outer_iidx)
            inner_input_indices.append([inner_iidx])
            inner_output_indices.append([])
@@ -1732,10 +1731,10 @@ class Scan(PureOp):
                    "inner_inp_from_inner_out" : {},
                    "outer_out_from_inner_out" : {}}
-        for (oinp, iinp, iout, oout) in zip(outer_input_indices,
+        for (oinp, iinp, iout, oout) in izip(outer_input_indices,
-                                            inner_input_indices,
+                                             inner_input_indices,
-                                            inner_output_indices,
+                                             inner_output_indices,
-                                            outer_output_indices):
+                                             outer_output_indices):
            if oout != -1:
                mappings["outer_inp_from_outer_out"][oout] = oinp

--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
@@ -1840,8 +1840,8 @@ class SquareDiagonal(gof.op.Op):
        N = len(diag)
        data = diag[:N]
-        indices = range(N)
+        indices = list(range(N))
-        indptr = range(N + 1)
+        indptr = list(range(N + 1))
        tup = (data, indices, indptr)
        z[0] = scipy.sparse.csc_matrix(tup, copy=True)

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -167,9 +167,9 @@ def as_tensor_variable(x, name=None, ndim=None):
        else:
            if (x.type.ndim > ndim):
                # strip off leading broadcastable dimensions
-                first_non_broadcastable = [idx for idx in range(x.ndim)
+                first_non_broadcastable = [idx for idx in xrange(x.ndim)
                                           if not x.broadcastable[idx]][0]
-                x = x.dimshuffle(range(x.ndim)[first_non_broadcastable:])
+                x = x.dimshuffle(list(range(x.ndim))[first_non_broadcastable:])
                if x.ndim > ndim:
                    raise ValueError(
                        'TensorType could not be cast to have %i dimensions'
@@ -1219,13 +1219,13 @@ class MaxAndArgmax(Op):
            axis = [int(a) for a in axis]
            if len(axis) != 1:
                axis = list(axis)
-                for idx in range(len(axis)):
+                for idx in xrange(len(axis)):
                    if axis[idx] < 0:
                        axis[idx] += x.type.ndim
                axis.sort()
-                if axis == range(-x.type.ndim, 0, 1):
+                if axis == list(range(-x.type.ndim, 0, 1)):
-                    axis = range(x.type.ndim)
+                    axis = list(range(x.type.ndim))
-                assert axis == range(x.type.ndim), (
+                assert axis == list(range(x.type.ndim)), (
                    "MaxAndArgmax does not support multiple"
                    " axes. the max fct supports it. Got %s" % axis)
                axis = None
@@ -1262,7 +1262,7 @@ class MaxAndArgmax(Op):
                    'input is: %s)' % (axis, x.type.ndim))
            all_axes.add(axis)
        else:
-            all_axes = range(x.ndim)
+            all_axes = list(range(x.ndim))
        if axis is None:
            axis = NoneConst.clone()
        else:
@@ -1408,7 +1408,7 @@ class MaxAndArgmax(Op):
        if g_max_disconnected:
            return [x.zeros_like(), axis_grad]
        if NoneConst.equals(axis):
-            axis_ = range(x.ndim)
+            axis_ = list(range(x.ndim))
        else:
            axis_ = axis
        xmax = max(x, axis_)
@@ -1419,7 +1419,7 @@ class MaxAndArgmax(Op):
        if NoneConst.equals(axis):
            # We are taking the max/argmax over all dimensions.
            axis = None
-        for i in range(x.ndim):
+        for i in xrange(x.ndim):
            if axis is None or i == axis.data:
                pattern.append('x')
            else:
@@ -1449,7 +1449,7 @@ def makeKeepDims(x, y, axis):
    y = as_tensor_variable(y)
    if axis is None:
-        axis = range(x.type.ndim)
+        axis = list(range(x.type.ndim))
    elif isinstance(axis, (int, numpy.integer)):
        axis = [axis]
    elif isinstance(axis, numpy.ndarray) and axis.ndim == 0:
@@ -2639,7 +2639,7 @@ class Alloc(gof.Op):
        gz = grads[0]
        n_axes_to_sum = gz.ndim - x.ndim
        # The number of dimensions added
-        axis = range(n_axes_to_sum)
+        axis = list(range(n_axes_to_sum))
        # The broadcasted dimensions
        axis_broadcasted = []
        axis_kept = []
@@ -2653,9 +2653,14 @@ class Alloc(gof.Op):
                axis_kept.append(i)
        gx = gz.sum(axis=axis + axis_broadcasted)
        if axis_broadcasted:
-            new_order = ['x'] * x.ndim
+            new_order = list(x.broadcastable)
-            for idx, axis in enumerate(axis_kept):
+            idx = 0
-                new_order[axis] = idx
+            for i in xrange(x.ndim):
+                if not new_order[i]:
+                    new_order[i] = idx
+                    idx += 1
+                else:
+                    new_order[i] = 'x'
            gx = gx.dimshuffle(new_order)
            # Dimshuffle to add back the broadcasted dims
        # The *elements* of the output are not connected to
@@ -2905,7 +2910,7 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
        shp = cast(shp, 'float64')
    if axis is None:
-        axis = range(input.ndim)
+        axis = list(range(input.ndim))
    elif isinstance(axis, (int, numpy.integer)):
        axis = [axis]
    elif isinstance(axis, numpy.ndarray) and axis.ndim == 0:
@@ -2944,7 +2949,7 @@ def var(input, axis=None, keepdims=False):
    input_ndim = input.type.ndim
    if axis is None:
-        axis = range(input_ndim)
+        axis = list(range(input_ndim))
    elif isinstance(axis, (int, numpy.integer)):
        axis = [axis]
    elif isinstance(axis, numpy.ndarray) and axis.ndim == 0:
@@ -3189,9 +3194,9 @@ def transpose(x, axes=None):
    """
    if axes is None:
-        axes = range((x.ndim - 1), -1, -1)
+        axes = list(range((x.ndim - 1), -1, -1))
    ret = DimShuffle(x.broadcastable, axes, inplace=False)(x)
-    if x.name and axes == range((x.ndim - 1), -1, -1):
+    if x.name and axes == list(range((x.ndim - 1), -1, -1)):
        ret.name = x.name + '.T'
    return ret
@@ -3232,7 +3237,7 @@ def batched_tensordot(x, y, axes=2):
        the last i dimensions of the first tensor and the first
        i dimensions of the second tensor (excluding the first
        (batch) dimension):
-            axes = [range(a.ndim - i, b.ndim), range(1,i+1)]
+            axes = [list(range(a.ndim - i, b.ndim)), list(range(1,i+1))]
        If an array, its two elements must contain compatible axes
        of the two tensors. For example, [[1, 2], [2, 4]] means sum
@@ -3372,10 +3377,10 @@ class Split(Op):
        splits = node.inputs[2]
        shp_x, shp_axis, shp_splits = in_shapes
        out_shapes = []
-        for i in range(self.len_splits):
+        for i in xrange(self.len_splits):
            temp = as_tensor_variable(shp_x)
            temp = theano.tensor.subtensor.set_subtensor(temp[axis], splits[i])
-            temp = [temp[i] for i in range(len(shp_x))]
+            temp = [temp[i] for i in xrange(len(shp_x))]
            out_shapes.append(temp)
        return out_shapes
@@ -3602,7 +3607,7 @@ class Join(Op):
                for x in as_tensor_variable_args:
                    for current_axis, bflag in enumerate(x.type.broadcastable):
-                        # Constant negative axis can no longer be negative at 
+                        # Constant negative axis can no longer be negative at
                        # this point. It safe to compare this way.
                        if current_axis == axis:
                            continue
@@ -3668,7 +3673,7 @@ class Join(Op):
        int axis = ((%(adtype)s *)PyArray_DATA(%(axis)s))[0];
        int ndim = PyArray_NDIM(%(input_1)s);
        if( axis < -ndim ){
-            PyErr_Format(PyExc_IndexError, 
+            PyErr_Format(PyExc_IndexError,
                         "Join axis %%d out of bounds [0, %%d)", axis, ndim);
            %(fail)s
        }
@@ -4411,7 +4416,7 @@ class Tile(Op):
        shp = in_shapes[0]
        tiled_shp = shp * reps
        out_shape = []
-        for i in range(self.ndim):
+        for i in xrange(self.ndim):
            out_shape.append(tiled_shp[i])
        return [out_shape]
@@ -4737,7 +4742,7 @@ class PermuteRowElements(Op):
        shp_y = in_shapes[1]
        assert len(shp_x) == len(shp_y)
        out_shape = []
-        for i in range(len(shp_x)):
+        for i in xrange(len(shp_x)):
            out_shape.append(maximum(shp_x[i], shp_y[i]))
        return [out_shape]
@@ -5088,7 +5093,7 @@ def tensordot(a, b, axes=2):
                 If an integer i, it is converted to an array containing
                 the last i dimensions of the first tensor and the first
                 i dimensions of the second tensor:
-                     axes = [range(a.ndim - i, b.ndim), range(i)]
+                     axes = [list(range(a.ndim - i, b.ndim)), list(range(i))]
                 If an array, its two elements must contain compatible axes
                 of the two tensors. For example, [[1, 2], [2, 0]] means sum
@@ -5175,13 +5180,13 @@ def tensordot(a, b, axes=2):
        outndim = a.ndim + b.ndim - (2 * axes)
        a_shape_0 = b_shape_0 = a_shape_1 = b_shape_1 = 1
-        for s0 in range(a.ndim - axes):
+        for s0 in xrange(a.ndim - axes):
            a_shape_0 *= a.shape[s0]
-        for s0 in range(axes):
+        for s0 in xrange(axes):
            b_shape_0 *= b.shape[s0]
-        for s1 in range(a.ndim - axes, a.ndim):
+        for s1 in xrange(a.ndim - axes, a.ndim):
            a_shape_1 *= a.shape[s1]
-        for s1 in range(axes, b.ndim):
+        for s1 in xrange(axes, b.ndim):
            b_shape_1 *= b.shape[s1]
        a_reshaped = a.reshape((a_shape_0, a_shape_1), ndim=2)
@@ -5459,7 +5464,7 @@ def swapaxes(y, axis1, axis2):
    "swap axes of inputted tensor"
    y = as_tensor_variable(y)
    ndim = y.ndim
-    li = range(0, ndim)
+    li = list(range(0, ndim))
    li[axis1], li[axis2] = li[axis2], li[axis1]
    return y.dimshuffle(li)
@@ -5552,7 +5557,7 @@ class Choose(Op):
                              theano.typed_list.TypedListVariable)
            raise ShapeError("Case not implemented")
            shape = shapes[0]
-            for i in range(len(shapes[0]) - 1):
+            for i in xrange(len(shapes[0]) - 1):
                shape[i] = shapes[1][i]
            return [(shape)]

--- a/theano/tensor/elemwise.py
+++ b/theano/tensor/elemwise.py
@@ -411,7 +411,7 @@ class DimShufflePrinter:
        if new_order != () and  new_order[0] == 'x':
            return "%s" % self.__p(new_order[1:], pstate, r)
 #            return "[%s]" % self.__p(new_order[1:], pstate, r)
-        if list(new_order) == range(r.type.ndim):
+        if list(new_order) == list(range(r.type.ndim)):
            return pstate.pprinter.process(r)
        if list(new_order) == list(reversed(range(r.type.ndim))):
            return "%s.T" % pstate.pprinter.process(r)
@@ -544,7 +544,7 @@ class Elemwise(OpenMPOp):
                # TODO: use LComplete instead
                args.append(DimShuffle(
                    input.type.broadcastable,
-                    ['x'] * difference + range(length),
+                    ['x'] * difference + list(range(length)),
                    inplace=False)(input))
        inputs = args
@@ -1006,11 +1006,11 @@ class Elemwise(OpenMPOp):
            i += 1  # before this loop, i = number of inputs
            sub['lv%i' % i] = oname
            sub['olv'] = oname
-            alloc += cgen.make_declare([range(nnested)], [odtype],
+            alloc += cgen.make_declare([list(range(nnested))], [odtype],
                                       dict(sub, lv0=oname))
            alloc += cgen.make_alloc(orders, odtype, sub,
                                     fortran=alloc_fortran)
-            alloc += cgen.make_checks([range(nnested)], [odtype],
+            alloc += cgen.make_checks([list(range(nnested))], [odtype],
                                      dict(sub, lv0=oname))
        olv_index = i  # index of the last output
@@ -1058,7 +1058,7 @@ class Elemwise(OpenMPOp):
        }
        """ % locals()
-        loop_orders = orders + [range(nnested)] * len(real_onames)
+        loop_orders = orders + [list(range(nnested))] * len(real_onames)
        dtypes = (idtypes + list(real_odtypes))
        if all([o.ndim <= 1 for o in node.outputs] or
               # Use simpler code when output ndim == 0 or 1
@@ -1333,8 +1333,8 @@ class CAReduce(Op):
        input = as_tensor_variable(input)
        axis = self.axis
        if axis is None:
-            axis = range(len(input.type.broadcastable))
+            axis = list(range(len(input.type.broadcastable)))
-        if any([a < 0 for a in axis]):
+        if any(a < 0 for a in axis):
            axis2 = []
            for a in self.axis:
                if a < 0:
@@ -1388,7 +1388,7 @@ class CAReduce(Op):
        output, = out
        axis = self.axis
        if axis is None:
-            axis = range(input.ndim)
+            axis = list(range(input.ndim))
        variable = input
        to_reduce = reversed(sorted(axis))
@@ -1470,7 +1470,7 @@ class CAReduce(Op):
        axis = self.axis
        if axis is None:
-            axis = range(len(input.type.broadcastable))
+            axis = list(range(len(input.type.broadcastable)))
        if len(axis) == 0:
            # The acc_dtype is never a downcast compared to the input dtype
@@ -1510,11 +1510,11 @@ class CAReduce(Op):
        # Allocate output buffer
        alloc += cgen.make_declare(
-                [range(nnested) + ['x'] * len(axis)],
+                [list(range(nnested)) + ['x'] * len(axis)],
                [odtype], dict(sub, lv0=oname))
        alloc += cgen.make_alloc([order1], odtype, sub)
        alloc += cgen.make_checks(
-                [range(nnested) + ['x'] * len(axis)],
+                [list(range(nnested)) + ['x'] * len(axis)],
                [odtype], dict(sub, lv0=oname))
        if adtype != odtype:
@@ -1523,11 +1523,11 @@ class CAReduce(Op):
            sub['olv'] = aname
            alloc += cgen.make_declare(
-                    [range(nnested) + ['x'] * len(axis)],
+                    [list(range(nnested)) + ['x'] * len(axis)],
                    [adtype], dict(sub, lv0=aname))
            alloc += cgen.make_alloc([order1], adtype, sub)
            alloc += cgen.make_checks(
-                    [range(nnested) + ['x'] * len(axis)],
+                    [list(range(nnested)) + ['x'] * len(axis)],
                    [adtype], dict(sub, lv0=aname))
        if hasattr(self.scalar_op, 'identity'):
@@ -1552,7 +1552,7 @@ class CAReduce(Op):
            pattern = [0] * len(node.inputs[0].broadcastable)
            axis = self.axis
            if axis is None:
-                axis = range(len(pattern))
+                axis = list(range(len(pattern)))
            for i in axis:
                pattern[i] = 1
            pattern_ = str(pattern)[1:-1]
@@ -1608,7 +1608,7 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
        else:
            all_code = [task0_decl + code1]
        loop = cgen.make_loop_careduce(
-                [order, range(nnested) + ['x'] * len(axis)],
+                [order, list(range(nnested)) + ['x'] * len(axis)],
                [idtype, adtype], all_code, sub)
        end = ""
@@ -1928,7 +1928,7 @@ class Sum(CAReduceDtype):
        gz = as_tensor_variable(gz)
        axis = self.axis
        if axis is None:
-            axis = range(x.type.ndim)
+            axis = list(range(x.type.ndim))
        if axis == ():
            return gz,
        new_dims = []
@@ -2041,7 +2041,7 @@ class Prod(CAReduceDtype):
        gz = as_tensor_variable(gz)
        axis = self.axis
        if axis is None:
-            axis = range(prod_in.type.ndim)
+            axis = list(range(prod_in.type.ndim))
        if axis == ():
            return gz,
        new_dims = []

--- a/theano/tensor/elemwise_cgen.py
+++ b/theano/tensor/elemwise_cgen.py
@@ -373,7 +373,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, op
        var = sub["lv%i" % i]
        declare_strides += """
        %(ovar)s_loops_rit = %(ovar)s_loops.rbegin();""" % locals()
-        for j in reversed(range(nnested)):
+        for j in reversed(xrange(nnested)):
            declare_strides += """
            int %(var)s_stride_l%(j)i = init_strides[%(i)i][%(ovar)s_loops_rit->second];
            ++%(ovar)s_loops_rit;

--- a/theano/tensor/inplace.py
+++ b/theano/tensor/inplace.py
@@ -356,7 +356,7 @@ pprint.assign(pow_inplace, printing.OperatorPrinter('**=', 1, 'right'))
 def transpose_inplace(x, **kwargs):
    """Perform a transpose on a tensor without copying the underlying storage"""
-    dims = range(x.ndim-1, -1, -1)
+    dims = list(range(x.ndim - 1, -1, -1))
    return elemwise.DimShuffle(x.broadcastable, dims, inplace=True)(x)
 #pprint.assign(transpose_inplace, printing.MemberPrinter('T'))

--- a/theano/tensor/nnet/tests/speed_test_conv.py
+++ b/theano/tensor/nnet/tests/speed_test_conv.py
@@ -3,6 +3,7 @@ import sys, time, unittest
 import numpy
 import numpy as N
+from six.moves import xrange
 from theano.tests import unittest_tools as utt
@@ -17,23 +18,23 @@ def flip(kern, kshp):
    if len(kern.shape) == 2:
        kern = kern.reshape(-1)
        it = reversed(kern)
-        for i in range(kshp[0]):
+        for i in xrange(kshp[0]):
-            for j in range(kshp[1]):
+            for j in xrange(kshp[1]):
                flip[i, j] = next(it)
    elif len(kern.shape) == 3:
        kern = kern.reshape(kern.shape[0], -1)
-        for k in range(kern.shape[0]):
+        for k in xrange(kern.shape[0]):
            it = reversed(kern[k, :])
-            for i in range(kshp[0]):
+            for i in xrange(kshp[0]):
-                for j in range(kshp[1]):
+                for j in xrange(kshp[1]):
                    flip[k, i, j] = next(it)
    elif len(kern.shape) == 4:
        kern = kern.reshape(kern.shape[0], kern.shape[1], -1)
-        for k in range(kern.shape[0]):
+        for k in xrange(kern.shape[0]):
-            for m in range(kern.shape[1]):
+            for m in xrange(kern.shape[1]):
                it = reversed(kern[k, m, :])
-                for i in range(kshp[0]):
+                for i in xrange(kshp[0]):
-                    for j in range(kshp[1]):
+                    for j in xrange(kshp[1]):
                        flip[k, m, i, j] = next(it)
    else:
        raise NotImplementedError()
@@ -63,7 +64,8 @@ def exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp, kshps, nkerns,
        tctot = 0
        tpytot = 0
-        for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))):
+        for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns,
+                                              xrange(len(nkerns))):
            if do_print:
                print('************* layer %i ***************' % n_layer)
@@ -92,9 +94,9 @@ def exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp, kshps, nkerns,
                from scipy.signal.signaltools import  _valfrommode, _bvalfromboundary
                val = _valfrommode(conv_mode)
                bval = _bvalfromboundary('fill')
-                for b in range(bsize):  # loop over batches
+                for b in xrange(bsize):  # loop over batches
-                    for n in range(nkern):  # loop over filters
+                    for n in xrange(nkern):  # loop over filters
-                        for i in range(imshp[0]):  # loop over input feature maps
+                        for i in xrange(imshp[0]):  # loop over input feature maps
                            outval[b, n, ...] +=  _convolve2d(\
                                imgval[b, i, ...], w_flip[n, i, ...], 1, val, bval, 0)[0::ss[0], 0::ss[1]]
                ntot += time.time() - time1
@@ -112,14 +114,14 @@ def exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp, kshps, nkerns,
            propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py"))
            time1 = time.time()
-            for i in range(repeat):
+            for i in xrange(repeat):
                hidval2_ = propup2(imgval, w_flip)
            hidval2 = hidval2_  # [:,:,0::ss[0],0::ss[1]]
            tctot += time.time() - time1
            if conv_op_py:
                time1 = time.time()
-                for i in range(repeat):
+                for i in xrange(repeat):
                    hidval3_ = propup3(imgval, w_flip)
                hidval3 = hidval3_  # [:,:,0::ss[0],0::ss[1]]
                tpytot += time.time() - time1
@@ -158,7 +160,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns,
        tctot = 0
        tpytot = 0
-        for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))):
+        for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, xrange(len(nkerns))):
            if do_print:
                print('************* layer %i ***************' % n_layer)
@@ -185,7 +187,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns,
            propup2 = function([inputs4, kerns4], conv_op)
            time1 = time.time()
-            for i in range(repeat):
+            for i in xrange(repeat):
                hidval2_ = propup2(imgval, w_flip)
            hidval2 = hidval2_  # [:,:,0::ss[0],0::ss[1]]
            tctot += time.time() - time1
@@ -231,13 +233,13 @@ def speed_multilayer_conv():
        best = []
        worst = []
        t_ = []
-        for unroll_b, n_b in zip(unroll_batch, range(len(unroll_batch))):
+        for unroll_b, n_b in zip(unroll_batch, xrange(len(unroll_batch))):
-            for unroll_k, n_k in zip(unroll_kern, range(len(unroll_kern))):
+            for unroll_k, n_k in zip(unroll_kern, xrange(len(unroll_kern))):
                t_b_k.append(str(unroll_b)+"/"+str(unroll_k))
                if not t_:
                    tctot, tpytot, ntot = [], [], []
-                    for conv_mode, n_mode in zip(convmodes, range(len(convmodes))):
+                    for conv_mode, n_mode in zip(convmodes, xrange(len(convmodes))):
-                        for ss, n_ss in zip(ssizes, range(len(ssizes))):
+                        for ss, n_ss in zip(ssizes, xrange(len(ssizes))):
 #                            tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate, verbose=verbose,do_print=False)
                            tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, verbose=verbose, do_print=False, repeat=repeat)
                            tctot += [tctot_]
@@ -260,8 +262,8 @@ def speed_multilayer_conv():
        tctot, tpytot, ntot = [], [], []
        tctot_ = []
        if not tctot_:
-            for conv_mode, n_mode in zip(convmodes, range(len(convmodes))):
+            for conv_mode, n_mode in zip(convmodes, xrange(len(convmodes))):
-                for ss, n_ss in zip(ssizes, range(len(ssizes))):
+                for ss, n_ss in zip(ssizes, xrange(len(ssizes))):
 #                    tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate, verbose=verbose,do_print=False)
                    tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, verbose=verbose, do_print=False, repeat=repeat)
                    tctot += [tctot_]
@@ -273,8 +275,8 @@ def speed_multilayer_conv():
        worst = N.asarray(worst)
        print("timing for unrolled version")
        print("unroll_batch/unroll_kern valid_mode full_mode")
-        for n_b in range(len(unroll_batch)):
+        for n_b in xrange(len(unroll_batch)):
-            for n_k in range(len(unroll_kern)):
+            for n_k in xrange(len(unroll_kern)):
                print((unroll_batch[n_b], unroll_kern[n_k]) + tuple(t[n_b, n_k]), ',')
        t_detail = t
        t = t.sum(axis=2)
@@ -287,8 +289,8 @@ def speed_multilayer_conv():
        print('time unroll_patch')
        tctot_patch = []
        tctot_patch_size = []
-        for conv_mode, n_mode in zip(convmodes, range(len(convmodes))):
+        for conv_mode, n_mode in zip(convmodes, xrange(len(convmodes))):
-            for ss, n_ss in zip(ssizes, range(len(ssizes))):
+            for ss, n_ss in zip(ssizes, xrange(len(ssizes))):
                #tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=True,verbose=verbose,do_print=False)
                tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, unroll_patch=True, verbose=verbose, do_print=False, repeat=repeat)
                tctot_patch += [tctot_]

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -489,8 +489,8 @@ def local_dimshuffle_lift(node):
        inplace = op.inplace and inode.op.inplace
        iinput = inode.inputs[0]
        # remove useless dimshuffle
-        if new_order == range(len(new_order)) and (len(new_order) ==
+        if (new_order == list(range(len(new_order))) and
-                                                   iinput.type.ndim):
+                len(new_order) == iinput.type.ndim):
            return [iinput]
        else:
            ret = op.__class__(iinput.type.broadcastable, new_order,
@@ -908,7 +908,7 @@ class ShapeFeature(object):
                        len(s), r.ndim, sio.getvalue()))
            shape_vars = []
-            for i in range(r.ndim):
+            for i in xrange(r.ndim):
                if (hasattr(r.type, 'broadcastable') and
                    r.type.broadcastable[i]):
                    shape_vars.append(self.lscalar_one)
@@ -921,7 +921,7 @@ class ShapeFeature(object):
                        self.lscalar_one.equals(shape_vars[i]) or
                        self.lscalar_one.equals(
                            T.extract_constant(shape_vars[i]))
-                        for i in range(r.ndim)])
+                        for i in xrange(r.ndim)])
            self.shape_of[r] = tuple(shape_vars)
            for sv in shape_vars:
                self.shape_of_reverse_index.setdefault(sv, set()).add(r)
@@ -997,7 +997,7 @@ class ShapeFeature(object):
                    self.lscalar_one.equals(merged_shape[i]) or
                    self.lscalar_one.equals(
                        T.extract_constant(merged_shape[i]))
-                    for i in range(r.ndim)])
+                    for i in xrange(r.ndim)])
        self.shape_of[r] = tuple(merged_shape)
        for sv in self.shape_of[r]:
            self.shape_of_reverse_index.setdefault(sv, set()).add(r)
@@ -1020,7 +1020,7 @@ class ShapeFeature(object):
                    # But we never timed this speed optimization!
                    self.lscalar_one.equals(new_shape[idx]) or
                    self.lscalar_one.equals(T.extract_constant(new_shape[idx]))
-                    for idx in range(r.ndim)])
+                    for idx in xrange(r.ndim)])
        self.shape_of[r] = tuple(new_shape)
        for sv in self.shape_of[r]:
            self.shape_of_reverse_index.setdefault(sv, set()).add(r)
@@ -1826,7 +1826,7 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
                    nb_dim_to_add = i.owner.inputs[0].ndim - alloc_input.ndim
                    alloc_input = alloc_input.dimshuffle(
                        ['x'] * nb_dim_to_add +
-                        range(alloc_input.ndim))
+                        list(range(alloc_input.ndim)))
                # We need to keep the dimshuffle. It could swap axes or
                # add dimensions anywhere.
@@ -2990,7 +2990,7 @@ def local_join_empty(node):
        join_idx = get_scalar_constant_value(node.inputs[0])
    except NotScalarConstantError:
        return
-    for idx in range(1, len(node.inputs)):
+    for idx in xrange(1, len(node.inputs)):
        inp = node.inputs[idx]
        # We can not use size == 0,, as this can change shape from 3,0
        # to 2,0.  This trigger DebugMode error. This happen with
@@ -3029,7 +3029,7 @@ def local_join_make_vector(node):
    if not isinstance(node.op, T.Join) or node.outputs[0].ndim != 1:
        return
    new_inputs = [node.inputs[1]]
-    for idx in range(2, len(node.inputs)):
+    for idx in xrange(2, len(node.inputs)):
        inp = node.inputs[idx]
        if (inp.owner and
            isinstance(inp.owner.op, MakeVector) and
@@ -3202,7 +3202,7 @@ def local_useless_tile(node):
                        # implement the opt and test it.
                        return
                        x_nd = node.inputs[0].ndim
-                        broad = ['x'] * (l - x_nd) + range(x_nd)
+                        broad = ['x'] * (l - x_nd) + list(range(x_nd))
                        return [node.inputs[0].dimshuffle(broad)]
                except ValueError:
                    return
@@ -3948,7 +3948,7 @@ def local_sum_div_dimshuffle(node):
    if isinstance(node.op, T.Sum):
        axis = node.op.axis
        if axis is None:
-            axis = range(node.inputs[0].ndim)
+            axis = list(range(node.inputs[0].ndim))
        # print 'axis =', axis
        thing_summed = node.inputs[0]
        if thing_summed.owner and thing_summed.owner.op == T.true_div:
@@ -4044,7 +4044,7 @@ def local_sum_prod_all_to_none(node):
 def local_op_of_op(node):
    """
    Prod(Prod()) -> single Prod()
-    or 
+    or
    Sum(Sum()) -> single Sum()
    """
    if isinstance(node.op, T.elemwise.Prod) or isinstance(node.op, T.Sum):
@@ -4055,13 +4055,13 @@ def local_op_of_op(node):
        # doesn't affect other computations.
        if len(node_inps.clients) == 1:
            if (node_inps.owner and (isinstance(node_inps.owner.op, T.elemwise.Prod)
-                    or isinstance(node_inps.owner.op, T.elemwise.Sum))): 
+                    or isinstance(node_inps.owner.op, T.elemwise.Sum))):
-                # check to see either the inner or outer prod is doing a 
+                # check to see either the inner or outer prod is doing a
                # product over all axis, in which case we can remove it
                if node_inps.owner.op.axis is None or node.op.axis is None:
                    return [opt_type(None, dtype=out_dtype)(
-                        node_inps.owner.inputs[0])] 
+                        node_inps.owner.inputs[0])]
                # figure out which axes were in the original sum
                newaxis = list(tuple(node_inps.owner.op.axis))
@@ -4076,10 +4076,10 @@ def local_op_of_op(node):
                assert len(newaxis) == len(list(node_inps.owner.op.axis) +
                                           list(node.op.axis))
                # The old bugged logic. We keep it there to generate a warning
                # when we generated bad code.
-                alldims = range(node_inps.owner.inputs[0].type.ndim)
+                alldims = list(range(node_inps.owner.inputs[0].type.ndim))
                alldims = [d for i, d in enumerate(alldims) if i
                           in node_inps.owner.op.axis]
                alldims = [d for i, d in enumerate(alldims)
@@ -4227,7 +4227,7 @@ def local_reduce_broadcastable(node):
                new_axis = []
                pattern = []
                ii = 0
-                for p in range(reduced.ndim):
+                for p in xrange(reduced.ndim):
                    if p not in cuttable:
                        if p in axis:
                            new_axis.append(ii)
@@ -4251,7 +4251,7 @@ def local_reduce_broadcastable(node):
 @gof.local_optimizer([T.Sum, T.elemwise.Prod])
 def local_opt_alloc(node):
    """ sum(alloc(constant,shapes...)) => constant*prod(shapes)
-        or 
+        or
        prod(alloc(constant,shapes...)) => constant**prod(shapes)
    """
    if isinstance(node.op, T.Sum) or isinstance(node.op, T.elemwise.Prod):

--- a/theano/tensor/raw_random.py
+++ b/theano/tensor/raw_random.py
@@ -418,7 +418,7 @@ def _generate_broadcasting_indices(out_shape, *shapes):
        # Temporary list to generate the indices
        _ret_indices = [[] for shape in all_shapes]
-        out_range = range(out_shape[dim])
+        out_range = list(range(out_shape[dim]))
        # Verify the shapes are compatible along that dimension
        # and generate the appropriate range: out_range, or [0, ..., 0]

--- a/theano/tensor/slinalg.py
+++ b/theano/tensor/slinalg.py
@@ -332,14 +332,14 @@ def kron(a, b):
    o = tensor.outer(a, b)
    o = o.reshape(tensor.concatenate((a.shape, b.shape)),
                  a.ndim + b.ndim)
-    shf = o.dimshuffle(0, 2, 1, * range(3, o.ndim))
+    shf = o.dimshuffle(0, 2, 1, * list(range(3, o.ndim)))
    if shf.ndim == 3:
        shf = o.dimshuffle(1, 0, 2)
        o = shf.flatten()
    else:
        o = shf.reshape((o.shape[0] * o.shape[2],
                         o.shape[1] * o.shape[3]) +
-                        tuple([o.shape[i] for i in range(4, o.ndim)]))
+                        tuple(o.shape[i] for i in xrange(4, o.ndim)))
    return o

--- a/theano/tensor/subtensor.py
+++ b/theano/tensor/subtensor.py
@@ -7,6 +7,7 @@ import logging
 _logger = logging.getLogger("theano.tensor.subtensor")
 import numpy
+from six.moves import xrange
 import theano
 from theano.compat import izip
@@ -1034,7 +1035,7 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
                                                                      y.ndim))
    dim_offset = x.ndim - y.ndim
-    for dim in range(y.ndim):
+    for dim in xrange(y.ndim):
        if (x.broadcastable[dim + dim_offset]
                and not y.broadcastable[dim]):
            # It is acceptable to try to increment a subtensor with a
@@ -1132,7 +1133,7 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
        if y.ndim > 0:
            # This if is needed to prevent some useless warning about
            # old code bug.
-            expanded_y = alloc(y, *[x.shape[i] for i in range(x.ndim)])
+            expanded_y = alloc(y, *[x.shape[i] for i in xrange(x.ndim)])
            flattened_y = expanded_y.flatten(inner_x.ndim)
        else:
            flattened_y = y
@@ -1570,7 +1571,7 @@ def _sum_grad_over_bcasted_dims(x, gx):
        x_broad = (True,) * x_dim_added + x.broadcastable
        assert sum(gx.broadcastable) < sum(x_broad)
        axis_to_sum = []
-        for i in range(gx.ndim):
+        for i in xrange(gx.ndim):
            if gx.broadcastable[i] is False and x_broad[i] is True:
                axis_to_sum.append(i)
            elif (gx.broadcastable[i] is True and
@@ -1584,9 +1585,9 @@ def _sum_grad_over_bcasted_dims(x, gx):
        gx = gx.sum(axis=axis_to_sum, keepdims=True)
        if gx.ndim != x.ndim:
            assert gx.ndim > x.ndim
-            for i in range(x_dim_added):
+            for i in xrange(x_dim_added):
                assert gx.broadcastable[i]
-            gx = gx.dimshuffle(*range(x_dim_added, gx.ndim))
+            gx = gx.dimshuffle(*list(range(x_dim_added, gx.ndim)))
        assert gx.broadcastable == x.broadcastable
    return gx
@@ -2293,7 +2294,7 @@ def take(a, indices, axis=None, mode='raise'):
            if axis < 0:
                axis += a.ndim
            assert axis >= 0
-            shuffle = range(a.ndim)
+            shuffle = list(range(a.ndim))
            shuffle[0] = axis
            shuffle[axis] = 0
            return advanced_subtensor1(

--- a/theano/tensor/tests/test_elemwise.py
+++ b/theano/tensor/tests/test_elemwise.py
@@ -7,6 +7,8 @@ from nose.plugins.skip import SkipTest
 from nose.plugins.attrib import attr
 from nose.tools import raises
+from six.moves import xrange
 import theano
 from theano.compat import imap
 from theano import gof, scalar, config
@@ -348,7 +350,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
                e = as_tensor_variable(tensor_op(x, axis=tosum, **d))
            if tosum is None:
-                tosum = range(len(xsh))
+                tosum = list(range(len(xsh)))
            f = copy(linker).accept(FunctionGraph([x], [e])).make_function()
            xv = numpy.asarray(numpy.random.rand(*xsh))
@@ -462,7 +464,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
            else:
                e = tensor_op(x, axis=tosum)
            if tosum is None:
-                tosum = range(len(xsh))
+                tosum = list(range(len(xsh)))
            f = copy(linker).accept(FunctionGraph([x],
                                                  [e.shape])).make_function()
            if not(scalar_op in [scalar.maximum, scalar.minimum] and
@@ -546,7 +548,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
            if pre_scalar_op is not None:
                x = pre_scalar_op(x)
            if tosum is None:
-                tosum = range(len(xsh))
+                tosum = list(range(len(xsh)))
            xv = numpy.asarray(numpy.random.rand(*xsh), dtype=dtype)
            d = {}
            if pre_scalar_op is not None:
@@ -1169,7 +1171,7 @@ class TestElemwise(unittest_tools.InferShapeTester):
        s = a + b + c + d + e + f
        g = theano.function([a, b, c, d, e, f], s,
                             mode=theano.compile.Mode(linker='py'))
-        g(*[numpy.zeros(2 ** 11, config.floatX) for i in range(6)])
+        g(*[numpy.zeros(2 ** 11, config.floatX) for i in xrange(6)])
 def test_gt_grad():

--- a/theano/tensor/tests/test_keepdims.py
+++ b/theano/tensor/tests/test_keepdims.py
@@ -13,7 +13,7 @@ class TestKeepDims(unittest.TestCase):
    def makeKeepDims_local(self, x, y, axis):
        if axis is None:
-            newaxis = range(x.ndim)
+            newaxis = list(range(x.ndim))
        elif isinstance(axis, int):
            if axis < 0:
                newaxis = [axis + x.type.ndim]

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
--- a/theano/tests/run_tests_in_batch.py
+++ b/theano/tests/run_tests_in_batch.py
@@ -178,7 +178,8 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile,
        for test_id in xrange(1, n_tests + 1, batch_size):
            stdout.flush()
            stderr.flush()
-            test_range = range(test_id, min(test_id + batch_size, n_tests + 1))
+            test_range = list(range(test_id,
+                                    min(test_id + batch_size, n_tests + 1)))
            cmd = ([python, theano_nose, '--with-id'] +
                   list(map(str, test_range)) +
                   argv)