Commit bf02577b authored by Olivier Delalleau

Merged

2010-11-23 Theano 0.3
---------------------
This is the first major release of Theano since 0.1. Version 0.2 development
started internally but it was never advertised as a release.
There have been so many changes since 0.1 that we have lost track of many of
them. Below is a *partial* list of changes since 0.1.
* GPU code using NVIDIA's CUDA framework is now generated for many Ops.
* Some interface changes since 0.1:

  * A new "shared variable" system to allow reusing memory space between
    Theano functions.
  * A new memory contract has been formally written for Theano,
    for people who want to minimize memory copies.
  * The old module system has been deprecated.
  * By default, inputs to a Theano function will not be silently
    downcast (e.g. from float64 to float32).
  * An error is now raised when the result of a logical operation on a
    Theano variable is used in an 'if' (i.e. an implicit call to
    __nonzero__).
  * An error is now raised when we receive a non-aligned ndarray as
    input to a function (this is not supported).
  * An error is raised when the list of dimensions passed to
    dimshuffle() contains duplicates or is otherwise not sensible.

* NumPy's BLAS bindings are now called for gemv operations, in addition
  to the already supported gemm.
* If gcc is unavailable at import time, Theano now falls back to a
  Python-based emulation mode after raising a warning.
* An error is now raised when tensor.grad is called on a non-scalar
  Theano variable (in the past we would implicitly sum the tensor to
  make it a scalar).
* Added support for the "erf" and "erfc" functions.
* The current default value of the parameter axis of theano.{max,min,
  argmax,argmin,max_and_argmax} is deprecated. We now use the default
  NumPy behavior of operating on the entire tensor.
* Theano is now available from PyPI and installable through
  "easy_install" or "pip".
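The no-silent-downcast change above protects against precision loss when a float64 input is squeezed into a float32 slot. A NumPy-only sketch (not Theano code) of the information loss that a silent downcast would hide:

```python
import numpy as np

# 2**24 + 1 is exact in float64 but not representable in float32
# (float32 has a 24-bit significand), so a downcast silently rounds it.
x64 = np.float64(2**24 + 1)    # 16777217.0, exact
x32 = x64.astype(np.float32)   # rounds to 16777216.0
lossy = (float(x32) != float(x64))
```

Raising an error instead of downcasting forces the user to make this rounding explicit with a cast.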
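The deprecated axis default mentioned above moves Theano toward NumPy's convention, where omitting axis reduces over the entire tensor rather than one dimension. A NumPy-only sketch of that target behavior:

```python
import numpy as np

a = np.array([[1, 5],
              [3, 2]])

whole   = np.max(a)          # no axis: reduce over the entire tensor
per_col = np.max(a, axis=0)  # reduce down each column
per_row = np.max(a, axis=1)  # reduce across each row
```

Here `whole` is the single scalar 5, while the axis variants return one result per remaining dimension.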
.. _NEWS:
=============
Release Notes
=============
Theano 0.1
==========
*Release date: 2009-04-02*
What works
----------
- building symbolic expressions.
- arranging symbolic expressions into Modules so that multiple functions
  can work on the same data.
- symbolic gradient descent.
- graph optimization.
- compilation to C for many kinds of expressions.
- a debugging mode that checks that your expression results are correct,
  using a variety of sanity checks.
What's missing?
---------------
- An algorithm library. We're missing a library of examples and standard
  component implementations. Some examples will find their way into
  the Theano repo, but standard algorithms will go into the 'pylearn'
  project (toolbox style). Now that we have a stable foundation, we
  can reach a consensus on style for algorithms.
@@ -36,7 +36,8 @@ MAINTAINER = "LISA laboratory, University of Montreal"
MAINTAINER_EMAIL = "theano-dev@googlegroups.com"
DESCRIPTION = ('Optimizing compiler for evaluating mathematical ' +
               'expressions on CPUs and GPUs.')
LONG_DESCRIPTION = (open("DESCRIPTION.txt").read() + "\n\n" +
                    open("NEWS.txt").read())
URL = "http://deeplearning.net/software/theano/"
DOWNLOAD_URL = ""
LICENSE = 'BSD'
...
@@ -40,20 +40,32 @@ def test_shape_i():
    assert len(topo)==1
    assert isinstance(topo[0].op,T.opt.Shape_i)

def test_shape():
    x = cuda.ftensor3()
    v = cuda.CudaNdarray(numpy.zeros((3,4,5),dtype='float32'))
    f = theano.function([x],x.shape)
    topo = f.maker.env.toposort()
    assert numpy.all(f(v)==(3,4,5))
    if theano.config.mode!='FAST_COMPILE':
        assert len(topo)==4
        assert isinstance(topo[0].op,T.opt.Shape_i)
        assert isinstance(topo[1].op,T.opt.Shape_i)
        assert isinstance(topo[2].op,T.opt.Shape_i)
        assert isinstance(topo[3].op,T.opt.MakeVector)

def test_softmax_optimizations():
    from theano.tensor.nnet.nnet import softmax, crossentropy_categorical_1hot, crossentropy_softmax_argmax_1hot_with_bias
    x = tensor.fmatrix('x')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot
    xe = op(x, one_of_n)
    env = theano.gof.Env(
        [x, one_of_n],
        [op(softmax(x), one_of_n)])
    assert env.outputs[0].owner.op == op
    mode_with_gpu.optimizer.optimize(env)
    assert str(env.outputs[0].owner.op) == 'OutputGuard'
...
@@ -248,6 +248,12 @@ class _sparse_py_operators:
    def __dot__(left, right): return structured_dot(left, right)
    def __rdot__(right, left): return structured_dot(left, right)

    #def _as_TensorVariable(self):
    #    return dense_from_sparse(self)

    shape = property(lambda self: tensor.shape(self))
    ndim = property(lambda self: self.type.ndim)
    dtype = property(lambda self: self.type.dtype)

class SparseVariable(gof.Variable, _sparse_py_operators):
    dtype = property(lambda self: self.type.dtype)
@@ -1148,16 +1154,16 @@ def structured_dot_grad(sparse_A, dense_B, ga):
    if sparse_A.type.format in ('csc','csr'):
        if sparse_A.type.format == 'csc':
            sdgcsx = sdg_csc
        else:
            sdgcsx = sdg_csr
        #backport
        #sdgcsx = sdg_csc if sparse_A.type.format == 'csc' else sdg_csr
        if sparse_A.type.format == 'csc':
            CSx = CSC
        else:
            CSx = CSR
        #backport
        #CSx = CSC if sparse_A.type.format == 'csc' else CSR
@@ -1380,4 +1386,3 @@ class StructuredDotGradCSR(gof.Op):
        """% dict(locals(), **sub)
sdg_csr = StructuredDotGradCSR()
...
@@ -155,7 +155,7 @@ class T_AddMul(unittest.TestCase):
        elif op is mul:
            self.failUnless(_is_sparse_variable(apb))
            self.failUnless(numpy.all(val.todense() == (b.multiply(a))))
            self.failUnless(numpy.all(val.todense() == numpy.array([[1, 0],
                [9, 0], [0, 36]])))

    def _testDS(self, op, array1 = numpy.array([[1., 0], [3, 0], [0, 6]]),
@@ -187,7 +187,7 @@ class T_AddMul(unittest.TestCase):
        elif op is mul:
            self.failUnless(_is_sparse_variable(apb))
            self.failUnless(numpy.all(val.todense() == (a.multiply(b))))
            self.failUnless(numpy.all(val.todense() == numpy.array([[1, 0],
                [9, 0], [0, 36]])))
@@ -244,7 +244,7 @@ class test_structureddot(unittest.TestCase):
            assert rval.type.dtype == 'float32'
            return rval
        utt.verify_grad(buildgraphCSC,
                [spmat.data, mat])

    def test_structureddot_csr_grad(self):
@@ -264,7 +264,7 @@ class test_structureddot(unittest.TestCase):
            assert rval.type.dtype == 'float64'
            return rval
        utt.verify_grad(buildgraph,
                [spmat.data, mat])

    def test_upcast(self):
@@ -307,7 +307,7 @@ class test_structureddot(unittest.TestCase):
        # Test that a graph involving structured_dot(assembled_csc_matrix) is optimized to be
        # just a structured_dot_csc Op and no assembly of a csc_matrix.
        #
        # The optimization from structured_dot -> structured_dot_csc is currently disabled,
        # So this test is not expected to pass
        return
@@ -320,7 +320,7 @@ class test_structureddot(unittest.TestCase):
            y = numpy.floor(numpy.random.rand()*spmat.shape[1])
            spmat[x,y] = numpy.random.rand()*10
        spmat = sp.csc_matrix(spmat)
        images = tensor.Tensor(dtype='float32', broadcastable=[False, False])('images')
        cscmat = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
@@ -364,7 +364,7 @@ class test_structureddot(unittest.TestCase):
        #print f.maker.env.toposort()
        for M,N,K,nnz in [(4,3,2,3),
                (40,30,20,3),
                (40,30,20,30),
                (400,3000,200,6000),
@@ -417,7 +417,7 @@ class test_structureddot(unittest.TestCase):
        print f.maker.env.toposort()
        for M,N,K,nnz in [(4,3,2,3),
                (40,30,20,3),
                (40,30,20,30),
                (400,3000,200,6000),
...
@@ -398,7 +398,7 @@ class TensorType(Type):
        """
        self.dtype = str(dtype)
        if self.dtype=='floatX':
            self.dtype=config.floatX
        ### broadcastable is immutable, and all elements are either True or False
        self.broadcastable = tuple(bool(b) for b in broadcastable)
        self.dtype_specs() # error checking is done there
@@ -676,7 +676,7 @@ class TensorType(Type):
        if any(b):
            bcast = str(b)
        else:
            bcast = '%iD' % len(b)
        return "TensorType(%s, %s)" % (str(self.dtype), bcast)

    def __repr__(self):
@@ -1291,9 +1291,9 @@ def _scal_elemwise(symbol):
    symbolname = symbol.__name__
    inplace = symbolname.endswith('_inplace')
    if inplace:
        msg = "inplace"
    else:
        msg = "no_inplace"
    n="Elemwise{%s,%s}"%(symbolname,msg)
    if inplace:
@@ -1507,7 +1507,7 @@ class MaxAndArgmax(Op):
        for id,a in enumerate(axis):
            if not isinstance(a, TensorVariable) and a<0:
                if -a>x.type.ndim:
                    raise ValueError('axis out of range')
                axis[id]=x.type.ndim+a
        axis = _as_tensor_variable(axis)
        inputs = [x, axis]
@@ -1540,18 +1540,18 @@ class MaxAndArgmax(Op):
        if not ( axis.data == 0 or axis.data == x.ndim-1):
            raise NotImplementedError('MaxAndArgmax gradient with axis corresponding to internal dimension')
        if axis.data==0:
            g_max_pad = shape_padleft(g_max)
        else:
            g_max_pad = shape_padright(g_max)
        xmax = max(x, axis)
        if axis.data==0:
            xmax_pad = shape_padleft(xmax)
        else:
            xmax_pad = shape_padright(xmax)
        g_x = eq(xmax_pad, x) * g_max_pad
        return g_x, None

    def __str__(self):
        return self.__class__.__name__

_max_and_argmax = MaxAndArgmax()

@_redefine_asRoutine(_max_and_argmax)
@@ -1579,12 +1579,12 @@ def max(x, axis='DEFAULT'):
        axis = x.type.ndim - 1
        warnings.warn("The behavior of max when axis==None will change! Now we return the max over the last dimensions. It will change to the max over all dimensions as numpy. To hide this warning and be compatible with the future behavior, set axis to -1 to have the current behavior. To have the futur behavior set axis to range(nb dim), but this don't support the grad. To have the grad, you must flatten the tensor before calling max().")
    if isinstance(axis,(list,tuple)) and len(axis)>1:
        return CAReduce(scal.maximum,axis)(x)
    try:
        const = get_constant_value(axis)
        return CAReduce(scal.maximum,list(const))(x)
    except:
        return max_and_argmax(x,axis)[0]

@constructor
def argmax(x, axis='DEFAULT'):
@@ -2086,16 +2086,16 @@ class Mean(elemwise.CAReduce):
        return 'float64'

    def perform(self, node, (input, ), (output, )):
        output[0]=numpy.mean(input,axis=self.axis)

    def c_code(self, node, name, inames, onames, sub):
        if self.axis!=None:
            return super(Op, self).c_code(node, name, inames, onames, sub)
        ret = elemwise.CAReduce.c_code(self, node, name, inames, onames, sub)
        #TODO: c_code perform support only axis==None
        return ret + """
        *((double *)PyArray_DATA(%s)) /= PyArray_SIZE(%s);
        """%(onames[0],inames[0])

    #TODO: implement the grad. When done and tested, you can make this the default version.
    # def grad(self, (x,), (gout,)):
@@ -2114,11 +2114,11 @@ def mean(input, axis = None, op = False):
    mean, everything will be done on the gpu.
    """
    if op:
        return Mean(axis)(input)
    if str(input.dtype).startswith('int'):
        # we need to cast eventually anyway, and this helps
        # to prevent overflow
        input = cast(input, 'float64')
    s = sum(input, axis)
    shp = shape(input)
@@ -2183,10 +2183,10 @@ if 0:
    assert axis.type == iscalar
    broadcastable = []
    for i,x in enumerate(input.broadcastable):
        if i==axis:
            broadcastable += [False]
        else:
            broadcastable += [x]
    type = TensorType(dtype = input.type.dtype, broadcastable = \
            broadcastable)
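The int-to-float64 cast in mean() above guards against integer accumulator overflow. A NumPy-only sketch (not Theano code) of the failure mode it avoids, where forcing the accumulator to stay int32 wraps around:

```python
import numpy as np

a = np.full(3, 2**30, dtype=np.int32)

# Keeping the accumulator at int32 overflows: 3 * 2**30 exceeds
# the int32 range and silently wraps to a negative value.
wrapped = a.sum(dtype=np.int32)

# Casting to float64 first (as mean() does) gives the correct result.
correct = a.astype(np.float64).sum() / a.size
```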
@@ -2360,46 +2360,46 @@ class Subtensor(Op):
    @staticmethod
    def convert(entry, slice_ok=True):
        scal_types = [scal.int64, scal.int32, scal.int16, scal.int8]
        tensor_types = [bscalar, iscalar, lscalar]
        if isinstance(entry, gof.Variable) and entry.type in scal_types:
            return entry.type
        elif isinstance(entry, gof.Type) and entry in scal_types:
            return entry
        if isinstance(entry, gof.Variable) and entry.type in tensor_types and numpy.all(entry.type.broadcastable):
            return scal.Scalar(entry.type.dtype)
        elif isinstance(entry, gof.Type) and entry in tensor_types and numpy.all(entry.broadcastable):
            return scal.Scalar(entry.dtype)
        elif slice_ok and isinstance(entry, slice):
            a = entry.start
            b = entry.stop
            c = entry.step
            if a is not None:
                slice_a = Subtensor.convert(a, False)
            else:
                slice_a = None
            if b is not None:
                slice_b = Subtensor.convert(b, False)
            else:
                slice_b = None
            if c is not None:
                slice_c = Subtensor.convert(c, False)
            else:
                slice_c = None
            return slice(slice_a,slice_b,slice_c)
            #backport
            #return slice(Subtensor.convert(a, False) if a is not None else None,
            #             Subtensor.convert(b, False) if b is not None else None,
            #             Subtensor.convert(c, False) if c is not None else None)
        elif isinstance(entry, int):
            return entry
        else:
            raise TypeError(Subtensor.e_indextype, entry)

    def __init__(self, idx_list):
        self.idx_list = map(self.convert, idx_list)
@@ -2493,7 +2493,7 @@ class Subtensor(Op):
            if (idx.start is None or idx.start == 0)\
                    and (idx.stop is None or idx.stop == sys.maxint)\
                    and (idx.step is None or idx.step == 1):
                outshp.append(xl)
            else:
                # Not implemented yet
                outshp.append(shape_i(i)(node.outputs[0]))
@@ -2517,10 +2517,10 @@ class Subtensor(Op):
        #TODO: optimize by cache this hash value
        msg = []
        for entry in self.idx_list:
            if isinstance(entry, slice):
                msg += [(entry.start, entry.stop, entry.step)]
            else:
                msg += [entry]
        idx_list = tuple(msg)
        #backport
@@ -2568,19 +2568,19 @@ class SubtensorPrinter:
                sidxs.append(inbrack_pstate.pprinter.process(inputs.pop()))
            elif isinstance(entry, slice):
                if entry.start is None or entry.start==0:
                    msg1 = ""
                else:
                    msg1 = entry.start
                if entry.stop is None or entry.stop == sys.maxint:
                    msg2 = ""
                else:
                    msg2 = entry.stop
                if entry.step is None:
                    msg3 = ""
                else:
                    msg3 = ":%s" % entry.step
                sidxs.append("%s:%s%s" % (msg1, msg2, msg3))
                #backport
@@ -2666,10 +2666,10 @@ class IncSubtensor(Op):
    def __hash__(self):
        msg = []
        for entry in self.idx_list:
            if isinstance(entry, slice):
                msg += [(entry.start, entry.stop, entry.step)]
            else:
                msg += [entry]
        idx_list = tuple(msg)
        #backport
@@ -2848,7 +2848,7 @@ class Split(Op):
        """WRITEME"""
        #in python 2.4, x.shape[numpy.asarray(1)] don't work.
        if sys.version_info[0:2]==(2, 4) and axis.size==1:
            axis=int(axis)
        try:
            len_along_axis = x.shape[axis]
@@ -3032,8 +3032,8 @@ class Join(Op):
        # for the output.
        for x in as_tensor_variable_args:
            for current_axis, bflag in enumerate(x.type.broadcastable):
                # Not sure if this Op supports/supported/will support
                # negative indices, but just to be sure...
                if current_axis == axis % ndim:
                    continue
                if bflag:
@@ -3103,9 +3103,9 @@ class Join(Op):
        if node.ndim != 1:
            raise TypeError('argument must be symbolic vector')
        if node.owner.tag.shape_zero is None:
            raise ValueError("could not determine vector length")
        else:
            return node.owner.tag.shape_zero

@_redefine_asRoutine(Join())
def join(axis, *tensors):
@@ -3420,7 +3420,7 @@ def tile(x, reps, ndim=None):
    if not hasattr(tile, 'op'):
        tile.op = {}
    if ndim is None:
        ndim = len(reps)
    #backport
    #ndim = len(reps) if ndim is None else ndim #not sure if len(shp) is going to work.
@@ -4404,9 +4404,9 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
    o_fn_out = o_fn(*[p.copy() for p in pt])
    if isinstance(o_fn_out, tuple) or isinstance(o_fn_out, list):
        raise TypeError('It seems like you are trying to use verify_grad '
                'on an op or a function which outputs a list: there should'
                ' be a single (array-like) output instead')
    # random_projection should not have elements too small,
    # otherwise too much precision is lost in numerical gradient
...
@@ -1147,18 +1147,18 @@ def apply_rebroadcast_opt(rval):
    changed = True
    while changed and rval.owner:
        changed = False
        rval2 = theano.tensor.opt.local_useless_rebroadcast.transform(rval.owner)
        if rval2:
            assert len(rval2)==1
            rval = rval2[0]
            changed = True
        if rval.owner:
            rval2 = theano.tensor.opt.local_rebroadcast_lift.transform(rval.owner)
            if rval2:
                assert len(rval2)==1
                rval = rval2[0]
                changed = True
    return rval
@@ -1216,7 +1216,7 @@ def local_mul_switch_sink(node):
            fct[0].values_eq_approx = fct[0].type.values_eq_approx_remove_nan
            return fct
        except TypeError:
            pass
        try:
            if get_constant_value(switch.inputs[2]) == 0.:
                listmul = node.inputs[:idx] + node.inputs[idx+1:]
@@ -2398,9 +2398,9 @@ def local_log_add(node):
def add_calculate(num, denum, aslist = False, out_type=None):
    #TODO: make sure that this function and mul_calculate are similar
    if out_type is None:
        zero = 0.0
    else:
        zero = theano._asarray(0, dtype=out_type.dtype)
    #zero = 0.0 if out_type is None else theano._asarray(0, dtype=out_type.dtype)
    v = reduce(N.add, num, zero) - reduce(N.add, denum, zero)
    if aslist:
@@ -2856,7 +2856,7 @@ def local_grad_log_erfc_neg(node):
        #The constant is valid. Must check that the
        elif erfc_x is not x:
            return False
        else:
            return False
@@ -3098,5 +3098,3 @@ if config.tensor.local_elemwise_fusion:
else:
    _logger.debug("not enabling optimization fusion elemwise in fast_run")
compile.optdb.register('elemwise_fusion', FusionOptimizer(local_elemwise_fusion), 71.00, 'fusion', 'local_elemwise_fusion')