Commit 6dcd4721 authored by lamblin

Merge pull request #881 from nouiz/mixed3

Mixed3
@@ -16,22 +16,22 @@ The descriptions are brief and point to further reading.
If you would like to add an additional optimization, refer to
:ref:`optimization` in the guide to extending Theano.
.. note::
    This list is partial.

    The print_summary method allows several OpDBs and optimizers to
    list the optimizations that were executed, which makes it possible
    to generate an up-to-date list:

    python -c 'import theano; theano.compile.FAST_RUN.optimizer.print_summary()'
    python -c 'import theano; theano.compile.FAST_COMPILE.optimizer.print_summary()'
========================================================= ========= ============ =============
Optimization                                              FAST_RUN  FAST_COMPILE Stabilization
========================================================= ========= ============ =============
:term:`merge`                                             x         x
:term:`constant folding<constant folding>`                x         x
:term:`shape promotion<shape promotion>`                  x
:term:`fill cut<fill cut>`                                x
:term:`inc_subtensor srlz.<inc_subtensor serialization>`  x
@@ -53,7 +53,8 @@ Optimization FAST_RUN FAST_COMPILE
:term:`inplace_random`                                    x
:term:`elemwise fusion`                                   x
:term:`GPU transfer`                                      x
:term:`local_log_softmax`                                 x                      x
========================================================= ========= ============ =============
.. glossary::
@@ -252,5 +253,8 @@ Optimization FAST_RUN FAST_COMPILE
    See :func:`theano.sandbox.cuda.opt.*`.
    local_log_softmax
        This is a stabilization optimization.
        Due to rounding errors, the softmax probability of one value can
        become 0. Taking the log of 0 would generate -inf, which will
        probably generate NaN later. This optimization returns a
        numerically more accurate answer.
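The instability this glossary entry describes can be sketched with plain NumPy (an illustration of the numerical issue only, not Theano code):

```python
import numpy as np

x = np.array([0.0, 1000.0])  # logits with a large gap

# Naive log(softmax(x)): the small probability underflows to 0,
# so its log becomes -inf.
e = np.exp(x - x.max())
p = e / e.sum()
naive = np.log(p)

# Stabilized log-softmax computes the log directly:
# log p_i = (x_i - max) - log(sum_j exp(x_j - max))
stable = (x - x.max()) - np.log(np.exp(x - x.max()).sum())
```

Here `naive[0]` is `-inf`, while `stable[0]` is the finite value `-1000.0`.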
@@ -47,7 +47,10 @@ file and run it.
t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
    print 'Used the cpu'
else:
    print 'Used the gpu'
The program just computes the exp() of a bunch of random numbers.
Note that we use the `shared` function to
@@ -105,7 +108,10 @@ after the T.exp(x) is replaced by a GPU version of exp().
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r
print 'Numpy result is', numpy.asarray(r)
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
    print 'Used the cpu'
else:
    print 'Used the gpu'
The output from this program is
@@ -161,7 +167,10 @@ that it has the un-wanted side-effect of really slowing things down.
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r
print 'Numpy result is', numpy.asarray(r)
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
    print 'Used the cpu'
else:
    print 'Used the gpu'
Running this version of the code takes just under 0.05 seconds, over 140x faster than
the CPU implementation!
......
@@ -52,7 +52,7 @@ from gof import \
    Container, \
    InconsistencyError, FunctionGraph, \
    Apply, Variable, Constant, \
    Op, OpenMPOp, \
    opt, \
    toolbox, \
    Type, Generic, generic, \
@@ -106,9 +106,6 @@ if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'):
    import theano.sandbox.cuda.tests.test_driver
    theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1()
import configdefaults_late
# Use config.numpy to call numpy.seterr
import numpy
if config.numpy.seterr_all == 'None':
......
@@ -376,3 +376,39 @@ AddConfigVar('exception_verbosity',
    C. log_likelihood_h""",
    EnumStr('low', 'high'),
    in_c_key=False)
# Test if the env variable is set
var = os.getenv('OMP_NUM_THREADS', None)
if var:
    try:
        int(var)
    except ValueError:
        raise TypeError("The environment variable OMP_NUM_THREADS"
                        " should be a number, got '%s'." % var)
    else:
        default_openmp = not int(var) == 1
else:
    # Check the number of available CPU cores.
    count = cpuCount()
    if count == -1:
        _logger.warning("We are not able to detect the number of CPU cores."
                        " We disable openmp by default. To remove this"
                        " warning, set the environment variable"
                        " OMP_NUM_THREADS to the number of threads you"
                        " want theano to use.")
    default_openmp = count > 1
AddConfigVar('openmp',
             "Allow (or not) parallel computation on the CPU with OpenMP. "
             "This is the default value used when creating an Op that "
             "supports OpenMP parallelization. It is preferable to define it "
             "via the Theano configuration file ~/.theanorc or with the "
             "environment variable THEANO_FLAGS. Parallelization is only "
             "done for some operations that implement it, and even for "
             "operations that implement parallelism, each operation is free "
             "to respect this flag or not. You can control the number of "
             "threads used with the environment variable OMP_NUM_THREADS."
             " If it is set to 1, we disable openmp in Theano by default.",
             BoolParam(default_openmp),
             in_c_key=False,
             )
"""
This file defines Theano flags which need to be defined late in import order.
This is needed as they rely on the values of other previously-defined flags.
"""
import os
import logging
import subprocess
import tempfile
import theano
from theano.configparser import (
AddConfigVar, BoolParam, ConfigParam, EnumStr, IntParam,
TheanoConfigParser)
from theano.misc.cpucount import cpuCount
_logger = logging.getLogger('theano.configdefaults_late')
config = TheanoConfigParser()
# http://pyprocessing.berlios.de/
# True if OMP_NUM_THREADS != 1 (or if we detect more than 1 CPU core)
# and g++ supports OpenMP; otherwise False.
default_openmp = True
# Test if the env variable is set
var = os.getenv('OMP_NUM_THREADS', None)
if var:
    try:
        int(var)
    except ValueError:
        raise TypeError("The environment variable OMP_NUM_THREADS"
                        " should be a number, got '%s'." % var)
    else:
        default_openmp = not int(var) == 1
else:
    # Check the number of available CPU cores.
    count = cpuCount()
    if count == -1:
        _logger.warning("We are not able to detect the number of CPU cores."
                        " We disable openmp by default. To remove this"
                        " warning, set the environment variable"
                        " OMP_NUM_THREADS to the number of threads you"
                        " want theano to use.")
    default_openmp = count > 1
dummy_stdin = open(os.devnull)
if default_openmp and theano.configdefaults.gxx_avail:
    # Check if g++ supports OpenMP. We need to compile a file, as the
    # EPD version has OpenMP enabled in its specs file but does not
    # include the OpenMP files.
    try:
        code = """
#include <omp.h>
int main( int argc, const char* argv[] )
{
    int res[10];
    for(int i=0; i < 10; i++){
        res[i] = i;
    }
}
"""
        fd, path = tempfile.mkstemp(suffix='.c', prefix='test_omp_')
        try:
            os.write(fd, code)
            os.close(fd)
            fd = None
            proc = subprocess.Popen(['g++', '-fopenmp', path],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    stdin=dummy_stdin.fileno())
            proc.wait()
            if proc.returncode != 0:
                default_openmp = False
        finally:
            # Ensure `fd` is closed before we remove the temporary file.
            try:
                if fd is not None:
                    os.close(fd)
            finally:
                os.remove(path)
    except OSError, e:
        default_openmp = False
del dummy_stdin
AddConfigVar('openmp',
             "Enable (or not) parallel computation on the CPU with OpenMP. "
             "This is the default value used when creating an Op that "
             "supports OpenMP parallelization. It is preferable to define it "
             "via the Theano configuration file ~/.theanorc or with the "
             "environment variable THEANO_FLAGS. Parallelization is only "
             "done for some operations that implement it, and even for "
             "operations that implement parallelism, each operation is free "
             "to respect this flag or not.",
             BoolParam(default_openmp),
             in_c_key=False,
             )
@@ -55,7 +55,7 @@ from link import \
    Container, Linker, LocalLinker, PerformLinker, WrapLinker, WrapLinkerMany
from op import \
    Op, OpenMPOp, PureOp, ops_with_inner_function
from opt import (Optimizer, optimizer, SeqOptimizer,
                 MergeOptimizer, MergeOptMerge,
......
@@ -11,8 +11,11 @@ __contact__ = "theano-dev <theano-dev@googlegroups.com>"
__docformat__ = "restructuredtext en"
import copy
import logging
import os
import subprocess
import tempfile
import warnings

import theano
@@ -781,3 +784,91 @@ We need that to be able not to run debug checks a number of times that is
exponential in the nesting level of those ops.
For instance, Scan will be registered here.
"""
class OpenMPOp(Op):
    """All Ops that use OpenMP code should inherit from this class.

    This Op checks that the compiler correctly supports OpenMP code.
    If not, it prints a warning, disables openmp for this Op, and
    generates the non-OpenMP version of the code.

    This is needed because the g++ shipped with EPD on Windows claims
    OpenMP support in its spec information, but does not include the
    OpenMP files.

    We also add the correct compiler flags in c_compile_args.
    """

    gxx_support_openmp = None
    """
    True/False after we tested this.
    """

    def __init__(self, openmp=None):
        if openmp is None:
            openmp = theano.config.openmp
        self.openmp = openmp

    def c_compile_args(self):
        if self.openmp:
            return ['-fopenmp']
        return []

    @staticmethod
    def test_gxx_support():
        try:
            code = """
#include <omp.h>
int main( int argc, const char* argv[] )
{
    int res[10];
    for(int i=0; i < 10; i++){
        res[i] = i;
    }
}
"""
            fd, path = tempfile.mkstemp(suffix='.c', prefix='test_omp_')
            dummy_stdin = open(os.devnull)
            try:
                os.write(fd, code)
                os.close(fd)
                fd = None
                proc = subprocess.Popen(['g++', '-fopenmp', path],
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        stdin=dummy_stdin.fileno())
                proc.wait()
                if proc.returncode != 0:
                    return False
            finally:
                del dummy_stdin
                # Ensure `fd` is closed before we remove the temporary file.
                try:
                    if fd is not None:
                        os.close(fd)
                finally:
                    os.remove(path)
        except OSError, e:
            return False
        return True

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        op = self
        if self.openmp:
            if OpenMPOp.gxx_support_openmp is None:
                OpenMPOp.gxx_support_openmp = OpenMPOp.test_gxx_support()
                if not OpenMPOp.gxx_support_openmp:
                    # We want to warn only once.
                    warnings.warn(
                        "Your g++ compiler fails to compile OpenMP code. We"
                        " know this happens with some versions of the EPD"
                        " mingw compiler. We disable openmp everywhere in"
                        " Theano. To remove this warning, set the Theano"
                        " flag `openmp` to False.")
            if OpenMPOp.gxx_support_openmp is False:
                op = copy.copy(self)
                op.openmp = False
                theano.config.openmp = False
        return super(OpenMPOp, op).make_thunk(node, storage_map,
                                              compute_map, no_recycling)
import StringIO
import sys

from python25 import DefaultOrderedDict
import numpy

import opt
@@ -29,7 +26,7 @@ class DB(object):
        return self._optimizer_idx

    def __init__(self):
        self.__db__ = DefaultOrderedDict(set)
        self._names = set()
        self.name = None  # will be reset by register
        #(via obj.name by the thing doing the registering)
......
@@ -158,3 +158,176 @@ if sys.version_info[:2] < (2, 6):
else:
    from itertools import combinations, product
    from sys import maxsize
if sys.version_info[:2] < (2, 7):
    # The following implementation of OrderedDict, compatible with
    # Python 2.4, was taken from http://pypi.python.org/pypi/ordereddict/1.1
    # It is under the MIT license.
    # Copyright (c) 2009 Raymond Hettinger
    #
    # Permission is hereby granted, free of charge, to any person
    # obtaining a copy of this software and associated documentation files
    # (the "Software"), to deal in the Software without restriction,
    # including without limitation the rights to use, copy, modify, merge,
    # publish, distribute, sublicense, and/or sell copies of the Software,
    # and to permit persons to whom the Software is furnished to do so,
    # subject to the following conditions:
    #
    # The above copyright notice and this permission notice shall be
    # included in all copies or substantial portions of the Software.
    #
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    # OTHER DEALINGS IN THE SOFTWARE.
    from UserDict import DictMixin
    class OrderedDict(dict, DictMixin):

        def __init__(self, *args, **kwds):
            if len(args) > 1:
                raise TypeError('expected at most 1 arguments, got %d' %
                                len(args))
            try:
                self.__end
            except AttributeError:
                self.clear()
            self.update(*args, **kwds)

        def clear(self):
            self.__end = end = []
            end += [None, end, end]  # sentinel node for doubly linked list
            self.__map = {}          # key --> [key, prev, next]
            dict.clear(self)

        def __setitem__(self, key, value):
            if key not in self:
                end = self.__end
                curr = end[1]
                curr[2] = end[1] = self.__map[key] = [key, curr, end]
            dict.__setitem__(self, key, value)

        def __delitem__(self, key):
            dict.__delitem__(self, key)
            key, prev, next = self.__map.pop(key)
            prev[2] = next
            next[1] = prev

        def __iter__(self):
            end = self.__end
            curr = end[2]
            while curr is not end:
                yield curr[0]
                curr = curr[2]

        def __reversed__(self):
            end = self.__end
            curr = end[1]
            while curr is not end:
                yield curr[0]
                curr = curr[1]

        def popitem(self, last=True):
            if not self:
                raise KeyError('dictionary is empty')
            if last:
                key = reversed(self).next()
            else:
                key = iter(self).next()
            value = self.pop(key)
            return key, value

        def __reduce__(self):
            items = [[k, self[k]] for k in self]
            tmp = self.__map, self.__end
            del self.__map, self.__end
            inst_dict = vars(self).copy()
            self.__map, self.__end = tmp
            if inst_dict:
                return (self.__class__, (items,), inst_dict)
            return self.__class__, (items,)

        def keys(self):
            return list(self)

        setdefault = DictMixin.setdefault
        update = DictMixin.update
        pop = DictMixin.pop
        values = DictMixin.values
        items = DictMixin.items
        iterkeys = DictMixin.iterkeys
        itervalues = DictMixin.itervalues
        iteritems = DictMixin.iteritems

        def __repr__(self):
            if not self:
                return '%s()' % (self.__class__.__name__,)
            return '%s(%r)' % (self.__class__.__name__, self.items())

        def copy(self):
            return self.__class__(self)

        @classmethod
        def fromkeys(cls, iterable, value=None):
            d = cls()
            for key in iterable:
                d[key] = value
            return d

        def __eq__(self, other):
            if isinstance(other, OrderedDict):
                if len(self) != len(other):
                    return False
                for p, q in zip(self.items(), other.items()):
                    if p != q:
                        return False
                return True
            return dict.__eq__(self, other)

        def __ne__(self, other):
            return not self == other
else:
    from collections import OrderedDict
from collections import Callable
class DefaultOrderedDict(OrderedDict):
    def __init__(self, default_factory=None, *a, **kw):
        if (default_factory is not None and
                not callable(default_factory)):
            raise TypeError('first argument must be callable')
        OrderedDict.__init__(self, *a, **kw)
        self.default_factory = default_factory

    def __getitem__(self, key):
        try:
            return OrderedDict.__getitem__(self, key)
        except KeyError:
            return self.__missing__(key)

    def __missing__(self, key):
        if self.default_factory is None:
            raise KeyError(key)
        self[key] = value = self.default_factory()
        return value

    def __reduce__(self):
        if self.default_factory is None:
            args = tuple()
        else:
            args = self.default_factory,
        return type(self), args, None, None, self.items()

    def copy(self):
        return self.__copy__()

    def __copy__(self):
        return type(self)(self.default_factory, self)
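The point of replacing `defaultdict` with an ordered default dict in the optimization DB is that iteration order becomes insertion order, so the registered optimizations are traversed deterministically across runs. A minimal sketch of the same idea using only Python's standard `collections` (the class and the tag names here are illustrative, not Theano's):

```python
from collections import OrderedDict


class DefaultOrderedDict(OrderedDict):
    # Like defaultdict, but preserves the insertion order of keys.
    # dict.__getitem__ calls __missing__ when the key is absent.
    def __init__(self, default_factory=None, *a, **kw):
        OrderedDict.__init__(self, *a, **kw)
        self.default_factory = default_factory

    def __missing__(self, key):
        if self.default_factory is None:
            raise KeyError(key)
        self[key] = value = self.default_factory()
        return value


# Register some (hypothetical) optimizations under tags.
db = DefaultOrderedDict(set)
for tag in ['fast_run', 'stabilize', 'fast_compile']:
    db[tag].add('some_opt_' + tag)

# Keys come back in the order they were first touched.
keys = list(db)
```

With a plain `defaultdict` on older Pythons the key order could vary between processes, which made the order of applied optimizations nondeterministic.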
@@ -359,6 +359,7 @@ def use(device,
        assert isinstance(device, int)
        gpu_init(device)
        use.device_number = device
        assert active_device_number() == device
    else:
        # This means the driver should select the GPU. As we
        # need to get the device number now, we force the
@@ -379,7 +380,16 @@
        if enable_cuda:
            cuda_enabled = True
            print >> sys.stderr, "Using gpu device %d: %s" % (
                use.device_number, active_device_name())
            if device_properties(use.device_number)['regsPerBlock'] < 16384:
                # We will try to use too many registers per block in
                # many places when there are only 8k registers per
                # multi-processor.
                _logger.warning("You are probably using an old GPU. We"
                                " do not optimize for or support such"
                                " GPUs. This means GPU code will be slow"
                                " AND may crash when we try to use"
                                " features or properties that your GPU"
                                " does not support.")
    except (EnvironmentError, ValueError, RuntimeError), e:
        _logger.error(("ERROR: Not using GPU."
                       " Initialisation of device %s failed:\n%s"),
......
(Diff collapsed.)
# Skip test if cuda_ndarray is not available.
import unittest

from nose.plugins.skip import SkipTest

import numpy

import theano
import theano.sandbox.cuda as cuda_ndarray

if cuda_ndarray.cuda_available == False:
    raise SkipTest('Optional package cuda disabled')
import theano.sandbox.test_neighbours
from theano.sandbox.cuda.neighbours import GpuImages2Neibs
if theano.config.mode == 'FAST_COMPILE':
    mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
else:
    mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
class T_GpuImages2Neibs(theano.sandbox.test_neighbours.T_Images2Neibs):
    def __init__(self, name):
        self.mode = mode_with_gpu
        self.op = GpuImages2Neibs
        return super(T_GpuImages2Neibs, self).__init__(name)


if __name__ == '__main__':
    unittest.main()
@@ -5391,11 +5391,13 @@ class Reshape(Op):
            requ = list(requ.data)
            requ_part = [ele for ele in requ if ele != -1]
            crit = len(requ) - len(requ_part)
            if crit == 1 and len(requ_part) > 0:
                missing = mul(*ishapes[0]) / mul(*requ_part)
                for i, ele in enumerate(requ):
                    if ele == -1:
                        requ[i] = missing
            elif crit == 1:  # we reshape to -1
                requ = [mul(*ishapes[0])]
            elif crit > 1:
                raise ValueError('shape argument to Reshape.perform'
                                 ' must have at most one entry equal to -1')
......
(Diff collapsed.)
@@ -963,6 +963,9 @@ class ShapeFeature(object):
        for sh_idx, sh in enumerate(o_shapes):
            if sh is None:
                continue
            if not isinstance(sh, (list, tuple)):
                raise ValueError("infer_shape of %s didn't return a list"
                                 " of lists. It returned '%s'" %
                                 (str(node), str(o_shapes)))
            for i, d in enumerate(sh):
                # Note: we ignore any shape element that is not typed (i.e.,
                # does not have a 'dtype' attribute). This means there may
......
@@ -4456,6 +4456,7 @@ class T_op_cache(unittest.TestCase):
        a = rand(5, 2).astype(config.floatX)
        self.assertTrue(numpy.all(fn_py(a) == fn_c_or_py(a)))


class T_reshape(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()
@@ -4469,10 +4470,10 @@ class T_reshape(unittest.TestCase):
        c = reshape(b, as_tensor_variable(6), ndim=1)
        f = inplace_func([b], c)
        b_val1 = numpy.asarray([[0, 1, 2], [3, 4, 5]])
        c_val1 = numpy.asarray([0, 1, 2, 3, 4, 5])
        b_val2 = b_val1.T
        c_val2 = numpy.asarray([0, 3, 1, 4, 2, 5])
        f_out1 = f(b_val1)
        f_out2 = f(b_val2)
@@ -4484,78 +4485,88 @@
        #basic to 1 dim(with list)
        c = reshape(b, (as_tensor_variable(6),), ndim=1)
        f = inplace_func([b], c)
        assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]])) ==
                         numpy.asarray([0, 1, 2, 3, 4, 5]))
        #print f.maker.fgraph.toposort()
        #check that we remove the useless reshape

        #basic to shape object of same ndim
        c = reshape(b, d.shape)
        f = inplace_func([b, d], c)
        assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]]),
                           [[0, 1], [2, 3], [4, 5]]) ==
                         numpy.asarray([[0, 1], [2, 3], [4, 5]]))

        #basic to 2 dims
        c = reshape(a, [2, 3])
        f = inplace_func([a], c)
        assert numpy.all(f(numpy.asarray([0, 1, 2, 3, 4, 5])) ==
                         numpy.asarray([[0, 1, 2], [3, 4, 5]]))

        #test that it works without inplace operations
        a_val = numpy.asarray([0, 1, 2, 3, 4, 5])
        a_val_copy = numpy.asarray([0, 1, 2, 3, 4, 5])
        b_val = numpy.asarray([[0, 1, 2], [3, 4, 5]])

        f_sub = inplace_func([a, b], c - b)
        assert numpy.all(f_sub(a_val, b_val) == 0.0)
        assert numpy.all(a_val == a_val_copy)

        #test that it works with inplace operations
        a_val = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float64')
        a_val_copy = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float64')
        b_val = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float64')

        f_sub = inplace_func([a, b], c - b)
        assert numpy.all(f_sub(a_val, b_val) == 0.0)
        assert numpy.all(a_val == a_val_copy)

        # verify gradient
        def just_vals(v):
            return Reshape(2)(v, theano._asarray([2, 3], dtype='int32'))
        utt.verify_grad(just_vals, [a_val])

        #test infer_shape
        f_sub = function([a, b], (c - b).shape)
        if config.mode == "FAST_COMPILE":
            assert len(f_sub.maker.fgraph.toposort()) == 3
        else:
            topo = f_sub.maker.fgraph.toposort()
            assert len(topo) == 1
            assert topo[0].op == theano.compile.function_module.deep_copy_op
        #assert numpy.all(f_sub(a_val,numpy.asarray([[0,1],[2,3],[4,5]]))==[2,3])#work in FAST_RUN, but fail on other!
        #assert numpy.all(f_sub(a_val,numpy.asarray([[0,1],[2,3],[4,5],[6,7]]))==[2,3])#work in FAST_RUN, but fail on other!

        # test broadcast flag for constant value of 1
        c = reshape(b, (b.shape[0], b.shape[1], 1))
        f = inplace_func([b], c)
        assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]])) ==
                         numpy.asarray([[[0], [1], [2]], [[3], [4], [5]]]))
        assert (f.maker.fgraph.toposort()[-2].outputs[0].type.broadcastable ==
                (False, False, True))

        assert numpy.all(f_sub(a_val, b_val) == [2, 3])
    def test_bad_shape(self):
        a = matrix('a')
        shapes = ivector('shapes')
        rng = numpy.random.RandomState(seed=utt.fetch_seed())
        a_val = rng.uniform(size=(3, 4)).astype(config.floatX)

        # Test reshape to 1 dim
        r = a.reshape(shapes, ndim=1)
        z = zeros_like(r)
        f = function([a, shapes], z.shape)
        self.assertRaises(ValueError, f, a_val, [13])

        # Test reshape to 2 dims
        r = a.reshape(shapes, ndim=2)
        z = zeros_like(r)
        f = function([a, shapes], z.shape)
        self.assertTrue((f(a_val, [4, 3]) == [4, 3]).all())
        self.assertTrue((f(a_val, [-1, 3]) == [4, 3]).all())
        self.assertTrue((f(a_val, [4, -1]) == [4, 3]).all())
        self.assertRaises(ValueError, f, a_val, [-1, 5])
        self.assertRaises(ValueError, f, a_val, [7, -1])
        self.assertRaises(ValueError, f, a_val, [7, 5])
@@ -4577,8 +4588,8 @@ def test_flatten_outdimNone():
    a = dmatrix()
    c = flatten(a)
    f = inplace_func([a], c)
    a_val = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float64')
    c_val = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float64')
    assert numpy.all(f(a_val) == c_val)
    f = inplace_func([a], c)
    assert numpy.all(f(a_val) == c_val)
@@ -4601,8 +4612,8 @@ def test_flatten_outdim1():
    a = dmatrix()
    c = flatten(a, 1)
    f = inplace_func([a], c)
    a_val = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float64')
    c_val = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float64')
    assert numpy.all(f(a_val) == c_val)
    f = inplace_func([a], c)
    assert numpy.all(f(a_val) == c_val)
@@ -4613,7 +4624,7 @@ def test_flatten_outdim2():
    a = dmatrix()
    c = flatten(a, 2)
    f = inplace_func([a], c)
    a_val = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float64')
    assert numpy.all(f(a_val) == a_val)
    f = inplace_func([a], c)
    assert numpy.all(f(a_val) == a_val)
@@ -6679,8 +6690,17 @@ class TestInferShape(utt.InferShapeTester):
        # (non-constant) input shape
        admat = dmatrix()
        aivec = ivector()
        ndim = 1
        admat_val = rand(3, 4)
        self._compile_and_check([admat],
                                [Reshape(ndim)(admat, [12])],
                                [admat_val], Reshape)

        self._compile_and_check([admat],
                                [Reshape(ndim)(admat, [-1])],
                                [admat_val], Reshape)

        ndim = 2
        self._compile_and_check([admat],
                                [Reshape(ndim)(admat, [4, 3])],
                                [admat_val], Reshape)
...@@ -6689,6 +6709,17 @@ class TestInferShape(utt.InferShapeTester): ...@@ -6689,6 +6709,17 @@ class TestInferShape(utt.InferShapeTester):
[Reshape(ndim)(admat, [4, -1])], [Reshape(ndim)(admat, [4, -1])],
[admat_val], Reshape) [admat_val], Reshape)
self._compile_and_check([admat],
[Reshape(ndim)(admat, [3, -1])],
[admat_val], Reshape)
self._compile_and_check([admat],
[Reshape(ndim)(admat, [-1, 3])],
[admat_val], Reshape)
self._compile_and_check([admat],
[Reshape(ndim)(admat, [-1, 4])],
[admat_val], Reshape)
# enable when infer_shape is generalized: # enable when infer_shape is generalized:
# self._compile_and_check([admat, aivec], # self._compile_and_check([admat, aivec],
# [Reshape(ndim)(admat, aivec)], # [Reshape(ndim)(admat, aivec)],
......
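The new test cases above exercise `Reshape.infer_shape` with a `-1` wildcard in every position, as well as 1-d targets. The shapes those cases must infer follow numpy's rule: the `-1` dimension is deduced from the total element count. A minimal numpy sketch of the expected results:

```python
import numpy as np

# Mirrors rand(3, 4) in the test above: 12 elements total.
admat_val = np.random.rand(3, 4)

# Each (target shape, inferred shape) pair matches a _compile_and_check
# call added in the diff; -1 is resolved as 12 / (product of the rest).
cases = [([12], (12,)),
         ([-1], (12,)),
         ([4, 3], (4, 3)),
         ([4, -1], (4, 3)),
         ([3, -1], (3, 4)),
         ([-1, 3], (4, 3)),
         ([-1, 4], (3, 4))]

for target, expected in cases:
    assert admat_val.reshape(target).shape == expected
```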
@@ -132,25 +132,6 @@ class test_dimshuffle_lift(unittest.TestCase):
                           "{x,0,1}(y)), z)]"), str(g))


-def test_stabilize_log_softmax():
-    mode = theano.compile.mode.get_default_mode()
-    mode = mode.including('local_log_softmax')
-
-    x = matrix()
-    y = theano.tensor.nnet.softmax(x)
-    z = theano.tensor.log(y)
-
-    f = function([x],z)
-
-    #check that the softmax has been optimized out
-    for node in f.maker.fgraph.toposort():
-        assert not isinstance(node.op, y.owner.op.__class__)
-
-    #call the function so debug mode can verify the optimized
-    #version matches the unoptimized version
-    rng = numpy.random.RandomState([2012,8,22])
-    f(numpy.cast[config.floatX](rng.randn(2,3)))
-
-
 def test_add_canonizer_problem0():
     n_segments = 10
     label = lscalar('label')
...
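The removed `test_stabilize_log_softmax` checked that the `local_log_softmax` stabilization replaces `log(softmax(x))` with a numerically safe form. The motivation for that rewrite can be sketched in plain numpy (the function names here are illustrative, not part of Theano's API):

```python
import numpy as np

def naive_log_softmax(x):
    # log(softmax(x)) computed literally: exp overflows for large inputs.
    e = np.exp(x)
    return np.log(e / e.sum(axis=1, keepdims=True))

def stable_log_softmax(x):
    # Algebraically equivalent form the optimization targets:
    # x - max(x) - log(sum(exp(x - max(x)))).
    m = x.max(axis=1, keepdims=True)
    return x - m - np.log(np.exp(x - m).sum(axis=1, keepdims=True))

# Both forms agree on moderate values.
x = np.array([[1.0, 2.0, 3.0]])
assert np.allclose(naive_log_softmax(x), stable_log_softmax(x))

# On large values the naive form overflows to non-finite results,
# while the stabilized form stays finite.
big = np.array([[1000.0, 1001.0, 1002.0]])
assert np.isfinite(stable_log_softmax(big)).all()
assert not np.isfinite(naive_log_softmax(big)).all()
```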
@@ -115,15 +115,9 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile):
     # Setting aside current working directory for later saving
     sav_dir = os.getcwd()

-    if len(argv) == 1:
-        tests_dir = theano.__path__[0]
-        other_args = []
-    else:
-        # tests_dir should be at the end of argv, there can be other arguments
-        tests_dir = argv[-1]
-        other_args = argv[1:-1]
-    assert os.path.isdir(tests_dir)
-    os.chdir(tests_dir)
+    # The first argument is the called script.
+    argv = argv[1:]

     # It seems safer to fully regenerate the list of tests on each call.
     if os.path.isfile('.noseids'):
         os.remove('.noseids')
@@ -142,7 +136,7 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile):
     python = sys.executable
     rval = subprocess.call(
         ([python, theano_nose, '--collect-only', '--with-id']
-         + other_args),
+         + argv),
         stdin=dummy_in.fileno(),
         stdout=stdout.fileno(),
         stderr=stderr.fileno())
@@ -172,7 +166,7 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile):
     rval = subprocess.call(
         ([python, theano_nose, '-q', '--with-id']
          + map(str, test_range)
-         + other_args),
+         + argv),
         stdout=dummy_out.fileno(),
         stderr=dummy_out.fileno(),
         stdin=dummy_in.fileno())
@@ -198,7 +192,7 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile):
     subprocess.call(
         ([python, theano_nose, '-v', '--with-id']
          + failed
-         + other_args),
+         + argv),
         stdin=dummy_in.fileno(),
         stdout=stdout.fileno(),
         stderr=stderr.fileno())
@@ -240,7 +234,7 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile):
                          n_tests + 1)):
         proc = subprocess.Popen(
             ([python, theano_nose, '-v', '--with-id']
-             + [str(test_id)] + other_args +
+             + [str(test_id)] + argv +
              ['--disabdocstring']),
             # the previous option calls a custom Nosetests plugin
             # precluding automatic sustitution of doc. string for
...
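The hunks above drop the `tests_dir` / `other_args` split and instead forward everything after the script name (`argv[1:]`) verbatim to each nosetests invocation. A minimal sketch of the new command-building behavior (function and argument names here are illustrative, not from the real script):

```python
import sys

def build_batch_cmd(theano_nose, argv, test_range):
    # After the change above, every extra command-line argument is
    # forwarded verbatim instead of being parsed into tests_dir + other_args.
    argv = argv[1:]  # drop the calling script's own name
    return ([sys.executable, theano_nose, '-q', '--with-id']
            + [str(t) for t in test_range]
            + argv)

# Extra flags like --verbose now reach nosetests untouched.
cmd = build_batch_cmd('theano-nose', ['run_tests.py', '--verbose'], [1, 2])
assert cmd[-1] == '--verbose'
assert cmd[4:6] == ['1', '2']
```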