Merge remote-tracking branch 'central/master' into rc1

Conflicts: NEWS.txt

Merge remote-tracking branch 'central/master' into rc1
6dbe4953 · Frederic · 5f9efce1 · 63a59ac1 · 6dbe4953 · 6dbe4953
--- a/NEWS.txt
+++ b/NEWS.txt
--- a/bin/theano-cache
+++ b/bin/theano-cache
@@ -28,10 +28,14 @@ elif sys.argv[1] in ('clear'):
                      (len(items), ', '.join(items)))
 elif sys.argv[1] in ('list'):
    theano.gof.compiledir.print_compiledir_content()
+elif sys.argv[1] == 'unlock':
+    theano.gof.compilelock.force_unlock()
+    print 'Lock successfully removed!'
 else:
    print 'command "%s" not recognized' % sys.argv[1]
    print 'Type "theano-cache" to print the cache location'
    print 'Type "theano-cache clear" to erase the cache'
    print 'Type "theano-cache list" to print the cache content'
+    print 'Type "theano-cache unlock" to unlock the cache directory'
    sys.exit(1)
--- a/doc/install.txt
+++ b/doc/install.txt
@@ -726,7 +726,7 @@ Currently, due to memory fragmentation issue in Windows, the
 test-suite breaks at some point when using ``nosetests``, with many error
 messages looking
 like: ``DLL load failed: Not enough storage is available to process this
-command``. As a result, you should instead run
+command``. As a workaround, you can instead run:
    .. code-block:: bash
@@ -736,6 +736,13 @@ This will run tests in batches of 100, which should avoid memory errors.
 Note that this script calls ``nosetests``, which may require being run from
 within a MinGW shell if you installed Nose manually as described above.
+.. note::
+    The above workaround to run tests with the ``run_tests_in_batch.py`` script
+    is currently imperfect: some tests are not properly collected by nosetests
+    in this mode. This may result in unexpected test failures whose report
+    starts with ``ERROR: Failure: OSError``. We do not yet have a fix for this
+    problem.
 Editing code in Visual Studio
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

--- a/theano/gof/compilelock.py
+++ b/theano/gof/compilelock.py
@@ -21,9 +21,27 @@ timeout_before_override = 120
 # 'refresh_every' seconds.
 refresh_every = 60
-def get_lock():
+def force_unlock():
+    """
+    Delete the compilation lock if someone else has it.
+    """
+    global timeout_before_override
+    timeout_backup = timeout_before_override
+    timeout_before_override = 0
+    try:
+        get_lock(min_wait=0, max_wait=0.001)
+        release_lock()
+    finally:
+        timeout_before_override = timeout_backup
+def get_lock(**kw):
    """
    Obtain lock on compilation directory.
+    :param kw: Additional arguments to be forwarded to the `lock` function when
+    acquiring the lock.
    """
    if not hasattr(get_lock, 'n_lock'):
        # Initialization.
@@ -47,7 +65,7 @@ def get_lock():
    if get_lock.lock_is_enabled:
        # Only really try to acquire the lock if we do not have it already.
        if get_lock.n_lock == 0:
-            lock(get_lock.lock_dir, timeout = timeout_before_override)
+            lock(get_lock.lock_dir, timeout=timeout_before_override, **kw)
            atexit.register(Unlocker.unlock, get_lock.unlocker)
            # Store time at which the lock was set.
            get_lock.start_time = time.time()

--- a/theano/misc/do_nightly_build
+++ b/theano/misc/do_nightly_build
@@ -5,6 +5,12 @@ START=`date +%s`
 NOSETESTS=nosetests
 ARGS=$@
 PROFILING=""
+RELEASE=""
+if [ "$1" == "--release" ]; then
+    RELEASE="True"
+    shift
+    ARGS=$@
+fi
 if [ "$1" == "--buildbot" ]; then
    ROOT_CWD=/Tmp/nightly_build
    FLAGS=compiledir=/Tmp/lisa_theano_compile_dir_theano
@@ -17,7 +23,10 @@ fi
 echo "Number of elements in the compiledir:"
 ls ${COMPILEDIR}|wc -l
+# We don't want warnings about already-fixed bugs in the buildbot
 FLAGS=${THEANO_FLAGS},warn.argmax_pushdown_bug=False,warn.gpusum_01_011_0111_bug=False,warn.sum_sum_bug=False,warn.sum_div_dimshuffle_bug=False,$FLAGS
+# We want optimization errors to be clearly visible, so make them raise an error
+FLAGS=on_opt_error=raise,$FLAGS
 # Ignore user device and floatX config, because:
 #   1. Tests are intended to be run with device=cpu.
 #   2. We explicitly add 'floatX=float32' in one run of the test suite below,
@@ -25,14 +34,23 @@ FLAGS=${THEANO_FLAGS},warn.argmax_pushdown_bug=False,warn.gpusum_01_011_0111_bug
 FLAGS=${FLAGS},device=cpu,floatX=float64
 export PYTHONPATH=${ROOT_CWD}:$PYTHONPATH
+if [ "$RELEASE" ]; then
+    echo "Executing nosetests with default mode and compute_test_value"
+    THEANO_FLAGS=${FLAGS},compute_test_value=ignore ${NOSETESTS} ${ARGS}
+    echo "Number of elements in the compiledir:"
+    ls ${COMPILEDIR}|wc -l
+fi
 echo "Executing nosetests with mode=FAST_COMPILE"
 THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}
 echo "Number of elements in the compiledir:"
 ls ${COMPILEDIR}|wc -l
 echo "Executing nosetests with mode=FAST_RUN"
 THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${PROFILING} ${ARGS}
 echo "Number of elements in the compiledir:"
 ls ${COMPILEDIR}|wc -l
 echo "Executing nosetests with mode=FAST_RUN,floatX=float32"
 THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${ARGS}
 echo "Number of elements in the compiledir:"

--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -555,8 +555,14 @@ class test_structureddot(unittest.TestCase):
 class DotTests(unittest.TestCase):
    def setUp(self):
-        x_size = (10, 1000)
+        # On 32-bit platforms we use smaller matrices to avoid running out of
-        y_size = (1000, 10000)
+        # memory during tests.
+        if theano.gof.cmodule.local_bitwidth() <= 32:
+            x_size = (10, 100)
+            y_size = (100, 1000)
+        else:
+            x_size = (10, 1000)
+            y_size = (1000, 10000)
        self.x_csr = scipy.sparse.csr_matrix(
            numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX)

--- a/theano/tensor/blas.py
+++ b/theano/tensor/blas.py
@@ -890,21 +890,28 @@ def res_is_a(node, op, maxclients=None):
              and retval
-def _as_scalar(res):
+def _as_scalar(res, dtype=None):
    """Return None or a TensorVariable whose type is in T.float_scalar_types"""
+    if dtype is None:
+        dtype = config.floatX
    if numpy.all(res.type.broadcastable):
        while res.owner and isinstance(res.owner.op, T.DimShuffle):
            res = res.owner.inputs[0]
-        if res.type.broadcastable: # may still have some number of True's
+        # may still have some number of True's
+        if res.type.broadcastable:
            rval = res.dimshuffle()
        else:
            rval = res
        if rval.type.dtype[:3] in ('int', 'uin'):
-            rval = cast(rval, theano.config.floatX) #may lose precision !?
+            # We check that the upcast of res and dtype won't change dtype.
+            # If dtype is float64, we will cast int64 to float64.
-        #if isinstance(rval, T.Constant):
+            # This is valid when res is a scalar used as input to a dot22
-            #rval = rval.data.flatten()[0]
+            # as the cast of the scalar can be done before or after the dot22
+            #  and this will give the same result.
+            if theano.scalar.upcast(res.dtype, dtype) == dtype:
+                return T.cast(rval, dtype)
+            else:
+                return None
        return rval
@@ -1567,7 +1574,7 @@ def local_dot22_to_dot22scalar(node):
        #return False #TODO fix
    dot22_idx = i_dot22.index(True)
    d = node.inputs[dot22_idx]
-    i_scalar = [_as_scalar(x) for x in node.inputs]
+    i_scalar = [_as_scalar(x, dtype=d.dtype) for x in node.inputs]
    if not any(i_scalar):
        i_mul = [x.owner and x.owner.op ==T.mul for x in node.inputs]
        if not any(i_mul):
@@ -1581,10 +1588,10 @@ def local_dot22_to_dot22scalar(node):
        mul_idx = i_mul.index(True)#we take the first mul!
        m = node.inputs[mul_idx]
-        if len(m.owner.inputs)==2 and any([_as_scalar(x) for x in m.owner.inputs]):
+        if len(m.owner.inputs)==2 and any([_as_scalar(x, dtype=d.dtype) for x in m.owner.inputs]):
            scalar_idx = -1
            for i,x in enumerate(m.owner.inputs):
-                if _as_scalar(x) and (theano.scalar.upcast(x.type.dtype,d.type.dtype)
+                if _as_scalar(x, dtype=d.dtype) and (theano.scalar.upcast(x.type.dtype,d.type.dtype)
                                      == d.type.dtype):
                    scalar_idx = i
                    break
@@ -1594,7 +1601,7 @@ def local_dot22_to_dot22scalar(node):
                             'of the scalar cannot be upcasted to the matrix type',
                             node.inputs, [x.type for x in node.inputs])
                return False
-            a = T.cast(_as_scalar(m.owner.inputs[scalar_idx]), d.type.dtype)
+            a = T.cast(_as_scalar(m.owner.inputs[scalar_idx], dtype=d.dtype), d.type.dtype)
            assert not a.type.ndim
            dot=_dot22scalar(d.owner.inputs[0], d.owner.inputs[1], a)

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -920,6 +920,28 @@ class ShapeFeature(object):
                    + ' != len(node.outputs) = '
                    + str(len(node.outputs)))
+        # Ensure shapes are in 'int64'. This is to make sure the assert
+        # found in the `local_useless_subtensor` optimization does not fail.
+        new_shape = []
+        for sh_idx, sh in enumerate(o_shapes):
+            if sh is None:
+                continue
+            for i, d in enumerate(sh):
+                # Note: we ignore any shape element that is not typed (i.e. does
+                # not have a 'dtype' attribute). This means there may still
+                # remain int elements that are int32 on 32-bit platforms, but
+                # this works with `local_useless_subtensor`, so for now we
+                # keep it this way. See #266 for a better long-term fix.
+                if getattr(d, 'dtype', 'int64') != 'int64':
+                    assert d.dtype in theano.tensor.int_dtypes
+                    new_shape += sh[len(new_shape):i + 1]
+                    new_shape[i] = theano.tensor.cast(d, 'int64')
+            if new_shape:
+                # We replace the shape that has the wrong dtype with the 'int64' one.
+                new_shape += sh[len(new_shape):]
+                o_shapes[sh_idx] = tuple(new_shape)
+                new_shape = []
        for r, s in izip(node.outputs, o_shapes):
            self.set_shape(r, s)

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -372,7 +372,8 @@ def rand_of_dtype(shape, dtype):
 def makeBroadcastTester(op, expected, checks={}, name=None, **kwargs):
-    name = str(op)
+    if name is None:
+        name = str(op)
    # Here we ensure the test name matches the name of the variable defined in
    # this script. This is needed to properly identify the test e.g. with the
    # --with-id option of nosetests, or simply to rerun a specific test that
@@ -628,6 +629,7 @@ CeilIntDivTester = makeBroadcastTester(
                 uinteger=(randint(2, 3).astype("uint8"),
                           randint_nonzero(2, 3).astype("uint8")),
                 ),
+    name='CeilIntDiv',
    # As we implement this function with neq, the gradient returned is always 0.
 #    grad=_grad_broadcast_div_mod_normal,
 #    grad_rtol=div_grad_rtol,
@@ -674,10 +676,13 @@ _grad_broadcast_pow_normal = dict(same_shapes = (rand_ranged(1, 5, (2, 3)), rand
 _good_broadcast_pow_normal_float_pow = copy(_good_broadcast_pow_normal_float)
 del _good_broadcast_pow_normal_float_pow["empty2"]
-PowTester = makeBroadcastTester(op = pow,
+PowTester = makeBroadcastTester(
-                                  expected = lambda x, y: check_floatX((x, y), x ** y),
+        op=pow,
-                                  good = _good_broadcast_pow_normal_float,
+        expected=lambda x, y: check_floatX((x, y), x ** y),
-                                  grad = _grad_broadcast_pow_normal)
+        good=_good_broadcast_pow_normal_float,
+        grad= _grad_broadcast_pow_normal,
+        name='Pow')
 PowInplaceTester = makeBroadcastTester(op = inplace.pow_inplace,
                                       expected = lambda x, y: x ** y,
                                       good = _good_broadcast_pow_normal_float_pow,
@@ -1090,15 +1095,19 @@ ErfcInplaceTester = makeBroadcastTester(op = inplace.erfc_inplace,
                                        inplace = True,
                                        skip = skip_scipy)
-ZerosLikeTester =  makeBroadcastTester(op = tensor.zeros_like,
+ZerosLikeTester = makeBroadcastTester(
-                                        expected = numpy.zeros_like,
+        op=tensor.zeros_like,
-                                        good = _good_broadcast_unary_normal,
+        expected=numpy.zeros_like,
-                                        grad = _grad_broadcast_unary_normal)
+        good=_good_broadcast_unary_normal,
+        grad=_grad_broadcast_unary_normal,
+        name='ZerosLike')
-OnesLikeTester =  makeBroadcastTester(op = tensor.ones_like,
+OnesLikeTester = makeBroadcastTester(
-                                        expected = numpy.ones_like,
+        op=tensor.ones_like,
-                                        good = _good_broadcast_unary_normal,
+        expected=numpy.ones_like,
-                                        grad = _grad_broadcast_unary_normal)
+        good=_good_broadcast_unary_normal,
+        grad=_grad_broadcast_unary_normal,
+        name='OnesLike')
 DotTester = makeTester(name = 'DotTester',
                        op = dot,

--- a/theano/tensor/tests/test_blas.py
+++ b/theano/tensor/tests/test_blas.py
@@ -821,6 +821,27 @@ def test_dot22scalar():
                    cmp((0,4),(4,0),(0,0))
                    cmp((0,0),(0,0),(0,0))
+def test_dot22scalar_cast():
+    """
+    Test that in `dot22_to_dot22scalar` we properly cast integers to floats.
+    """
+    # Note that this test was failing before d5ff6904.
+    A = T.dmatrix()
+    for scalar_int_type in T.int_dtypes:
+        y = T.scalar(dtype=scalar_int_type)
+        f = theano.function([A, y], T.dot(A, A) * y, mode=mode_blas_opt)
+        assert _dot22scalar in [x.op for x in f.maker.env.toposort()]
+    A = T.fmatrix()
+    for scalar_int_type in T.int_dtypes:
+        y = T.scalar(dtype=scalar_int_type)
+        f = theano.function([A, y], T.dot(A, A) * y, mode=mode_blas_opt)
+        if scalar_int_type in ['int32', 'int64']:
+            assert _dot22 in [x.op for x in f.maker.env.toposort()]
+        else:
+            assert _dot22scalar in [x.op for x in f.maker.env.toposort()]
 def test_dot_w_self():
    # This can trigger problems in the optimization because what would normally be a gemm must
    # not be because the output is aliased to one of the inputs.