提交 6dbe4953 authored 作者: Frederic's avatar Frederic

Merge remote-tracking branch 'central/master' into rc1

Conflicts: NEWS.txt
差异被折叠。
...@@ -28,10 +28,14 @@ elif sys.argv[1] in ('clear'): ...@@ -28,10 +28,14 @@ elif sys.argv[1] in ('clear'):
(len(items), ', '.join(items))) (len(items), ', '.join(items)))
elif sys.argv[1] in ('list'): elif sys.argv[1] in ('list'):
theano.gof.compiledir.print_compiledir_content() theano.gof.compiledir.print_compiledir_content()
elif sys.argv[1] == 'unlock':
theano.gof.compilelock.force_unlock()
print 'Lock successfully removed!'
else: else:
print 'command "%s" not recognized' % sys.argv[1] print 'command "%s" not recognized' % sys.argv[1]
print 'Type "theano-cache" to print the cache location' print 'Type "theano-cache" to print the cache location'
print 'Type "theano-cache clear" to erase the cache' print 'Type "theano-cache clear" to erase the cache'
print 'Type "theano-cache list" to print the cache content' print 'Type "theano-cache list" to print the cache content'
print 'Type "theano-cache unlock" to unlock the cache directory'
sys.exit(1) sys.exit(1)
...@@ -726,7 +726,7 @@ Currently, due to memory fragmentation issue in Windows, the ...@@ -726,7 +726,7 @@ Currently, due to memory fragmentation issue in Windows, the
test-suite breaks at some point when using ``nosetests``, with many error test-suite breaks at some point when using ``nosetests``, with many error
messages looking messages looking
like: ``DLL load failed: Not enough storage is available to process this like: ``DLL load failed: Not enough storage is available to process this
command``. As a result, you should instead run command``. As a workaround, you can instead run:
.. code-block:: bash .. code-block:: bash
...@@ -736,6 +736,13 @@ This will run tests in batches of 100, which should avoid memory errors. ...@@ -736,6 +736,13 @@ This will run tests in batches of 100, which should avoid memory errors.
Note that this script calls ``nosetests``, which may require being run from Note that this script calls ``nosetests``, which may require being run from
within a MinGW shell if you installed Nose manually as described above. within a MinGW shell if you installed Nose manually as described above.
.. note::
The above workaround to run tests with the ``run_tests_in_batch.py`` script
is currently imperfect: some tests are not properly collected by nosetests
in this mode. This may result in some weird test failures starting with
``ERROR: Failure: OSError``. We do not yet have a fix for this problem.
Editing code in Visual Studio Editing code in Visual Studio
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
...@@ -21,9 +21,27 @@ timeout_before_override = 120 ...@@ -21,9 +21,27 @@ timeout_before_override = 120
# 'refresh_every' seconds. # 'refresh_every' seconds.
refresh_every = 60 refresh_every = 60
def get_lock():
def force_unlock():
    """
    Delete the compilation lock if someone else has it.

    The module-level `timeout_before_override` is set to 0 while the lock is
    acquired (with `min_wait=0` and `max_wait=0.001`) and then released. The
    previous timeout value is always restored afterwards, even if acquiring
    or releasing the lock raises.
    """
    global timeout_before_override
    saved_timeout = timeout_before_override
    # NOTE(review): presumably a timeout of 0 lets `get_lock` override a lock
    # held by someone else right away -- confirm against `lock`.
    timeout_before_override = 0
    try:
        get_lock(min_wait=0, max_wait=0.001)
        release_lock()
    finally:
        # Put the configured timeout back for regular callers of `get_lock`.
        timeout_before_override = saved_timeout
def get_lock(**kw):
""" """
Obtain lock on compilation directory. Obtain lock on compilation directory.
:param kw: Additional arguments to be forwarded to the `lock` function when
acquiring the lock.
""" """
if not hasattr(get_lock, 'n_lock'): if not hasattr(get_lock, 'n_lock'):
# Initialization. # Initialization.
...@@ -47,7 +65,7 @@ def get_lock(): ...@@ -47,7 +65,7 @@ def get_lock():
if get_lock.lock_is_enabled: if get_lock.lock_is_enabled:
# Only really try to acquire the lock if we do not have it already. # Only really try to acquire the lock if we do not have it already.
if get_lock.n_lock == 0: if get_lock.n_lock == 0:
lock(get_lock.lock_dir, timeout = timeout_before_override) lock(get_lock.lock_dir, timeout=timeout_before_override, **kw)
atexit.register(Unlocker.unlock, get_lock.unlocker) atexit.register(Unlocker.unlock, get_lock.unlocker)
# Store time at which the lock was set. # Store time at which the lock was set.
get_lock.start_time = time.time() get_lock.start_time = time.time()
......
...@@ -5,6 +5,12 @@ START=`date +%s` ...@@ -5,6 +5,12 @@ START=`date +%s`
NOSETESTS=nosetests NOSETESTS=nosetests
ARGS=$@ ARGS=$@
PROFILING="" PROFILING=""
RELEASE=""
if [ "$1" == "--release" ]; then
RELEASE="True"
shift
ARGS=$@
fi
if [ "$1" == "--buildbot" ]; then if [ "$1" == "--buildbot" ]; then
ROOT_CWD=/Tmp/nightly_build ROOT_CWD=/Tmp/nightly_build
FLAGS=compiledir=/Tmp/lisa_theano_compile_dir_theano FLAGS=compiledir=/Tmp/lisa_theano_compile_dir_theano
...@@ -17,7 +23,10 @@ fi ...@@ -17,7 +23,10 @@ fi
echo "Number of elements in the compiledir:" echo "Number of elements in the compiledir:"
ls ${COMPILEDIR}|wc -l ls ${COMPILEDIR}|wc -l
# We don't want warnings for already-fixed errors in the buildbot
FLAGS=${THEANO_FLAGS},warn.argmax_pushdown_bug=False,warn.gpusum_01_011_0111_bug=False,warn.sum_sum_bug=False,warn.sum_div_dimshuffle_bug=False,$FLAGS FLAGS=${THEANO_FLAGS},warn.argmax_pushdown_bug=False,warn.gpusum_01_011_0111_bug=False,warn.sum_sum_bug=False,warn.sum_div_dimshuffle_bug=False,$FLAGS
# We want optimization errors to be clearly visible, so make them raise an error
FLAGS=on_opt_error=raise,$FLAGS
# Ignore user device and floatX config, because: # Ignore user device and floatX config, because:
# 1. Tests are intended to be run with device=cpu. # 1. Tests are intended to be run with device=cpu.
# 2. We explicitly add 'floatX=float32' in one run of the test suite below, # 2. We explicitly add 'floatX=float32' in one run of the test suite below,
...@@ -25,14 +34,23 @@ FLAGS=${THEANO_FLAGS},warn.argmax_pushdown_bug=False,warn.gpusum_01_011_0111_bug ...@@ -25,14 +34,23 @@ FLAGS=${THEANO_FLAGS},warn.argmax_pushdown_bug=False,warn.gpusum_01_011_0111_bug
FLAGS=${FLAGS},device=cpu,floatX=float64 FLAGS=${FLAGS},device=cpu,floatX=float64
export PYTHONPATH=${ROOT_CWD}:$PYTHONPATH export PYTHONPATH=${ROOT_CWD}:$PYTHONPATH
if [ "$RELEASE" ]; then
echo "Executing nosetests with default mode and compute_test_value"
THEANO_FLAGS=${FLAGS},compute_test_value=ignore ${NOSETESTS} ${ARGS}
echo "Number of elements in the compiledir:"
ls ${COMPILEDIR}|wc -l
fi
echo "Executing nosetests with mode=FAST_COMPILE" echo "Executing nosetests with mode=FAST_COMPILE"
THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS} THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}
echo "Number of elements in the compiledir:" echo "Number of elements in the compiledir:"
ls ${COMPILEDIR}|wc -l ls ${COMPILEDIR}|wc -l
echo "Executing nosetests with mode=FAST_RUN" echo "Executing nosetests with mode=FAST_RUN"
THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${PROFILING} ${ARGS} THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${PROFILING} ${ARGS}
echo "Number of elements in the compiledir:" echo "Number of elements in the compiledir:"
ls ${COMPILEDIR}|wc -l ls ${COMPILEDIR}|wc -l
echo "Executing nosetests with mode=FAST_RUN,floatX=float32" echo "Executing nosetests with mode=FAST_RUN,floatX=float32"
THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${ARGS} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${ARGS}
echo "Number of elements in the compiledir:" echo "Number of elements in the compiledir:"
......
...@@ -555,8 +555,14 @@ class test_structureddot(unittest.TestCase): ...@@ -555,8 +555,14 @@ class test_structureddot(unittest.TestCase):
class DotTests(unittest.TestCase): class DotTests(unittest.TestCase):
def setUp(self): def setUp(self):
x_size = (10, 1000) # On 32-bit platforms we use smaller matrices to avoid running out of
y_size = (1000, 10000) # memory during tests.
if theano.gof.cmodule.local_bitwidth() <= 32:
x_size = (10, 100)
y_size = (100, 1000)
else:
x_size = (10, 1000)
y_size = (1000, 10000)
self.x_csr = scipy.sparse.csr_matrix( self.x_csr = scipy.sparse.csr_matrix(
numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX) numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX)
......
...@@ -890,21 +890,28 @@ def res_is_a(node, op, maxclients=None): ...@@ -890,21 +890,28 @@ def res_is_a(node, op, maxclients=None):
and retval and retval
def _as_scalar(res): def _as_scalar(res, dtype=None):
"""Return None or a TensorVariable whose type is in T.float_scalar_types""" """Return None or a TensorVariable whose type is in T.float_scalar_types"""
if dtype is None:
dtype = config.floatX
if numpy.all(res.type.broadcastable): if numpy.all(res.type.broadcastable):
while res.owner and isinstance(res.owner.op, T.DimShuffle): while res.owner and isinstance(res.owner.op, T.DimShuffle):
res = res.owner.inputs[0] res = res.owner.inputs[0]
if res.type.broadcastable: # may still have some number of True's # may still have some number of True's
if res.type.broadcastable:
rval = res.dimshuffle() rval = res.dimshuffle()
else: else:
rval = res rval = res
if rval.type.dtype[:3] in ('int', 'uin'): if rval.type.dtype[:3] in ('int', 'uin'):
rval = cast(rval, theano.config.floatX) #may lose precision !? # We check that the upcast of res and dtype won't change dtype.
# If dtype is float64, we will cast int64 to float64.
#if isinstance(rval, T.Constant): # This is valid when res is a scalar used as input to a dot22
#rval = rval.data.flatten()[0] # as the cast of the scalar can be done before or after the dot22
# and this will give the same result.
if theano.scalar.upcast(res.dtype, dtype) == dtype:
return T.cast(rval, dtype)
else:
return None
return rval return rval
...@@ -1567,7 +1574,7 @@ def local_dot22_to_dot22scalar(node): ...@@ -1567,7 +1574,7 @@ def local_dot22_to_dot22scalar(node):
#return False #TODO fix #return False #TODO fix
dot22_idx = i_dot22.index(True) dot22_idx = i_dot22.index(True)
d = node.inputs[dot22_idx] d = node.inputs[dot22_idx]
i_scalar = [_as_scalar(x) for x in node.inputs] i_scalar = [_as_scalar(x, dtype=d.dtype) for x in node.inputs]
if not any(i_scalar): if not any(i_scalar):
i_mul = [x.owner and x.owner.op ==T.mul for x in node.inputs] i_mul = [x.owner and x.owner.op ==T.mul for x in node.inputs]
if not any(i_mul): if not any(i_mul):
...@@ -1581,10 +1588,10 @@ def local_dot22_to_dot22scalar(node): ...@@ -1581,10 +1588,10 @@ def local_dot22_to_dot22scalar(node):
mul_idx = i_mul.index(True)#we take the first mul! mul_idx = i_mul.index(True)#we take the first mul!
m = node.inputs[mul_idx] m = node.inputs[mul_idx]
if len(m.owner.inputs)==2 and any([_as_scalar(x) for x in m.owner.inputs]): if len(m.owner.inputs)==2 and any([_as_scalar(x, dtype=d.dtype) for x in m.owner.inputs]):
scalar_idx = -1 scalar_idx = -1
for i,x in enumerate(m.owner.inputs): for i,x in enumerate(m.owner.inputs):
if _as_scalar(x) and (theano.scalar.upcast(x.type.dtype,d.type.dtype) if _as_scalar(x, dtype=d.dtype) and (theano.scalar.upcast(x.type.dtype,d.type.dtype)
== d.type.dtype): == d.type.dtype):
scalar_idx = i scalar_idx = i
break break
...@@ -1594,7 +1601,7 @@ def local_dot22_to_dot22scalar(node): ...@@ -1594,7 +1601,7 @@ def local_dot22_to_dot22scalar(node):
'of the scalar cannot be upcasted to the matrix type', 'of the scalar cannot be upcasted to the matrix type',
node.inputs, [x.type for x in node.inputs]) node.inputs, [x.type for x in node.inputs])
return False return False
a = T.cast(_as_scalar(m.owner.inputs[scalar_idx]), d.type.dtype) a = T.cast(_as_scalar(m.owner.inputs[scalar_idx], dtype=d.dtype), d.type.dtype)
assert not a.type.ndim assert not a.type.ndim
dot=_dot22scalar(d.owner.inputs[0], d.owner.inputs[1], a) dot=_dot22scalar(d.owner.inputs[0], d.owner.inputs[1], a)
......
...@@ -920,6 +920,28 @@ class ShapeFeature(object): ...@@ -920,6 +920,28 @@ class ShapeFeature(object):
+ ' != len(node.outputs) = ' + ' != len(node.outputs) = '
+ str(len(node.outputs))) + str(len(node.outputs)))
# Ensure shapes are in 'int64'. This is to make sure the assert
# found in the `local_useless_subtensor` optimization does not fail.
new_shape = []
for sh_idx, sh in enumerate(o_shapes):
if sh is None:
continue
for i, d in enumerate(sh):
# Note: we ignore any shape element that is not typed (i.e. does
# not have a 'dtype' attribute). This means there may still
# remain int elements that are int32 on 32-bit platforms, but
# this works with `local_useless_subtensor`, so for now we
# keep it this way. See #266 for a better long-term fix.
if getattr(d, 'dtype', 'int64') != 'int64':
assert d.dtype in theano.tensor.int_dtypes
new_shape += sh[len(new_shape):i + 1]
new_shape[i] = theano.tensor.cast(d, 'int64')
if new_shape:
# We replace the shape with wrong dtype by the one with 'int64'.
new_shape += sh[len(new_shape):]
o_shapes[sh_idx] = tuple(new_shape)
new_shape = []
for r, s in izip(node.outputs, o_shapes): for r, s in izip(node.outputs, o_shapes):
self.set_shape(r, s) self.set_shape(r, s)
......
...@@ -372,7 +372,8 @@ def rand_of_dtype(shape, dtype): ...@@ -372,7 +372,8 @@ def rand_of_dtype(shape, dtype):
def makeBroadcastTester(op, expected, checks={}, name=None, **kwargs): def makeBroadcastTester(op, expected, checks={}, name=None, **kwargs):
name = str(op) if name is None:
name = str(op)
# Here we ensure the test name matches the name of the variable defined in # Here we ensure the test name matches the name of the variable defined in
# this script. This is needed to properly identify the test e.g. with the # this script. This is needed to properly identify the test e.g. with the
# --with-id option of nosetests, or simply to rerun a specific test that # --with-id option of nosetests, or simply to rerun a specific test that
...@@ -628,6 +629,7 @@ CeilIntDivTester = makeBroadcastTester( ...@@ -628,6 +629,7 @@ CeilIntDivTester = makeBroadcastTester(
uinteger=(randint(2, 3).astype("uint8"), uinteger=(randint(2, 3).astype("uint8"),
randint_nonzero(2, 3).astype("uint8")), randint_nonzero(2, 3).astype("uint8")),
), ),
name='CeilIntDiv',
# As we implement this function with neq, the gradient returned is always 0. # As we implement this function with neq, the gradient returned is always 0.
# grad=_grad_broadcast_div_mod_normal, # grad=_grad_broadcast_div_mod_normal,
# grad_rtol=div_grad_rtol, # grad_rtol=div_grad_rtol,
...@@ -674,10 +676,13 @@ _grad_broadcast_pow_normal = dict(same_shapes = (rand_ranged(1, 5, (2, 3)), rand ...@@ -674,10 +676,13 @@ _grad_broadcast_pow_normal = dict(same_shapes = (rand_ranged(1, 5, (2, 3)), rand
_good_broadcast_pow_normal_float_pow = copy(_good_broadcast_pow_normal_float) _good_broadcast_pow_normal_float_pow = copy(_good_broadcast_pow_normal_float)
del _good_broadcast_pow_normal_float_pow["empty2"] del _good_broadcast_pow_normal_float_pow["empty2"]
PowTester = makeBroadcastTester(op = pow, PowTester = makeBroadcastTester(
expected = lambda x, y: check_floatX((x, y), x ** y), op=pow,
good = _good_broadcast_pow_normal_float, expected=lambda x, y: check_floatX((x, y), x ** y),
grad = _grad_broadcast_pow_normal) good=_good_broadcast_pow_normal_float,
grad= _grad_broadcast_pow_normal,
name='Pow')
PowInplaceTester = makeBroadcastTester(op = inplace.pow_inplace, PowInplaceTester = makeBroadcastTester(op = inplace.pow_inplace,
expected = lambda x, y: x ** y, expected = lambda x, y: x ** y,
good = _good_broadcast_pow_normal_float_pow, good = _good_broadcast_pow_normal_float_pow,
...@@ -1090,15 +1095,19 @@ ErfcInplaceTester = makeBroadcastTester(op = inplace.erfc_inplace, ...@@ -1090,15 +1095,19 @@ ErfcInplaceTester = makeBroadcastTester(op = inplace.erfc_inplace,
inplace = True, inplace = True,
skip = skip_scipy) skip = skip_scipy)
ZerosLikeTester = makeBroadcastTester(op = tensor.zeros_like, ZerosLikeTester = makeBroadcastTester(
expected = numpy.zeros_like, op=tensor.zeros_like,
good = _good_broadcast_unary_normal, expected=numpy.zeros_like,
grad = _grad_broadcast_unary_normal) good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal,
name='ZerosLike')
OnesLikeTester = makeBroadcastTester(op = tensor.ones_like, OnesLikeTester = makeBroadcastTester(
expected = numpy.ones_like, op=tensor.ones_like,
good = _good_broadcast_unary_normal, expected=numpy.ones_like,
grad = _grad_broadcast_unary_normal) good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal,
name='OnesLike')
DotTester = makeTester(name = 'DotTester', DotTester = makeTester(name = 'DotTester',
op = dot, op = dot,
......
...@@ -821,6 +821,27 @@ def test_dot22scalar(): ...@@ -821,6 +821,27 @@ def test_dot22scalar():
cmp((0,4),(4,0),(0,0)) cmp((0,4),(4,0),(0,0))
cmp((0,0),(0,0),(0,0)) cmp((0,0),(0,0),(0,0))
def test_dot22scalar_cast():
    """
    Test that in `dot22_to_dot22scalar` we properly cast integers to floats.
    """
    # Note that this test was failing before d5ff6904.

    def compiled_ops(matrix, scalar):
        # Compile `dot(matrix, matrix) * scalar` with the BLAS optimizations
        # and return the ops found in the optimized graph.
        fn = theano.function([matrix, scalar],
                             T.dot(matrix, matrix) * scalar,
                             mode=mode_blas_opt)
        return [node.op for node in fn.maker.env.toposort()]

    # float64 matrix: every integer scalar dtype is expected to yield a
    # dot22scalar op.
    A = T.dmatrix()
    for scalar_int_type in T.int_dtypes:
        y = T.scalar(dtype=scalar_int_type)
        assert _dot22scalar in compiled_ops(A, y)

    # float32 matrix: int32 and int64 scalars are expected to leave a plain
    # dot22 in the graph; the other integer dtypes yield dot22scalar.
    A = T.fmatrix()
    for scalar_int_type in T.int_dtypes:
        y = T.scalar(dtype=scalar_int_type)
        ops = compiled_ops(A, y)
        if scalar_int_type in ['int32', 'int64']:
            assert _dot22 in ops
        else:
            assert _dot22scalar in ops
def test_dot_w_self(): def test_dot_w_self():
# This can trigger problems in the optimization because what would normally be a gemm must # This can trigger problems in the optimization because what would normally be a gemm must
# not be because the output is aliased to one of the inputs. # not be because the output is aliased to one of the inputs.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论