提交 e1e7f2e1 authored 作者: abergeron's avatar abergeron

Merge pull request #1804 from nouiz/fix_scan_mem

[BUG] Fix scan mem optimization
......@@ -17,6 +17,7 @@ install:
# So we test with 0.8. Our internal buildbot has 0.7.2.
# We install it later, only for the PART that needs it.
# - "pip install -q scipy==0.8 --use-mirrors"
- "pip install nose-timelimit --use-mirrors"
- "pip install . --no-deps --use-mirrors"
# command to run tests
env:
......@@ -36,7 +37,7 @@ script:
- df -h
- ulimit -a
- echo $PART
- theano-nose -v $PART
- theano-nose --with-timelimit -v $PART
#after_script:
......
......@@ -189,9 +189,6 @@ optdb.register('Print1.51', PrintCurrentFunctionGraph('Post-stabilize'),
optdb.register('specialize', gof.EquilibriumDB(),
2, 'fast_run')
optdb.register('Print2.01', PrintCurrentFunctionGraph('Post-specialize'),
2.01,) # 'fast_run', 'fast_compile')
# misc special cases for speed that break canonicalization
optdb.register('uncanonicalize', gof.EquilibriumDB(),
3, 'fast_run')
......
......@@ -248,6 +248,11 @@ class SequenceDB(DB):
position_cutoff = tags[0].position_cutoff
opts = [o for o in opts if self.__position__[o.name] < position_cutoff]
# We want to sort by position and then, on collision, by name
# for deterministic optimization. Since Python 2.2, sort is
# stable, so sort by name first, then by position. This gives
# the order we want.
opts.sort(key=lambda obj: obj.name)
opts.sort(key=lambda obj: self.__position__[obj.name])
ret = opt.SeqOptimizer(opts, failure_callback=self.failure_callback)
if hasattr(tags[0], 'name'):
......
......@@ -1681,7 +1681,6 @@ scan_eqopt1 = theano.gof.EquilibriumDB()
scan_seqopt1 = theano.gof.SequenceDB()
scan_eqopt2 = theano.gof.EquilibriumDB()
scan_seqopt2 = theano.gof.EquilibriumDB()
# We run before blas opt at 1.7 and specialize 2.0
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register('scan_eqopt1', scan_eqopt1, .1, 'fast_run', 'scan')
......@@ -1694,8 +1693,6 @@ optdb.register('scanOp_make_inplace',
'inplace',
'scan')
scan_eqopt2.register(
'all_scan_opts', scan_seqopt2, 1, 'fast_run', 'scan')
scan_eqopt1.register(
'all_pushout_opt', scan_seqopt1, 1, 'fast_run', 'scan')
......@@ -1731,7 +1728,7 @@ scan_seqopt1.register('scan_pushout_dot1',
'scan')
scan_seqopt2.register('constant_folding_for_scan2',
scan_eqopt2.register('constant_folding_for_scan2',
opt.in2out(tensor.opt.constant_folding,
ignore_newtrees=True),
1,
......@@ -1739,7 +1736,7 @@ scan_seqopt2.register('constant_folding_for_scan2',
'scan')
scan_seqopt2.register('scanOp_remove_constants_and_unused_inputs1',
scan_eqopt2.register('scanOp_remove_constants_and_unused_inputs1',
opt.in2out(remove_constants_and_unused_inputs_scan,
ignore_newtrees=True),
2,
......@@ -1751,14 +1748,14 @@ scan_seqopt2.register('scanOp_remove_constants_and_unused_inputs1',
# after const merge but before stabilize so that we can have identity
# for equivalent nodes but we still have the chance to hoist stuff out
# of the scan later.
scan_seqopt2.register('scanOp_merge',
scan_eqopt2.register('scanOp_merge',
ScanMerge(),
4,
'fast_run',
'scan')
# After Merge optimization
scan_seqopt2.register('scanop_remove_constants_and_unused_inputs2',
scan_eqopt2.register('scanop_remove_constants_and_unused_inputs2',
opt.in2out(remove_constants_and_unused_inputs_scan,
ignore_newtrees=True),
5,
......@@ -1766,7 +1763,7 @@ scan_seqopt2.register('scanop_remove_constants_and_unused_inputs2',
'fast_run',
'scan')
scan_seqopt2.register('scanOp_merge_inouts',
scan_eqopt2.register('scanOp_merge_inouts',
opt.in2out(scan_merge_inouts, ignore_newtrees=True),
6,
'scan_merge_inouts',
......@@ -1776,14 +1773,14 @@ scan_seqopt2.register('scanOp_merge_inouts',
# Just before specialize, so that other optimizations
# like constant folding have already been applied.
# This doesn't introduce inplace.
scan_seqopt2.register('scanOp_save_mem',
scan_eqopt2.register('scanOp_save_mem',
ScanSaveMem(),
7,
'fast_run',
'scan')
# After everything else
scan_seqopt2.register('scanOp_remove_constants_and_unused_inputs3',
scan_eqopt2.register('scanOp_remove_constants_and_unused_inputs3',
opt.in2out(remove_constants_and_unused_inputs_scan,
ignore_newtrees=True),
8,
......
......@@ -303,7 +303,8 @@ def inplace_elemwise_optimizer_op(OP):
return inplace_elemwise_optimizer
inplace_elemwise_optimizer = inplace_elemwise_optimizer_op(T.Elemwise)
compile.optdb.register('inplace_opt', inplace_elemwise_optimizer, 75,
compile.optdb.register('inplace_elemwise_opt', inplace_elemwise_optimizer, 75,
'inplace_opt', # for historic reason
'inplace_elemwise_optimizer',
'fast_run', 'inplace')
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论