提交 34f23400,作者:Frederic

Move fusion before AddDestroyHandler.

We take care to still have this order: gpu, cpu/gpu fusion, then gpu again.
上级 48fcbbc9
......@@ -62,7 +62,7 @@ optdb.register('gpu_opt',
# inside the elemwise. When there is no float64 op, this is working.
optdb.register('gpu_after_fusion',
ProxyDB(gpu_seqopt),
optdb.__position__.get('elemwise_fusion', 71) + .1,
optdb.__position__.get('elemwise_fusion', 49) + .1,
'gpu')
......@@ -1338,9 +1338,10 @@ gpu_local_elemwise_fusion = tensor.opt.local_elemwise_fusion_op(
max_inputs_to_GpuElemwise)
if config.gpu.local_elemwise_fusion:
_logger.debug("enabling optimization fusion of gpu elemwise in fast_run")
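# Must be after gpu at 48.5 and before AddDestroyHandler at 49.5. NOTE(review): cpu fusion ('elemwise_fusion') is now registered at 49 too, not 40 -- confirm the relative order of the two fusion passes.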
optdb.register('gpu_elemwise_fusion',
tensor.opt.FusionOptimizer(gpu_local_elemwise_fusion),
71.00, 'fast_run', 'fusion',
49, 'fast_run', 'fusion',
'local_elemwise_fusion', 'gpu')
else:
_logger.debug(("not enabling optimization fusion of gpu elemwise in "
......
......@@ -1505,6 +1505,12 @@ class GemmOptimizer(Optimizer):
callbacks_before = fgraph.execute_callbacks_times.copy()
callback_before = fgraph.execute_callbacks_time
class Updater:
def on_import(self, fgraph, new_node, reason):
if new_node is not node:
nodelist.append(new_node)
u = Updater()
fgraph.attach_feature(u)
while did_something:
nb_iter += 1
t0 = time.time()
......@@ -1551,6 +1557,7 @@ class GemmOptimizer(Optimizer):
except ReplacementDidntRemovedError, e:
nb_replacement_didn_t_remove += 1
self.warned = True
fgraph.remove_feature(u)
if fgraph.profile:
validate_time = fgraph.profile.validate_time - validate_before
callback_time = fgraph.execute_callbacks_time - callback_before
......
......@@ -4853,13 +4853,14 @@ class FusionOptimizer(Optimizer):
if config.tensor.local_elemwise_fusion:
_logger.debug("enabling optimization fusion elemwise in fast_run")
#Must be after gpu(48.5) and before AddDestroyHandler(49.5)
compile.optdb.register('elemwise_fusion',
FusionOptimizer(local_elemwise_fusion), 71.00,
FusionOptimizer(local_elemwise_fusion), 49,
'fast_run', 'fusion', 'local_elemwise_fusion',
'FusionOptimizer')
else:
_logger.debug("not enabling optimization fusion elemwise in fast_run")
compile.optdb.register('elemwise_fusion',
FusionOptimizer(local_elemwise_fusion), 71.00,
FusionOptimizer(local_elemwise_fusion), 49,
'fusion', 'local_elemwise_fusion',
'FusionOptimizer')
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论