提交 9c40f5ef authored 作者: Frederic Bastien's avatar Frederic Bastien

Use the new ProxyDB to register the GPU optimizations twice.

上级 072b8fc5
...@@ -193,7 +193,10 @@ def use(device, force=False, default_to_move_computation_to_gpu = True, ...@@ -193,7 +193,10 @@ def use(device, force=False, default_to_move_computation_to_gpu = True,
"is already in use.") %(str(device), use.device_number)) "is already in use.") %(str(device), use.device_number))
if default_to_move_computation_to_gpu: if default_to_move_computation_to_gpu:
optdb.add_tags('gpu', optdb.add_tags('gpu_opt',
'fast_run',
'inplace')
optdb.add_tags('gpu_after_fusion',
'fast_run', 'fast_run',
'inplace') 'inplace')
......
...@@ -6,8 +6,10 @@ import theano ...@@ -6,8 +6,10 @@ import theano
import numpy import numpy
from theano import scalar as scal from theano import scalar as scal
from theano import tensor, compile, gof from theano import tensor, compile, gof
from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, Optimizer,
toolbox, DestroyHandler, EquilibriumOptimizer) from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB,
Optimizer, toolbox, DestroyHandler,
EquilibriumOptimizer)
from theano.sandbox.cuda.basic_ops import * from theano.sandbox.cuda.basic_ops import *
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
...@@ -31,12 +33,16 @@ gpu_seqopt.register('gpu_local_optimizations', gpu_optimizer, 1, ...@@ -31,12 +33,16 @@ gpu_seqopt.register('gpu_local_optimizations', gpu_optimizer, 1,
'fast_run', 'inplace') 'fast_run', 'inplace')
gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2, gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2,
'fast_run', 'inplace') 'fast_run', 'inplace')
optdb.register('gpu', optdb.register('gpu_opt',
gpu_seqopt, optdb.__position__.get('add_destroy_handler', 49.5) - 1) gpu_seqopt,
optdb.__position__.get('add_destroy_handler', 49.5) - 1,
'gpu')
# This second pass is needed as the fusion can put all the non-float32 code # This second pass is needed as the fusion can put all the non-float32 code
# inside the elemwise. When there is no float64 op, this works. # inside the elemwise. When there is no float64 op, this works.
optdb.register('gpu_after_fusion', optdb.register('gpu_after_fusion',
gpu_seqopt, optdb.__position__.get('elemwise_fusion', 71) + .1) ProxyDB(gpu_seqopt),
optdb.__position__.get('elemwise_fusion', 71) + .1,
'gpu')
def register_opt(*tags, **kwargs): def register_opt(*tags, **kwargs):
def f(local_opt): def f(local_opt):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论