提交 e0b53770 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Remove the Clone op as a graph optimization; instead insert the clone at the
end of optimization, much as is done with deep copy.
Parent: df48ecec
......@@ -601,48 +601,6 @@ optdb.register( 'scanOp_merge'
, 2.39
, 'fast_run')
'''
class AddCloneNodes(Optimizer):
    """Graph optimizer that wraps any graph output which is also a graph
    input or a constant in a Clone node, so that every output is a
    distinct variable."""

    def __init__(self):
        Optimizer.__init__(self)

    def add_requirements(self, env):
        # replace_all_validate is provided by the ReplaceValidate toolbox.
        env.extend(toolbox.ReplaceValidate())

    def apply(self, env):
        # Wrap every output that is an input or a constant in a Clone.
        for output in env.outputs:
            needs_clone = (output in env.inputs
                           or isinstance(output, tensor.Constant))
            if needs_clone:
                env.replace_all_validate(
                    [(output, scan_utils.cloneOp(output))],
                    reason='adding_Clone_Nodes')


optdb.register('add_clone_nodes',
               AddCloneNodes(),
               70,
               'fast_run')
@gof.local_optimizer([None])
def clone_make_inplace(node):
    """Replace a copying Clone by its in-place (as_view) counterpart.

    Returns the replacement outputs, or False when the node is not a
    copying Clone.
    """
    current_op = node.op
    if not isinstance(current_op, scan_utils.Clone):
        return False
    if current_op.as_view:
        # Already in-place; nothing to do.
        return False
    inplace_op = scan_utils.Clone(as_view=True, gpu=current_op.gpu)
    return inplace_op.make_node(*node.inputs).outputs


optdb.register('cloneOp_make_inplace',
               opt.in2out(clone_make_inplace, ignore_newtrees=True),
               99,
               'fast_run',
               'inplace')
from theano.sandbox import cuda
......@@ -678,34 +636,6 @@ if cuda.cuda_available:
else:
return x
@register_opt()
@local_optimizer([])
def gpuCloneOptimization(node):
    """Move Clone ops to the GPU.

    Two patterns are handled:
      * ``gpu_from_host(Clone(x))``  ->  ``Clone[gpu](gpu_from_host(x))``
        with the results transferred back to the host;
      * a host ``Clone`` on floatX data  ->  a GPU ``Clone`` wrapped in
        host<->gpu transfers.

    Returns the replacement outputs, or False when no pattern matches.
    """
    if node.op == gpu_from_host:
        host_input = node.inputs[0]
        # BUG FIX: the original compared the op *instance* to the Clone
        # *class* with ``==`` (always False given Clone.__eq__ compares
        # types), dead-branching this case; use the same
        # ``type(...) == scan_utils.Clone`` test as the branch below.
        if (host_input.owner
                and type(host_input.owner.op) == scan_utils.Clone
                and host_input.owner.inputs[0].dtype == config.floatX
                and not host_input.owner.op.gpu):
            # BUG FIX: original referenced undefined ``host_owner``
            # (NameError at runtime); it must be ``host_input.owner``.
            x = host_input.owner.inputs[0]
            x = safe_to_gpu(x)
            op = host_input.owner.op
            nw_op = scan_utils.Clone(as_view=op.as_view,
                                     gpu=True).make_node(x)
            return [safe_to_cpu(out) for out in nw_op.outputs]
    if (type(node.op) == scan_utils.Clone
            and not node.op.gpu
            and node.inputs[0].dtype == config.floatX):
        x = safe_to_gpu(node.inputs[0])
        op = node.op
        nw_op = scan_utils.Clone(as_view=op.as_view,
                                 gpu=True).make_node(x)
        return [safe_to_cpu(out) for out in nw_op.outputs]
    return False
@register_opt()
@local_optimizer([])
......
......@@ -919,19 +919,11 @@ class Scan(Op):
scan_shared_ins +
old_scan_shared_ins +
inner_other_args )
_inner_gfn_outs = ( scan_mit_mot_outs +
inner_gfn_outs = ( scan_mit_mot_outs +
scan_nit_sot_outs +
scan_shared_outs +
old_scan_shared_outs )
# Create Op and apply it
inner_gfn_outs = []
for o in _inner_gfn_outs:
if (o in inner_gfn_ins or
isinstance(o, tensor.Constant)):
inner_gfn_outs.append( scan_utils.cloneOp(o) )
else:
inner_gfn_outs.append(o)
local_op = Scan( inner_gfn_ins, inner_gfn_outs, info )
outputs = local_op(*scan_inputs)
if type(outputs) not in (list, tuple):
......
......@@ -276,6 +276,14 @@ def scan_function( inputs
optimizer(env)
_logger.debug('Optimizing took %f seconds' %(time.time() - t0))
mask = [ 0 for x in env.outputs[slices:] ]
for i,out in enumerate(env.outputs):
if (out in env.inputs or
isinstance(out, tensor.Constant)):
env.change_input('output', i, Clone()(out) )
for i in xrange(len(env.outputs[slices:])):
views_of_output_i = set()
view_tree_set(alias_root(env.outputs[i]), views_of_output_i)
......@@ -291,7 +299,6 @@ def scan_function( inputs
for input_j in env.inputs:
# do not allow outputs to be aliased to an inputs (j), unless
# a) that j'th input has been 'destroyed' by e.g. in-place computations
# b) that j'th input is a shared variable that is also being updated
if hasattr(env,'get_destroyers_of') and env.get_destroyers_of(input_j):
continue
if input_j in views_of_output_i:
......@@ -510,36 +517,24 @@ def expand( tensor_var, size):
class Clone(Op):
    """Identity op that returns its input as a view (no copy is made).

    NOTE(review): the diff rendering interleaved the removed and added
    versions of this class (two ``__init__`` definitions, duplicate
    ``return`` statements), which is broken Python when flattened.  This
    is the reconstructed post-commit version, in which Clone always has
    view semantics and the ``as_view``/``gpu`` flags are gone.
    """

    def __init__(self):
        # Output 0 is a view of input 0 — no copy is ever performed.
        self.view_map = {0: [0]}

    def __eq__(self, other):
        # All Clone instances are interchangeable (no parameters).
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return 'clone[as_view]'

    def make_node(self, *inputs):
        x = inputs[0]
        # The output has exactly the input's type.
        return Apply(self, inputs, [x.type()])

    def perform(self, node, args, outs):
        # View semantics: pass the input through unchanged.
        outs[0][0] = args[0]

    def infer_shape(self, node, input_shapes):
        # A view has the same shape as its input.
        return input_shapes
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论