提交 e0b53770 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Remove the Clone op as a graph optimization; instead insert the clone at the
end of optimization, much as is done with deep copy.
Parent: df48ecec
......@@ -601,48 +601,6 @@ optdb.register( 'scanOp_merge'
, 2.39
, 'fast_run')
'''
class AddCloneNodes(Optimizer):
    """Graph optimizer that wraps any graph output which is also a graph
    input or a constant in a Clone node, so that every output is a
    distinct variable."""

    def __init__(self):
        Optimizer.__init__(self)

    def add_requirements(self, env):
        # replace_all_validate is provided by the ReplaceValidate toolbox.
        env.extend(toolbox.ReplaceValidate())

    def apply(self, env):
        # Wrap every output that is an input or a constant in a Clone.
        for output in env.outputs:
            needs_clone = (output in env.inputs
                           or isinstance(output, tensor.Constant))
            if needs_clone:
                env.replace_all_validate(
                    [(output, scan_utils.cloneOp(output))],
                    reason='adding_Clone_Nodes')


optdb.register('add_clone_nodes',
               AddCloneNodes(),
               70,
               'fast_run')
@gof.local_optimizer([None])
def clone_make_inplace(node):
    """Replace a copying Clone by its in-place (as_view) counterpart.

    Returns the replacement outputs, or False when the node is not a
    copying Clone.
    """
    current_op = node.op
    if not isinstance(current_op, scan_utils.Clone):
        return False
    if current_op.as_view:
        # Already in-place; nothing to do.
        return False
    inplace_op = scan_utils.Clone(as_view=True, gpu=current_op.gpu)
    return inplace_op.make_node(*node.inputs).outputs


optdb.register('cloneOp_make_inplace',
               opt.in2out(clone_make_inplace, ignore_newtrees=True),
               99,
               'fast_run',
               'inplace')
from theano.sandbox import cuda
......@@ -678,34 +636,6 @@ if cuda.cuda_available:
else:
return x
@register_opt()
@local_optimizer([])
def gpuCloneOptimization(node):
    """Move Clone ops to the GPU.

    Two patterns are handled:
      * ``gpu_from_host(Clone(x))``  ->  ``Clone[gpu](gpu_from_host(x))``
        with the results transferred back to the host;
      * a host ``Clone`` on floatX data  ->  a GPU ``Clone`` wrapped in
        host<->gpu transfers.

    Returns the replacement outputs, or False when no pattern matches.
    """
    if node.op == gpu_from_host:
        host_input = node.inputs[0]
        # BUG FIX: the original compared the op *instance* to the Clone
        # *class* with ``==`` (always False given Clone.__eq__ compares
        # types), dead-branching this case; use the same
        # ``type(...) == scan_utils.Clone`` test as the branch below.
        if (host_input.owner
                and type(host_input.owner.op) == scan_utils.Clone
                and host_input.owner.inputs[0].dtype == config.floatX
                and not host_input.owner.op.gpu):
            # BUG FIX: original referenced undefined ``host_owner``
            # (NameError at runtime); it must be ``host_input.owner``.
            x = host_input.owner.inputs[0]
            x = safe_to_gpu(x)
            op = host_input.owner.op
            nw_op = scan_utils.Clone(as_view=op.as_view,
                                     gpu=True).make_node(x)
            return [safe_to_cpu(out) for out in nw_op.outputs]
    if (type(node.op) == scan_utils.Clone
            and not node.op.gpu
            and node.inputs[0].dtype == config.floatX):
        x = safe_to_gpu(node.inputs[0])
        op = node.op
        nw_op = scan_utils.Clone(as_view=op.as_view,
                                 gpu=True).make_node(x)
        return [safe_to_cpu(out) for out in nw_op.outputs]
    return False
@register_opt()
@local_optimizer([])
......
......@@ -919,19 +919,11 @@ class Scan(Op):
scan_shared_ins +
old_scan_shared_ins +
inner_other_args )
_inner_gfn_outs = ( scan_mit_mot_outs +
inner_gfn_outs = ( scan_mit_mot_outs +
scan_nit_sot_outs +
scan_shared_outs +
old_scan_shared_outs )
# Create Op and apply it
inner_gfn_outs = []
for o in _inner_gfn_outs:
if (o in inner_gfn_ins or
isinstance(o, tensor.Constant)):
inner_gfn_outs.append( scan_utils.cloneOp(o) )
else:
inner_gfn_outs.append(o)
local_op = Scan( inner_gfn_ins, inner_gfn_outs, info )
outputs = local_op(*scan_inputs)
if type(outputs) not in (list, tuple):
......
......@@ -276,6 +276,14 @@ def scan_function( inputs
optimizer(env)
_logger.debug('Optimizing took %f seconds' %(time.time() - t0))
mask = [ 0 for x in env.outputs[slices:] ]
for i,out in enumerate(env.outputs):
if (out in env.inputs or
isinstance(out, tensor.Constant)):
env.change_input('output', i, Clone()(out) )
for i in xrange(len(env.outputs[slices:])):
views_of_output_i = set()
view_tree_set(alias_root(env.outputs[i]), views_of_output_i)
......@@ -291,7 +299,6 @@ def scan_function( inputs
for input_j in env.inputs:
# do not allow outputs to be aliased to an inputs (j), unless
# a) that j'th input has been 'destroyed' by e.g. in-place computations
# b) that j'th input is a shared variable that is also being updated
if hasattr(env,'get_destroyers_of') and env.get_destroyers_of(input_j):
continue
if input_j in views_of_output_i:
......@@ -510,36 +517,24 @@ def expand( tensor_var, size):
class Clone(Op):
    """Identity op that returns its input as a view (no copy is made).

    NOTE(review): the diff rendering interleaved the removed and added
    versions of this class (two ``__init__`` definitions, duplicate
    ``return`` statements), which is broken Python when flattened.  This
    is the reconstructed post-commit version, in which Clone always has
    view semantics and the ``as_view``/``gpu`` flags are gone.
    """

    def __init__(self):
        # Output 0 is a view of input 0 — no copy is ever performed.
        self.view_map = {0: [0]}

    def __eq__(self, other):
        # All Clone instances are interchangeable (no parameters).
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return 'clone[as_view]'

    def make_node(self, *inputs):
        x = inputs[0]
        # The output has exactly the input's type.
        return Apply(self, inputs, [x.type()])

    def perform(self, node, args, outs):
        # View semantics: pass the input through unchanged.
        outs[0][0] = args[0]

    def infer_shape(self, node, input_shapes):
        # A view has the same shape as its input.
        return input_shapes
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论