Replace host_from_gpu(x) calls with x.transfer('cpu')

3db235a7 · sentient07 · Reyhane Askari · a4126bcc · 3db235a7 · 3db235a7
--- a/theano/gpuarray/basic_ops.py
+++ b/theano/gpuarray/basic_ops.py
@@ -663,8 +663,8 @@ class GpuFromHost(Op):
    def grad(self, inputs, grads):
        gz, = grads
-        return [host_from_gpu(as_gpuarray_variable(
+        return [as_gpuarray_variable(
-                gz, context_name=self.context_name))]
+                gz, context_name=self.context_name).transfer('cpu')]
    def R_op(self, inputs, eval_points):
        ev, = eval_points
@@ -1132,7 +1132,7 @@ class GpuReshape(HideC, tensor.Reshape):
        ctx_name = infer_context_name(x)
        x = as_gpuarray_variable(x, context_name=ctx_name)
        shp = tensor.as_tensor_variable(shp)
-        res = host_from_gpu(x).reshape(shp, ndim=self.ndim)
+        res = x.transfer('cpu').reshape(shp, ndim=self.ndim)
        otype = GpuArrayType(dtype=res.dtype,
                             broadcastable=res.broadcastable,
                             context_name=ctx_name)

--- a/theano/gpuarray/opt.py
+++ b/theano/gpuarray/opt.py
@@ -172,7 +172,7 @@ def safe_to_gpu(x, ctx_name):
 def safe_to_cpu(x):
    if isinstance(x.type, GpuArrayType):
-        return host_from_gpu(x)
+        return x.transfer('cpu')
    else:
        return x
@@ -236,7 +236,7 @@ def op_lifter(OP, cuda_only=False):
                    elif isinstance(new_op, (tuple, list)):
                        return [safe_to_cpu(o) for o in new_op]
                    else:  # suppose it is a variable on the GPU
-                        return [host_from_gpu(new_op)]
+                        return [new_op.transfer('cpu')]
            return False
        local_opt.__name__ = maker.__name__
        return local_optimizer(OP)(local_opt)
@@ -269,7 +269,7 @@ class InputToGpuOptimizer(Optimizer):
                continue
            try:
-                new_input = host_from_gpu(gpu_from_host(target)(input))
+                new_input = gpu_from_host(target)(input).transfer('cpu')
                fgraph.replace_validate(input, new_input,
                                        "InputToGpuOptimizer")
            except TypeError:
@@ -430,7 +430,7 @@ class GraphToGPU(Optimizer):
                        new_o.owner.inputs[0].type == o.type):
                    new_o = new_o.owner.inputs[0]
                else:
-                    new_o = safe_to_cpu(new_o)
+                    new_o = new_o.transfer('cpu')
            new_nodes.append(new_o)
        fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes),
                                    reason=self.__class__.__name__)
@@ -546,7 +546,7 @@ def local_cut_gpu_transfers(node):
        # gpub ->
        if isinstance(n2.op, GpuToGpu):
-            return [host_from_gpu(n2.inputs[0])]
+            return [n2.inputs[0].transfer('cpu')]
    # ? -> gpua -> gpub
    elif isinstance(node.op, GpuToGpu):
@@ -600,7 +600,7 @@ def local_gpua_alloc2(node):
                i.owner.op in [host_from_gpu, tensor.alloc]
                for i in c.inputs[1:])
            for c, idx in node.outputs[0].clients)):
-        return [host_from_gpu(gpu_alloc(None)(*node.inputs))]
+        return [gpu_alloc(None)(*node.inputs).transfer('cpu')]
 @register_opt('fast_compile')
@@ -918,7 +918,7 @@ def local_gpu_pdbbreakpoint_op(node):
        new_outputs = []
        for i in range(len(new_op_outputs)):
            if input_transfered[i]:
-                new_outputs.append(host_from_gpu(new_op_outputs[i]))
+                new_outputs.append(new_op_outputs[i].transfer('cpu'))
            else:
                new_outputs.append(new_op_outputs[i])

--- a/theano/misc/latence_gpu_transfert.py
+++ b/theano/misc/latence_gpu_transfert.py
@@ -9,7 +9,7 @@ import theano
 y = theano.tensor.fvector()
 x = theano.shared(np.zeros(1, dtype='float32'))
 f1 = theano.function([y], updates={x: y})
-f2 = theano.function([], theano.sandbox.cuda.host_from_gpu(x))
+f2 = theano.function([], x.transfer('cpu'))
 print(f1.maker.fgraph.toposort())
 print(f2.maker.fgraph.toposort())
 for i in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]:

--- a/theano/sandbox/rng_mrg.py
+++ b/theano/sandbox/rng_mrg.py
@@ -29,8 +29,7 @@ from theano.gpuarray.basic_ops import GpuKernelBase, Kernel, infer_context_name,
 from theano.gpuarray.type import GpuArrayType
 from theano.gpuarray.fp16_help import write_w
 from theano.gpuarray.opt import (register_opt as register_gpua,
-                                 register_opt2,
+                                 register_opt2)
-                                 host_from_gpu as host_from_gpua)
 if theano.sandbox.cuda.cuda_available:
    from theano.sandbox.cuda import (CudaNdarrayType,
                                     float32_shared_constructor)
@@ -1621,7 +1620,7 @@ def local_gpua_mrg_graph(op, context_name, inputs, outputs):
                                    op.output_type.ndim,
                                    op.output_type.dtype,
                                    inputs[1])
-        return [outs[0], host_from_gpua(outs[1])]
+        return [outs[0], outs[1].transfer('cpu')]
 @register_gpua('fast_compile')

--- a/theano/tensor/nnet/conv3d2d.py
+++ b/theano/tensor/nnet/conv3d2d.py
@@ -332,7 +332,7 @@ def make_gpu_optimizer(op, to_gpu):
                    new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
                result_node = op()(*new_inp)
                copy_stack_trace(node.outputs[0], result_node)
-                transfer_node = cuda.host_from_gpu(result_node)
+                transfer_node = result_node.transfer('cpu')
                copy_stack_trace(node.outputs[0], transfer_node)
                return [transfer_node]
        if node.op == cuda.gpu_from_host: