提交 3db235a7 作者: sentient07 提交者: Reyhane Askari

Replaced host_from_gpu(x) calls with x.transfer('cpu')

上级 a4126bcc
...@@ -663,8 +663,8 @@ class GpuFromHost(Op): ...@@ -663,8 +663,8 @@ class GpuFromHost(Op):
def grad(self, inputs, grads): def grad(self, inputs, grads):
gz, = grads gz, = grads
return [host_from_gpu(as_gpuarray_variable( return [as_gpuarray_variable(
gz, context_name=self.context_name))] gz, context_name=self.context_name).transfer('cpu')]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
ev, = eval_points ev, = eval_points
...@@ -1132,7 +1132,7 @@ class GpuReshape(HideC, tensor.Reshape): ...@@ -1132,7 +1132,7 @@ class GpuReshape(HideC, tensor.Reshape):
ctx_name = infer_context_name(x) ctx_name = infer_context_name(x)
x = as_gpuarray_variable(x, context_name=ctx_name) x = as_gpuarray_variable(x, context_name=ctx_name)
shp = tensor.as_tensor_variable(shp) shp = tensor.as_tensor_variable(shp)
res = host_from_gpu(x).reshape(shp, ndim=self.ndim) res = x.transfer('cpu').reshape(shp, ndim=self.ndim)
otype = GpuArrayType(dtype=res.dtype, otype = GpuArrayType(dtype=res.dtype,
broadcastable=res.broadcastable, broadcastable=res.broadcastable,
context_name=ctx_name) context_name=ctx_name)
......
...@@ -172,7 +172,7 @@ def safe_to_gpu(x, ctx_name): ...@@ -172,7 +172,7 @@ def safe_to_gpu(x, ctx_name):
def safe_to_cpu(x): def safe_to_cpu(x):
if isinstance(x.type, GpuArrayType): if isinstance(x.type, GpuArrayType):
return host_from_gpu(x) return x.transfer('cpu')
else: else:
return x return x
...@@ -236,7 +236,7 @@ def op_lifter(OP, cuda_only=False): ...@@ -236,7 +236,7 @@ def op_lifter(OP, cuda_only=False):
elif isinstance(new_op, (tuple, list)): elif isinstance(new_op, (tuple, list)):
return [safe_to_cpu(o) for o in new_op] return [safe_to_cpu(o) for o in new_op]
else: # suppose it is a variable on the GPU else: # suppose it is a variable on the GPU
return [host_from_gpu(new_op)] return [new_op.transfer('cpu')]
return False return False
local_opt.__name__ = maker.__name__ local_opt.__name__ = maker.__name__
return local_optimizer(OP)(local_opt) return local_optimizer(OP)(local_opt)
...@@ -269,7 +269,7 @@ class InputToGpuOptimizer(Optimizer): ...@@ -269,7 +269,7 @@ class InputToGpuOptimizer(Optimizer):
continue continue
try: try:
new_input = host_from_gpu(gpu_from_host(target)(input)) new_input = gpu_from_host(target)(input).transfer('cpu')
fgraph.replace_validate(input, new_input, fgraph.replace_validate(input, new_input,
"InputToGpuOptimizer") "InputToGpuOptimizer")
except TypeError: except TypeError:
...@@ -430,7 +430,7 @@ class GraphToGPU(Optimizer): ...@@ -430,7 +430,7 @@ class GraphToGPU(Optimizer):
new_o.owner.inputs[0].type == o.type): new_o.owner.inputs[0].type == o.type):
new_o = new_o.owner.inputs[0] new_o = new_o.owner.inputs[0]
else: else:
new_o = safe_to_cpu(new_o) new_o = new_o.transfer('cpu')
new_nodes.append(new_o) new_nodes.append(new_o)
fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes), fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes),
reason=self.__class__.__name__) reason=self.__class__.__name__)
...@@ -546,7 +546,7 @@ def local_cut_gpu_transfers(node): ...@@ -546,7 +546,7 @@ def local_cut_gpu_transfers(node):
# gpub -> # gpub ->
if isinstance(n2.op, GpuToGpu): if isinstance(n2.op, GpuToGpu):
return [host_from_gpu(n2.inputs[0])] return [n2.inputs[0].transfer('cpu')]
# ? -> gpua -> gpub # ? -> gpua -> gpub
elif isinstance(node.op, GpuToGpu): elif isinstance(node.op, GpuToGpu):
...@@ -600,7 +600,7 @@ def local_gpua_alloc2(node): ...@@ -600,7 +600,7 @@ def local_gpua_alloc2(node):
i.owner.op in [host_from_gpu, tensor.alloc] i.owner.op in [host_from_gpu, tensor.alloc]
for i in c.inputs[1:]) for i in c.inputs[1:])
for c, idx in node.outputs[0].clients)): for c, idx in node.outputs[0].clients)):
return [host_from_gpu(gpu_alloc(None)(*node.inputs))] return [gpu_alloc(None)(*node.inputs).transfer('cpu')]
@register_opt('fast_compile') @register_opt('fast_compile')
...@@ -918,7 +918,7 @@ def local_gpu_pdbbreakpoint_op(node): ...@@ -918,7 +918,7 @@ def local_gpu_pdbbreakpoint_op(node):
new_outputs = [] new_outputs = []
for i in range(len(new_op_outputs)): for i in range(len(new_op_outputs)):
if input_transfered[i]: if input_transfered[i]:
new_outputs.append(host_from_gpu(new_op_outputs[i])) new_outputs.append(new_op_outputs[i].transfer('cpu'))
else: else:
new_outputs.append(new_op_outputs[i]) new_outputs.append(new_op_outputs[i])
......
...@@ -9,7 +9,7 @@ import theano ...@@ -9,7 +9,7 @@ import theano
y = theano.tensor.fvector() y = theano.tensor.fvector()
x = theano.shared(np.zeros(1, dtype='float32')) x = theano.shared(np.zeros(1, dtype='float32'))
f1 = theano.function([y], updates={x: y}) f1 = theano.function([y], updates={x: y})
f2 = theano.function([], theano.sandbox.cuda.host_from_gpu(x)) f2 = theano.function([], x.transfer('cpu'))
print(f1.maker.fgraph.toposort()) print(f1.maker.fgraph.toposort())
print(f2.maker.fgraph.toposort()) print(f2.maker.fgraph.toposort())
for i in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]: for i in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]:
......
...@@ -29,8 +29,7 @@ from theano.gpuarray.basic_ops import GpuKernelBase, Kernel, infer_context_name, ...@@ -29,8 +29,7 @@ from theano.gpuarray.basic_ops import GpuKernelBase, Kernel, infer_context_name,
from theano.gpuarray.type import GpuArrayType from theano.gpuarray.type import GpuArrayType
from theano.gpuarray.fp16_help import write_w from theano.gpuarray.fp16_help import write_w
from theano.gpuarray.opt import (register_opt as register_gpua, from theano.gpuarray.opt import (register_opt as register_gpua,
register_opt2, register_opt2)
host_from_gpu as host_from_gpua)
if theano.sandbox.cuda.cuda_available: if theano.sandbox.cuda.cuda_available:
from theano.sandbox.cuda import (CudaNdarrayType, from theano.sandbox.cuda import (CudaNdarrayType,
float32_shared_constructor) float32_shared_constructor)
...@@ -1621,7 +1620,7 @@ def local_gpua_mrg_graph(op, context_name, inputs, outputs): ...@@ -1621,7 +1620,7 @@ def local_gpua_mrg_graph(op, context_name, inputs, outputs):
op.output_type.ndim, op.output_type.ndim,
op.output_type.dtype, op.output_type.dtype,
inputs[1]) inputs[1])
return [outs[0], host_from_gpua(outs[1])] return [outs[0], outs[1].transfer('cpu')]
@register_gpua('fast_compile') @register_gpua('fast_compile')
......
...@@ -332,7 +332,7 @@ def make_gpu_optimizer(op, to_gpu): ...@@ -332,7 +332,7 @@ def make_gpu_optimizer(op, to_gpu):
new_inp[idx] = cuda.gpu_from_host(new_inp[idx]) new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
result_node = op()(*new_inp) result_node = op()(*new_inp)
copy_stack_trace(node.outputs[0], result_node) copy_stack_trace(node.outputs[0], result_node)
transfer_node = cuda.host_from_gpu(result_node) transfer_node = result_node.transfer('cpu')
copy_stack_trace(node.outputs[0], transfer_node) copy_stack_trace(node.outputs[0], transfer_node)
return [transfer_node] return [transfer_node]
if node.op == cuda.gpu_from_host: if node.op == cuda.gpu_from_host:
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论