提交 d7688cea authored 作者: sentient07's avatar sentient07

Cached GpuFromHost

上级 44480978
......@@ -70,7 +70,7 @@ def as_gpuarray_variable(x, context_name):
# If we couldn't deal with transfers, then maybe it's a tensor
if isinstance(x.type, tensor.TensorType):
return GpuFromHost(context_name)(x)
return gpu_from_host(context_name)(x)
# Try _as_GpuArrayVariable if possible
if hasattr(x, '_as_GpuArrayVariable'):
......@@ -544,7 +544,7 @@ class HostFromGpu(Op):
def grad(self, inputs, grads):
    """Gradient of HostFromGpu: move the output gradient back to the GPU.

    Parameters
    ----------
    inputs : list of GpuArray variables
        The forward inputs; ``inputs[0].type.context_name`` identifies the
        GPU context the gradient must live on.
    grads : list
        The gradient w.r.t. the (host-side) output.

    Returns
    -------
    list
        A single-element list with the gradient transferred to the GPU.
    """
    gz, = grads
    # Use the cached per-context gpu_from_host factory rather than
    # constructing a fresh GpuFromHost Op, so identical transfer nodes
    # share one Op instance and can be merged by the optimizer.
    return [gpu_from_host(inputs[0].type.context_name)(gz)]
def R_op(self, inputs, eval_points):
ev, = eval_points
......@@ -647,6 +647,14 @@ class GpuFromHost(Op):
return (9,)
# Caching GpuFromHost: keep one GpuFromHost Op instance per context so that
# equivalent host->GPU transfer nodes compare equal and can be merged.
def gpu_from_host(ctx):
    """Return the cached ``GpuFromHost`` Op for context name ``ctx``.

    The Op is created lazily on first use and stored in
    ``gpu_from_host.cache``, keyed by context name.
    """
    # Bug fix: the membership test must check gpu_from_host.cache, not
    # gpu_alloc.cache (copy-paste error from the GpuAlloc caching helper).
    # With the wrong dict, a cached Op was recreated or stale entries were
    # returned depending on gpu_alloc's unrelated cache state.
    if ctx not in gpu_from_host.cache:
        gpu_from_host.cache[ctx] = GpuFromHost(ctx)
    return gpu_from_host.cache[ctx]
gpu_from_host.cache = {}
class GpuToGpu(Op):
"""
Transfer data between GPUs.
......
......@@ -36,7 +36,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name,
HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous, gpu_contiguous,
GpuAlloc, GpuAllocEmpty, GpuReshape,
GpuEye, gpu_join, GpuJoin, gpu_alloc_empty, gpu_alloc)
GpuEye, gpu_join, GpuJoin, gpu_alloc_empty, gpu_alloc, gpu_from_host)
from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch,
gpugemm_no_inplace, gpugemm_inplace, gpugemmbatch_no_inplace,
gpugemv_no_inplace, gpugemv_inplace)
......@@ -148,7 +148,7 @@ gpu_optimizer.register('local_remove_all_assert',
def safe_to_gpu(x, ctx_name):
    """Transfer ``x`` to GPU context ``ctx_name`` if it is a host tensor.

    Parameters
    ----------
    x : Variable
        Any Theano variable.
    ctx_name : str
        Target GPU context name.

    Returns
    -------
    Variable
        ``x`` wrapped in a transfer Op when its type is a host
        ``TensorType``; ``x`` unchanged otherwise.
    """
    if isinstance(x.type, tensor.TensorType):
        # Cached factory: one GpuFromHost Op per context (see gpu_from_host).
        return gpu_from_host(ctx_name)(x)
    else:
        return x
......@@ -251,7 +251,7 @@ class InputToGpuOptimizer(Optimizer):
continue
try:
new_input = host_from_gpu(GpuFromHost(target)(input))
new_input = host_from_gpu(gpu_from_host(target)(input))
fgraph.replace_validate(input, new_input,
"InputToGpuOptimizer")
except TypeError:
......
......@@ -233,7 +233,7 @@ class GpuArrayType(Type):
return data
def filter_variable(self, other, allow_convert=True):
from theano.gpuarray import GpuFromHost
from theano.gpuarray.basic_ops import gpu_from_host
if hasattr(other, '_as_GpuArrayVariable'):
other = other._as_GpuArrayVariable(self.context_name)
......@@ -265,7 +265,7 @@ class GpuArrayType(Type):
str(self.broadcastable)))
other = other2
return GpuFromHost(self.context_name)(other)
return gpu_from_host(self.context_name)(other)
@staticmethod
def values_eq(a, b, force_same_dtype=True):
......
......@@ -152,13 +152,13 @@ def traverse(out, x, x_copy, d, visited=None):
return d
visited.add(out)
from theano.sandbox import cuda
from theano import gpuarray
from theano.gpuarray.basic_ops import gpu_from_host
if out == x:
if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy)
else:
assert isinstance(x.type, gpuarray.GpuArrayType)
d[out] = gpuarray.GpuFromHost(x.type.context_name)(x_copy)
d[out] = gpu_from_host(x.type.context_name)(x_copy)
return d
elif out.owner is None:
return d
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论