提交 d7688cea authored 作者: sentient07's avatar sentient07

Cached GpuFromHost

上级 44480978
...@@ -70,7 +70,7 @@ def as_gpuarray_variable(x, context_name): ...@@ -70,7 +70,7 @@ def as_gpuarray_variable(x, context_name):
# If we couldn't deal with transfers, then maybe it's a tensor # If we couldn't deal with transfers, then maybe it's a tensor
if isinstance(x.type, tensor.TensorType): if isinstance(x.type, tensor.TensorType):
return GpuFromHost(context_name)(x) return gpu_from_host(context_name)(x)
# Try _as_GpuArrayVariable if possible # Try _as_GpuArrayVariable if possible
if hasattr(x, '_as_GpuArrayVariable'): if hasattr(x, '_as_GpuArrayVariable'):
...@@ -544,7 +544,7 @@ class HostFromGpu(Op): ...@@ -544,7 +544,7 @@ class HostFromGpu(Op):
def grad(self, inputs, grads): def grad(self, inputs, grads):
gz, = grads gz, = grads
return [GpuFromHost(inputs[0].type.context_name)(gz)] return [gpu_from_host(inputs[0].type.context_name)(gz)]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
ev, = eval_points ev, = eval_points
...@@ -647,6 +647,14 @@ class GpuFromHost(Op): ...@@ -647,6 +647,14 @@ class GpuFromHost(Op):
return (9,) return (9,)
# Caching GpuFromHost: hand out one memoized GpuFromHost Op per context
# name, so repeated transfers to the same context reuse an identical Op
# instance (cheaper graph comparison / merging than constructing a new
# Op each time).
def gpu_from_host(ctx):
    """Return the cached ``GpuFromHost`` Op for context name *ctx*.

    Parameters
    ----------
    ctx : hashable
        GPU context name used to key the cache (passed through to
        ``GpuFromHost``).

    Returns
    -------
    GpuFromHost
        The single memoized Op instance for *ctx*; created on first use.
    """
    # Fix: the membership test must consult gpu_from_host.cache, not
    # gpu_alloc.cache. Checking the wrong dict either rebuilt the Op on
    # every call (cache never hit) or raised KeyError when ctx was in
    # gpu_alloc.cache but not yet in gpu_from_host.cache.
    if ctx not in gpu_from_host.cache:
        gpu_from_host.cache[ctx] = GpuFromHost(ctx)
    return gpu_from_host.cache[ctx]
gpu_from_host.cache = {}
class GpuToGpu(Op): class GpuToGpu(Op):
""" """
Transfer data between GPUs. Transfer data between GPUs.
......
...@@ -36,7 +36,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name, ...@@ -36,7 +36,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous, gpu_contiguous, GpuSplit, GpuContiguous, gpu_contiguous,
GpuAlloc, GpuAllocEmpty, GpuReshape, GpuAlloc, GpuAllocEmpty, GpuReshape,
GpuEye, gpu_join, GpuJoin, gpu_alloc_empty, gpu_alloc) GpuEye, gpu_join, GpuJoin, gpu_alloc_empty, gpu_alloc, gpu_from_host)
from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch, from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch,
gpugemm_no_inplace, gpugemm_inplace, gpugemmbatch_no_inplace, gpugemm_no_inplace, gpugemm_inplace, gpugemmbatch_no_inplace,
gpugemv_no_inplace, gpugemv_inplace) gpugemv_no_inplace, gpugemv_inplace)
...@@ -148,7 +148,7 @@ gpu_optimizer.register('local_remove_all_assert', ...@@ -148,7 +148,7 @@ gpu_optimizer.register('local_remove_all_assert',
def safe_to_gpu(x, ctx_name): def safe_to_gpu(x, ctx_name):
if isinstance(x.type, tensor.TensorType): if isinstance(x.type, tensor.TensorType):
return GpuFromHost(ctx_name)(x) return gpu_from_host(ctx_name)(x)
else: else:
return x return x
...@@ -251,7 +251,7 @@ class InputToGpuOptimizer(Optimizer): ...@@ -251,7 +251,7 @@ class InputToGpuOptimizer(Optimizer):
continue continue
try: try:
new_input = host_from_gpu(GpuFromHost(target)(input)) new_input = host_from_gpu(gpu_from_host(target)(input))
fgraph.replace_validate(input, new_input, fgraph.replace_validate(input, new_input,
"InputToGpuOptimizer") "InputToGpuOptimizer")
except TypeError: except TypeError:
......
...@@ -233,7 +233,7 @@ class GpuArrayType(Type): ...@@ -233,7 +233,7 @@ class GpuArrayType(Type):
return data return data
def filter_variable(self, other, allow_convert=True): def filter_variable(self, other, allow_convert=True):
from theano.gpuarray import GpuFromHost from theano.gpuarray.basic_ops import gpu_from_host
if hasattr(other, '_as_GpuArrayVariable'): if hasattr(other, '_as_GpuArrayVariable'):
other = other._as_GpuArrayVariable(self.context_name) other = other._as_GpuArrayVariable(self.context_name)
...@@ -265,7 +265,7 @@ class GpuArrayType(Type): ...@@ -265,7 +265,7 @@ class GpuArrayType(Type):
str(self.broadcastable))) str(self.broadcastable)))
other = other2 other = other2
return GpuFromHost(self.context_name)(other) return gpu_from_host(self.context_name)(other)
@staticmethod @staticmethod
def values_eq(a, b, force_same_dtype=True): def values_eq(a, b, force_same_dtype=True):
......
...@@ -152,13 +152,13 @@ def traverse(out, x, x_copy, d, visited=None): ...@@ -152,13 +152,13 @@ def traverse(out, x, x_copy, d, visited=None):
return d return d
visited.add(out) visited.add(out)
from theano.sandbox import cuda from theano.sandbox import cuda
from theano import gpuarray from theano.gpuarray.basic_ops import gpu_from_host
if out == x: if out == x:
if isinstance(x.type, cuda.CudaNdarrayType): if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy) d[out] = cuda.gpu_from_host(x_copy)
else: else:
assert isinstance(x.type, gpuarray.GpuArrayType) assert isinstance(x.type, gpuarray.GpuArrayType)
d[out] = gpuarray.GpuFromHost(x.type.context_name)(x_copy) d[out] = gpu_from_host(x.type.context_name)(x_copy)
return d return d
elif out.owner is None: elif out.owner is None:
return d return d
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论