提交 d7688cea authored 作者: sentient07's avatar sentient07

Cached GpuFromHost

上级 44480978
...@@ -70,7 +70,7 @@ def as_gpuarray_variable(x, context_name): ...@@ -70,7 +70,7 @@ def as_gpuarray_variable(x, context_name):
# If we couldn't deal with transfers, then maybe it's a tensor # If we couldn't deal with transfers, then maybe it's a tensor
if isinstance(x.type, tensor.TensorType): if isinstance(x.type, tensor.TensorType):
return GpuFromHost(context_name)(x) return gpu_from_host(context_name)(x)
# Try _as_GpuArrayVariable if possible # Try _as_GpuArrayVariable if possible
if hasattr(x, '_as_GpuArrayVariable'): if hasattr(x, '_as_GpuArrayVariable'):
...@@ -544,7 +544,7 @@ class HostFromGpu(Op): ...@@ -544,7 +544,7 @@ class HostFromGpu(Op):
def grad(self, inputs, grads): def grad(self, inputs, grads):
gz, = grads gz, = grads
return [GpuFromHost(inputs[0].type.context_name)(gz)] return [gpu_from_host(inputs[0].type.context_name)(gz)]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
ev, = eval_points ev, = eval_points
...@@ -647,6 +647,14 @@ class GpuFromHost(Op): ...@@ -647,6 +647,14 @@ class GpuFromHost(Op):
return (9,) return (9,)
# Caching GpuFromHost: hand out one memoized GpuFromHost Op per context
# name, so repeated transfers to the same context reuse an identical Op
# instance (cheaper graph comparison / merging than constructing a new
# Op each time).
def gpu_from_host(ctx):
    """Return the cached ``GpuFromHost`` Op for context name *ctx*.

    Parameters
    ----------
    ctx : hashable
        GPU context name used to key the cache (passed through to
        ``GpuFromHost``).

    Returns
    -------
    GpuFromHost
        The single memoized Op instance for *ctx*; created on first use.
    """
    # Fix: the membership test must consult gpu_from_host.cache, not
    # gpu_alloc.cache. Checking the wrong dict either rebuilt the Op on
    # every call (cache never hit) or raised KeyError when ctx was in
    # gpu_alloc.cache but not yet in gpu_from_host.cache.
    if ctx not in gpu_from_host.cache:
        gpu_from_host.cache[ctx] = GpuFromHost(ctx)
    return gpu_from_host.cache[ctx]
gpu_from_host.cache = {}
class GpuToGpu(Op): class GpuToGpu(Op):
""" """
Transfer data between GPUs. Transfer data between GPUs.
......
...@@ -36,7 +36,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name, ...@@ -36,7 +36,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous, gpu_contiguous, GpuSplit, GpuContiguous, gpu_contiguous,
GpuAlloc, GpuAllocEmpty, GpuReshape, GpuAlloc, GpuAllocEmpty, GpuReshape,
GpuEye, gpu_join, GpuJoin, gpu_alloc_empty, gpu_alloc) GpuEye, gpu_join, GpuJoin, gpu_alloc_empty, gpu_alloc, gpu_from_host)
from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch, from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch,
gpugemm_no_inplace, gpugemm_inplace, gpugemmbatch_no_inplace, gpugemm_no_inplace, gpugemm_inplace, gpugemmbatch_no_inplace,
gpugemv_no_inplace, gpugemv_inplace) gpugemv_no_inplace, gpugemv_inplace)
...@@ -148,7 +148,7 @@ gpu_optimizer.register('local_remove_all_assert', ...@@ -148,7 +148,7 @@ gpu_optimizer.register('local_remove_all_assert',
def safe_to_gpu(x, ctx_name): def safe_to_gpu(x, ctx_name):
if isinstance(x.type, tensor.TensorType): if isinstance(x.type, tensor.TensorType):
return GpuFromHost(ctx_name)(x) return gpu_from_host(ctx_name)(x)
else: else:
return x return x
...@@ -251,7 +251,7 @@ class InputToGpuOptimizer(Optimizer): ...@@ -251,7 +251,7 @@ class InputToGpuOptimizer(Optimizer):
continue continue
try: try:
new_input = host_from_gpu(GpuFromHost(target)(input)) new_input = host_from_gpu(gpu_from_host(target)(input))
fgraph.replace_validate(input, new_input, fgraph.replace_validate(input, new_input,
"InputToGpuOptimizer") "InputToGpuOptimizer")
except TypeError: except TypeError:
......
...@@ -233,7 +233,7 @@ class GpuArrayType(Type): ...@@ -233,7 +233,7 @@ class GpuArrayType(Type):
return data return data
def filter_variable(self, other, allow_convert=True): def filter_variable(self, other, allow_convert=True):
from theano.gpuarray import GpuFromHost from theano.gpuarray.basic_ops import gpu_from_host
if hasattr(other, '_as_GpuArrayVariable'): if hasattr(other, '_as_GpuArrayVariable'):
other = other._as_GpuArrayVariable(self.context_name) other = other._as_GpuArrayVariable(self.context_name)
...@@ -265,7 +265,7 @@ class GpuArrayType(Type): ...@@ -265,7 +265,7 @@ class GpuArrayType(Type):
str(self.broadcastable))) str(self.broadcastable)))
other = other2 other = other2
return GpuFromHost(self.context_name)(other) return gpu_from_host(self.context_name)(other)
@staticmethod @staticmethod
def values_eq(a, b, force_same_dtype=True): def values_eq(a, b, force_same_dtype=True):
......
...@@ -152,13 +152,13 @@ def traverse(out, x, x_copy, d, visited=None): ...@@ -152,13 +152,13 @@ def traverse(out, x, x_copy, d, visited=None):
return d return d
visited.add(out) visited.add(out)
from theano.sandbox import cuda from theano.sandbox import cuda
from theano import gpuarray from theano.gpuarray.basic_ops import gpu_from_host
if out == x: if out == x:
if isinstance(x.type, cuda.CudaNdarrayType): if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy) d[out] = cuda.gpu_from_host(x_copy)
else: else:
assert isinstance(x.type, gpuarray.GpuArrayType) assert isinstance(x.type, gpuarray.GpuArrayType)
d[out] = gpuarray.GpuFromHost(x.type.context_name)(x_copy) d[out] = gpu_from_host(x.type.context_name)(x_copy)
return d return d
elif out.owner is None: elif out.owner is None:
return d return d
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论