Commit cfbe7c3c authored by abergeron

Merge pull request #2925 from carriepl/scan_gpuarray_memory_usage

Scan gpuarray memory usage
......@@ -318,7 +318,7 @@ def scan(fn,
:param strict:
If true, all the shared variables used in ``fn`` must be provided as a
part of ``non_sequences`` or ``sequences``.
part of ``non_sequences`` or ``sequences``.
:rtype: tuple
:return: tuple of the form (outputs, updates); ``outputs`` is either a
......@@ -962,21 +962,23 @@ def scan(fn,
shared_inner_outputs)
if condition is not None:
inner_outs.append(condition)
# Cuda is imported here, instead of being imported at the top of the file,
# because that would force on the user some dependencies that we might not
# want. Currently we are working on removing the dependencies on sandbox
# code completely.
from theano.sandbox import cuda
if cuda.cuda_available:
# Cuda and Gpuarray are imported here, instead of being imported at the top of
# the file, because that would force on the user some dependencies that we
# might not want. Currently we are working on removing the
# dependencies on sandbox code completely.
from theano.sandbox import cuda, gpuarray
if cuda.cuda_available or gpuarray.pygpu_activated:
# very often we end up in this situation when we want to
# replace w with w_copy, where w is CudaNdarray
# replace w with w_copy, where w is a GPU variable
# and w_copy is TensorType. This is caused because shared
# variables are put on GPU right aways >:| ,
new_givens = OrderedDict()
for w, w_copy in givens.iteritems():
if (isinstance(w.type, cuda.CudaNdarrayType)
and isinstance(w_copy.type, tensor.TensorType)):
if ((isinstance(w.type, cuda.CudaNdarrayType) or
isinstance(w.type, gpuarray.GpuArrayType)) and
isinstance(w_copy.type, tensor.TensorType)):
for o in inner_outs:
new_givens = traverse(o, w, w_copy, new_givens)
else:
......
......@@ -127,9 +127,13 @@ def traverse(out, x, x_copy, d, visited=None):
if out in visited:
return d
visited.add(out)
import theano.sandbox.cuda as cuda
from theano.sandbox import cuda, gpuarray
if out == x:
d[out] = cuda.gpu_from_host(x_copy)
if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy)
else:
assert isinstance(x.type, gpuarray.GpuArrayType)
d[out] = gpuarray.gpu_from_host(x_copy)
return d
elif out.owner is None:
return d
......@@ -138,6 +142,11 @@ def traverse(out, x, x_copy, d, visited=None):
out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy)
return d
elif (gpuarray.pygpu_activated and
out.owner.op == gpuarray.host_from_gpu and
out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy)
return d
else:
for inp in out.owner.inputs:
d = traverse(inp, x, x_copy, d, visited)
......
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment