Remove tentacles in scan.

df95d9a9 · Arnaud Bergeron · a388d94d · df95d9a9 · df95d9a9 · df95d9a9
--- a/theano/scan_module/scan.py
+++ b/theano/scan_module/scan.py
@@ -998,22 +998,20 @@ def scan(fn,
                  shared_inner_outputs)
    if condition is not None:
        inner_outs.append(condition)
-    # Cuda and Gpuarray are imported here, instead of being imported on top of
+    # gpuarray is imported here, instead of being imported on top of
    # the file because that would force on the user some dependencies that we
    # might not want. Currently we are working on removing the
    # dependencies on sandbox code completely.
-    from theano.sandbox import cuda
    from theano import gpuarray
-    if cuda.cuda_available or gpuarray.pygpu_activated:
+    if gpuarray.pygpu_activated:
        # very often we end up in this situation when we want to
        # replace w with w_copy, where w is a GPU variable
        # and w_copy is TensorType. This is caused because shared
-        # variables are put on GPU right aways >:| ,
+        # variables are put on GPU right away >:| ,
        new_givens = OrderedDict()
        for w, w_copy in iteritems(givens):
-            if ((isinstance(w.type, cuda.CudaNdarrayType) or
+            if (isinstance(w.type, gpuarray.GpuArrayType) and
-                 isinstance(w.type, gpuarray.GpuArrayType)) and
                isinstance(w_copy.type, tensor.TensorType)):
                for o in inner_outs:
                    new_givens = traverse(o, w, w_copy, new_givens)
@@ -1046,7 +1044,7 @@ def scan(fn,
    info['name'] = name
    info['mode'] = mode
    info['destroy_map'] = OrderedDict()
-    info['gpu'] = False
+    info['gpua'] = False
    info['as_while'] = as_while
    info['profile'] = profile
    info['allow_gc'] = allow_gc
@@ -1072,7 +1070,7 @@ def scan(fn,
            arg = tensor.as_tensor_variable(arg)
        except TypeError:
            # This happens for random states, for example, but it is a good way
-            # to make sure no input is a cuda ndarrays
+            # to make sure all inputs are tensors.
            pass
        scan_inputs += [arg]
    scan_outs = local_op(*scan_inputs)

--- a/theano/scan_module/scan_op.py
+++ b/theano/scan_module/scan_op.py
@@ -125,8 +125,6 @@ class Scan(PureOp):
                 info,
                 typeConstructor=None,
                 ):
-        if 'gpua' not in info:
-            info['gpua'] = False
        # adding properties into self
        self.inputs = inputs
        self.outputs = outputs
@@ -204,7 +202,7 @@ class Scan(PureOp):
                                   self.n_shared_outs)
        self.n_outs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot
        self.n_tap_outs = self.n_mit_mot + self.n_mit_sot
-        if self.info['gpu'] or self.info['gpua']:
+        if self.info['gpua']:
            self._hash_inner_graph = self.info['gpu_hash']
        else:
            # Do the missing inputs check here to have the error early.
@@ -250,27 +248,6 @@ class Scan(PureOp):
                                    "type '%s' and '%s' respectively." %
                                    (self.name, type_input, type_output))
-        # If scan has the flag 'gpu' set to false (meaning that is shouldn't
-        # use the CUDA gpu backend ), ensure that is has no input and no
-        # output with type CudaNdarrayType
-        from theano.sandbox.cuda import CudaNdarrayType
-        if not self.info.get("gpu", False):
-            for inp in self.inputs:
-                if isinstance(inp.type, CudaNdarrayType):
-                    raise TypeError("Inconsistency in the inner graph of "
-                                    "scan '%s' : one of the inputs to the "
-                                    "inner graph is of type CudaNdarray but "
-                                    "the attributes of the scan op indicate "
-                                    "that it shouldn't be the case")
-            for out in self.outputs:
-                if isinstance(out.type, CudaNdarrayType):
-                    raise TypeError("Inconsistency in the inner graph of "
-                                    "scan '%s' : one of the outputs to the "
-                                    "inner graph is of type CudaNdarray but "
-                                    "the attributes of the scan op indicate "
-                                    "that it shouldn't be the case")
        # If scan has the flag 'gpua' set to false (meaning that it shouldn't
        # use the gpuarray gpu backend ), ensure that it has no input and no
        # output with type GpuArrayType
@@ -297,9 +274,6 @@ class Scan(PureOp):
        if "allow_gc" not in self.__dict__:
            self.allow_gc = True
            self.info['allow_gc'] = True
-        if not hasattr(self, 'gpua'):
-            self.gpua = False
-            self.info['gpua'] = False
        if not hasattr(self, 'var_mappings'):
            # Generate the mappings between inner and outer inputs and outputs
            # if they haven't already been generated.
@@ -436,9 +410,9 @@ class Scan(PureOp):
        def format(var, as_var):
            """
            This function ensures that ``out`` has the same dtype as
-            ``inp`` as well as calling filter_variable to make sure they are
+            ``inp`` as well as calling filter_variable to make sure
-            both TensorType or CudaNdarrayType. It internally deals with the
+            they are both TensorType or GpuArrayType. It internally
-            corner case where inp.ndim + 1 = out.ndim
+            deals with the corner case where inp.ndim + 1 = out.ndim
            """
            if not hasattr(var, 'dtype'):
@@ -672,7 +646,7 @@ class Scan(PureOp):
                         'n_seqs', 'tap_array',
                         'as_while', 'n_mit_sot', 'destroy_map',
                         'n_nit_sot', 'n_shared_outs',
-                         'n_sit_sot', 'gpu', 'gpua', 'n_mit_mot_outs',
+                         'n_sit_sot', 'gpua', 'n_mit_mot_outs',
                         'n_mit_mot', 'mit_mot_out_slices']
        # These are some safety checks ( namely that the inner graph has the
        # same number of inputs and same number of outputs )
@@ -696,7 +670,7 @@ class Scan(PureOp):
                                             other.inputs)
    def __str__(self):
-        if self.gpu:
+        if self.gpua:
            gpu_str = 'gpu'
        else:
            gpu_str = 'cpu'
@@ -1318,7 +1292,7 @@ class Scan(PureOp):
                pdx = offset + self.n_shared_outs
                output_storage[pdx].storage[0] = None
-            # 4.5. Keep a reference to the variables (ndarrays, CudaNdarrays,
+            # 4.5. Keep a reference to the variables (ndarrays, GpuArrays,
            # etc) currently in the output_storage to be able to compare them
            # with the actual outputs of the inner function after its
            # execution. Also keep pointers to their data to be able to detect
@@ -1336,7 +1310,7 @@ class Scan(PureOp):
                else:
                    old_output_data[idx] = var.gpudata
-            # 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays,
+            # 4.6. Keep a reference to the variables (ndarrays, GpuArrays,
            # etc) associated with mitmot inputs currently in the
            # input_storage to be able to compare them with the content of the
            # input_storage after the execution of the function. Also keep
@@ -2514,7 +2488,7 @@ class Scan(PureOp):
        info['n_seqs'] = len(outer_inp_seqs)
        info['n_mit_sot'] = 0
        info['tap_array'] = new_tap_array
-        info['gpu'] = False
+        info['gpua'] = False
        info['n_mit_mot'] = len(outer_inp_mitmot)
        info['n_mit_mot_outs'] = n_mitmot_outs
        info['mit_mot_out_slices'] = mitmot_out_taps
@@ -2683,7 +2657,7 @@ class Scan(PureOp):
        info['n_mit_mot'] = self.n_mit_mot * 2
        info['n_nit_sot'] = self.n_nit_sot * 2
        info['n_shared_outs'] = self.n_shared_outs
-        info['gpu'] = False
+        info['gpua'] = False
        info['as_while'] = self.as_while
        info['profile'] = self.profile
        info['truncate_gradient'] = self.truncate_gradient

--- a/theano/scan_module/scan_opt.py
+++ b/theano/scan_module/scan_opt.py
@@ -914,10 +914,9 @@ class ScanInplaceOptimizer(Optimizer):
    """
-    def __init__(self, typeInfer=None, gpu_flag=False, gpua_flag=False):
+    def __init__(self, typeInfer=None, gpua_flag=False):
        Optimizer.__init__(self)
        self.typeInfer = typeInfer
-        self.gpu_flag = gpu_flag
        self.gpua_flag = gpua_flag
    def add_requirements(self, fgraph):
@@ -998,12 +997,10 @@ class ScanInplaceOptimizer(Optimizer):
    def apply(self, fgraph):
-        # Depending on the values of gpu_flag and gpua_flag, get the list of
+        # Depending on the value of gpua_flag, get the list of memory
-        # memory allocation ops that the optimization should be able to handle
+        # allocation ops that the optimization should be able to
+        # handle
        alloc_ops = (Alloc, AllocEmpty)
-        if self.gpu_flag:
-            alloc_ops += (theano.sandbox.cuda.GpuAlloc,
-                          theano.sandbox.cuda.GpuAllocEmpty)
        if self.gpua_flag:
            # gpuarray might be imported but not its GpuAlloc and
            # GpuAllocEmpty ops.
@@ -1016,7 +1013,6 @@ class ScanInplaceOptimizer(Optimizer):
        nodes = fgraph.toposort()[::-1]
        scan_nodes = [x for x in nodes
                      if (isinstance(x.op, scan_op.Scan) and
-                          x.op.info['gpu'] == self.gpu_flag and
                          x.op.info['gpua'] == self.gpua_flag)]
        for scan_idx in xrange(len(scan_nodes)):
@@ -2263,8 +2259,7 @@ optdb.register('scan_eqopt2', scan_eqopt2, 1.6, 'fast_run', 'scan')
 # ScanSaveMem should execute only once per node.
 optdb.register('scanOp_save_mem', ScanSaveMem(), 1.61, 'fast_run', 'scan')
 optdb.register('scanOp_make_inplace',
-               ScanInplaceOptimizer(typeInfer=None,
+               ScanInplaceOptimizer(typeInfer=None),
-                                    gpu_flag=False),
               75,
               'fast_run',
               'inplace',

--- a/theano/scan_module/scan_perform.pyx
+++ b/theano/scan_module/scan_perform.pyx
@@ -355,7 +355,7 @@ def perform(
            pdx = offset + n_shared_outs
            output_storage[<unsigned int>pdx].storage[0] = None
-        # 4.5. Keep a reference to the variables (ndarrays, CudaNdarrays,
+        # 4.5. Keep a reference to the variables (ndarrays, GpuArrays,
        # etc) currently in the output_storage to be able to compare them
        # with the actual outputs of the inner function after its
        # execution. Also keep pointers to their data to be able to detect
@@ -373,7 +373,7 @@ def perform(
            else:
                old_output_data[idx] = var.gpudata
-        # 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays,
+        # 4.6. Keep a reference to the variables (ndarrays, GpuArrays,
        # etc) associated with mitmot inputs currently in the input_storage to
        # be able to compare them with the content of the input_storage after
        # the execution of the function. Also keep pointers to their data to

--- a/theano/scan_module/scan_utils.py
+++ b/theano/scan_module/scan_utils.py
@@ -84,9 +84,7 @@ def safe_new(x, tag='', dtype=None):
        try:
            x = tensor.as_tensor_variable(x)
        except TypeError:
-            # This could happen for example for random states, and I really
+            # This could happen for example for random states
-            # want to avoid the convoluted logic that checks for cuda
-            # ndarrays
            pass
    # Cast x if needed. If x has a test value, this will also cast it.
@@ -151,24 +149,15 @@ def traverse(out, x, x_copy, d, visited=None):
    if out in visited:
        return d
    visited.add(out)
-    from theano.sandbox import cuda
    from theano.gpuarray.basic_ops import GpuFromHost, host_from_gpu
    from theano.gpuarray import pygpu_activated
    from theano.gpuarray.type import GpuArrayType
    if out == x:
-        if isinstance(x.type, cuda.CudaNdarrayType):
+        assert isinstance(x.type, GpuArrayType)
-            d[out] = cuda.gpu_from_host(x_copy)
+        d[out] = GpuFromHost(x.type.context_name)(x_copy)
-        else:
-            assert isinstance(x.type, GpuArrayType)
-            d[out] = GpuFromHost(x.type.context_name)(x_copy)
        return d
    elif out.owner is None:
        return d
-    elif (cuda.cuda_available and
-          out.owner.op == cuda.host_from_gpu and
-          out.owner.inputs == [x]):
-        d[out] = tensor.as_tensor_variable(x_copy)
-        return d
    elif (pygpu_activated and
          out.owner.op == host_from_gpu and
          out.owner.inputs == [x]):
@@ -994,7 +983,6 @@ def compress_outs(op, not_required, inputs):
    info['n_nit_sot'] = 0
    info['truncate_gradient'] = op.info['truncate_gradient']
    info['name'] = op.info['name']
-    info['gpu'] = op.info['gpu']
    info['gpua'] = op.info['gpua']
    info['mode'] = op.info['mode']
    info['as_while'] = op.info['as_while']
@@ -1257,7 +1245,7 @@ class scan_args(object):
        self.other_info = OrderedDict()
        for k in ('truncate_gradient', 'name', 'mode', 'destroy_map',
-                  'gpu', 'gpua', 'as_while', 'profile', 'allow_gc'):
+                  'gpua', 'as_while', 'profile', 'allow_gc'):
            if k in info:
                self.other_info[k] = info[k]

--- a/theano/scan_module/tests/inconsistent_scan.pkl
+++ b/theano/scan_module/tests/inconsistent_scan.pkl
--- a/theano/scan_module/tests/test_scan.py
+++ b/theano/scan_module/tests/test_scan.py
@@ -4872,68 +4872,6 @@ class ScanGpuTests:
        utt.assert_allclose(output, expected_output)
-class T_Scan_Cuda(unittest.TestCase, ScanGpuTests):
-    """This class takes the gpu tests for scan that are defined in
-    class ScanGpuTests and runs them using the cuda backend. It also adds
-    tests specific to the cuda backend
-    """
-    def __init__(self, *args, **kwargs):
-        from theano.sandbox import cuda
-        self.gpu_backend = cuda
-        self.mode_with_gpu = mode_with_gpu
-        self.mode_with_gpu_nodebug = mode_with_gpu_nodebug
-        super(T_Scan_Cuda, self).__init__(*args, **kwargs)
-    def setUp(self):
-        # Skip the test if cuda is not available
-        if not self.gpu_backend.cuda_available:
-            raise SkipTest('Optional package cuda disabled')
-        utt.seed_rng()
-        super(T_Scan_Cuda, self).setUp()
-    def is_scan_on_gpu(self, node):
-        return node.op.info.get('gpu', False)
-    def test_inconsistent_inner_fct(self):
-        # Test that scan can detect inconsistencies in the inner graph and
-        # raises an appropriate exception. The pickled file used in this test
-        # relies on the cuda backend.
-        # This test has not been extensively tested for Python 3 so it should
-        # be skipped if python version is >=3
-        version = sys.version_info
-        if version >= (3,):
-            raise SkipTest("This test relies on a pickled file produced with "
-                           "Python 2. The current python version "
-                           "(%i.%i.%i.%i) is >= 3 so the test will be "
-                           "skipped." % (version.major, version.minor,
-                           version.micro, version.serial))
-        # When unpickled, the scan op should perform validation on its inner
-        # graph, detect the inconsistencies and raise a TypeError
-        folder = os.path.dirname(os.path.abspath(__file__))
-        path = os.path.join(folder, "inconsistent_scan.pkl")
-        assert_raises(TypeError, pickle.load, open(path, "r"))
-    def test_consistent_inner_fct(self):
-        # Test that scan does not falsely detect inconsistencies in a valid
-        # inner graph
-        rs = theano.sandbox.rng_mrg.MRG_RandomStreams(use_cuda=True)
-        output, _ = theano.scan(lambda : rs.uniform((3,), dtype="float32"),
-                                n_steps=3)
-        pickle.loads(pickle.dumps(output))
-        # Also ensure that, after compilation, the Scan has been moved
-        # on the gpu
-        fct = theano.function([], output, mode=self.mode_with_gpu)
-        scan_nodes = scan_nodes_from_fct(fct)
-        assert len(scan_nodes) == 1
-        assert self.is_scan_on_gpu(scan_nodes[0])
 class T_Scan_Gpuarray(unittest.TestCase, ScanGpuTests):
    """This class takes the gpu tests for scan that are defined in
    class ScanGpuTests and runs them using the gpuarray backend.