Commit 60e0ed1c, authored by Frédéric Bastien

Merge pull request #3660 from carriepl/scan_cleanup

Clean up (outs|inps)_on_gpu to (outs|inps)_is_tensor
......@@ -321,16 +321,14 @@ class Scan(PureOp):
# not having been preallocated
self.mitmots_preallocated = [False] * self.n_mit_mot_outs
if not hasattr(self, 'outs_on_gpu'):
if not hasattr(self, 'outs_is_tensor'):
# The thunk has been compiled before the analysis, at
# compilation time, of the location of the inputs and outputs.
# Perform this analysis here.
self.inps_on_gpu = [not isinstance(out,
theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.inputs]
self.outs_on_gpu = [not isinstance(out,
theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.outputs]
self.inps_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.inputs]
self.outs_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.outputs]
# Ensure that the graph associated with the inner function is valid.
self.validate_inner_graph()
......@@ -871,10 +869,10 @@ class Scan(PureOp):
# Analyse the compile inner function to determine which inputs and
# outputs are on the gpu and speed up some checks during the execution
self.inps_on_gpu = [not isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.inputs]
self.outs_on_gpu = [not isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.outputs]
self.inps_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.inputs]
self.outs_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.outputs]
try:
cython_mintaps = numpy.asarray(self.mintaps, dtype='int32')
......@@ -912,8 +910,10 @@ class Scan(PureOp):
cython_mitmots_preallocated = numpy.asarray(self.mitmots_preallocated,
dtype='int32')
cython_inps_on_gpu = numpy.asarray(self.inps_on_gpu, dtype='int32')
cython_outs_on_gpu = numpy.asarray(self.outs_on_gpu, dtype='int32')
cython_inps_is_tensor = numpy.asarray(self.inps_is_tensor,
dtype='int32')
cython_outs_is_tensor = numpy.asarray(self.outs_is_tensor,
dtype='int32')
if hasattr(self, 'destroy_map'):
cython_destroy_map = [x in self.destroy_map
......@@ -942,8 +942,8 @@ class Scan(PureOp):
cython_mit_mot_out_slices,
cython_mit_mot_out_nslices,
cython_mitmots_preallocated,
cython_inps_on_gpu,
cython_outs_on_gpu,
cython_inps_is_tensor,
cython_outs_is_tensor,
self.fn.fn,
self.fn,
cython_destroy_map,
......@@ -1305,10 +1305,10 @@ class Scan(PureOp):
if var is None:
old_output_data[idx] = None
elif self.outs_on_gpu[idx]:
old_output_data[idx] = var.gpudata
else:
elif self.outs_is_tensor[idx]:
old_output_data[idx] = var.data
else:
old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays,
# etc) associated with mitmot inputs currently in the
......@@ -1323,10 +1323,10 @@ class Scan(PureOp):
if var is None:
old_mitmot_input_data[idx] = None
elif self.inps_on_gpu[idx]:
old_mitmot_input_data[idx] = var.gpudata
else:
elif self.inps_is_tensor[idx]:
old_mitmot_input_data[idx] = var.data
else:
old_mitmot_input_data[idx] = var.gpudata
# 5.1 compute outputs
t0_fn = time.time()
......@@ -1388,10 +1388,10 @@ class Scan(PureOp):
new_var = input_storage[self.n_seqs + inp_idx].storage[0]
if old_var is new_var:
old_data = old_mitmot_input_data[inp_idx]
if self.inps_on_gpu[self.n_seqs + inp_idx]:
same_data = (new_var.gpudata == old_data)
else:
if self.inps_is_tensor[self.n_seqs + inp_idx]:
same_data = (new_var.data == old_data)
else:
same_data = (new_var.gpudata == old_data)
else:
same_data = False
......@@ -1434,10 +1434,10 @@ class Scan(PureOp):
old_data = old_output_data[offset_out + j]
if old_data is None:
output_reused = False
elif self.outs_on_gpu[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
elif self.outs_is_tensor[offset_out + j]:
output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = False
......@@ -1477,10 +1477,10 @@ class Scan(PureOp):
if old_var is new_var:
if old_data is None:
output_reused = False
elif self.outs_on_gpu[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
elif self.outs_is_tensor[offset_out + j]:
output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = False
......
......@@ -62,7 +62,7 @@ import copy
def get_version():
return 0.291
return 0.292
@cython.boundscheck(False)
def perform(
......@@ -83,8 +83,8 @@ def perform(
numpy.ndarray[numpy.int32_t,ndim=2] mit_mot_out_slices,
numpy.ndarray[numpy.int32_t,ndim=1] mit_mot_out_nslices,
numpy.ndarray[numpy.int32_t,ndim=1] mitmots_preallocated,
numpy.ndarray[numpy.int32_t,ndim=1] inps_on_gpu,
numpy.ndarray[numpy.int32_t,ndim=1] outs_on_gpu,
numpy.ndarray[numpy.int32_t,ndim=1] inps_is_tensor,
numpy.ndarray[numpy.int32_t,ndim=1] outs_is_tensor,
fn,
fnct,
numpy.ndarray[numpy.int32_t,ndim=1] destroy_map,
......@@ -138,11 +138,11 @@ def perform(
mit_mot_out_nslices: int32 ndarray (Can be replaced by a list)
Same as tap_array_len, but is the number of output taps of the
mit_mot sequences (i.e. it corresponds to mit_mot_out_slices)
inps_on_gpu : int32 ndarray (Can be replaced by a list)
Array of boolean indicating, for every input, whether it is on the GPU
inps_is_tensor : int32 ndarray (Can be replaced by a list)
Array of boolean indicating, for every input, whether it is a tensor
or not
outs_on_gpu : int32 ndarray (Can be replaced by a list)
Array of boolean indicating, for every output, whether it is on the GPU
outs_is_tensor : int32 ndarray (Can be replaced by a list)
Array of boolean indicating, for every output, whether it is a tensor
or not
fn: callable
This is the linker, i.e. the function that will loop over the
......@@ -368,10 +368,10 @@ def perform(
if var is None:
old_output_data[idx] = None
elif outs_on_gpu[idx]:
old_output_data[idx] = var.gpudata
else:
elif outs_is_tensor[idx]:
old_output_data[idx] = var.data
else:
old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays,
# etc) associated with mitmot inputs currently in the input_storage to
......@@ -385,10 +385,10 @@ def perform(
if var is None:
old_mitmot_input_data[idx] = None
elif inps_on_gpu[idx]:
old_mitmot_input_data[idx] = var.gpudata
else:
elif inps_is_tensor[idx]:
old_mitmot_input_data[idx] = var.data
else:
old_mitmot_input_data[idx] = var.gpudata
# 5.1 compute outputs
t0_fn = time.time()
......@@ -450,10 +450,10 @@ def perform(
new_var = input_storage[n_seqs + inp_idx].storage[0]
if old_var is new_var:
old_data = old_mitmot_input_data[inp_idx]
if inps_on_gpu[n_seqs + inp_idx]:
same_data = (new_var.gpudata == old_data)
else:
if inps_is_tensor[n_seqs + inp_idx]:
same_data = (new_var.data == old_data)
else:
same_data = (new_var.gpudata == old_data)
else:
same_data = False
......@@ -494,10 +494,10 @@ def perform(
if old_var is new_var:
if old_data is None:
output_reused = False
elif outs_on_gpu[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
elif outs_is_tensor[offset_out + j]:
output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = False
......@@ -536,10 +536,10 @@ def perform(
if old_var is new_var:
if old_data is None:
output_reused = False
elif outs_on_gpu[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
elif outs_is_tensor[offset_out + j]:
output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = False
......
......@@ -17,7 +17,7 @@ from theano.gof import cmodule
_logger = logging.getLogger('theano.scan_module.scan_perform')
version = 0.291 # must match constant returned in function get_version()
version = 0.292 # must match constant returned in function get_version()
need_reload = False
......
Markdown is supported
0%
You are about to add 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to post a comment