提交 7c4871ce authored 作者: Brandon T. Willard's avatar Brandon T. Willard 提交者: Brandon T. Willard

Remove non-sequence settings from ScanInfo

上级 6f685799
......@@ -1033,6 +1033,13 @@ def scan(
n_sit_sot=n_sit_sot,
n_shared_outs=n_shared_outs,
n_nit_sot=n_nit_sot,
)
local_op = Scan(
inner_inputs,
new_outs,
info,
mode=mode,
truncate_gradient=truncate_gradient,
name=name,
gpua=False,
......@@ -1042,8 +1049,6 @@ def scan(
strict=strict,
)
local_op = Scan(inner_inputs, new_outs, info, mode)
##
# Step 8. Compute the outputs using the scan op
##
......
......@@ -100,13 +100,6 @@ class ScanInfo:
n_sit_sot: int
n_shared_outs: int
n_nit_sot: int
truncate_gradient: bool = False
name: Optional[str] = None
gpua: bool = False
as_while: bool = False
profile: Optional[Union[str, bool]] = None
allow_gc: bool = True
strict: bool = True
TensorConstructorType = Callable[[List[bool], Union[str, np.generic]], TensorType]
......@@ -466,7 +459,7 @@ class ScanMethodsMixin:
# output with type GpuArrayType
from aesara.gpuarray import GpuArrayType
if not self.info.gpua:
if not self.gpua:
for inp in self.inputs:
if isinstance(inp.type, GpuArrayType):
raise TypeError(
......@@ -496,6 +489,13 @@ class Scan(Op, ScanMethodsMixin):
info: ScanInfo,
mode: Optional[Mode] = None,
typeConstructor: Optional[TensorConstructorType] = None,
truncate_gradient: bool = False,
name: Optional[str] = None,
gpua: bool = False,
as_while: bool = False,
profile: Optional[Union[str, bool]] = None,
allow_gc: bool = True,
strict: bool = True,
):
r"""
......@@ -506,33 +506,85 @@ class Scan(Op, ScanMethodsMixin):
outputs
Outputs of the inner function of `Scan`.
info
Dictionary containing different properties of the `Scan` `Op` (like
number of different types of arguments, name, mode, if it should run on
GPU or not, etc.).
A collection of information about the sequences and taps.
mode
The compilation mode for the inner graph.
The mode used to compile the inner-graph.
If you prefer the computations of one step of `scan` to be done
differently than the entire function, you can use this parameter to
describe how the computations in this loop are done (see
`aesara.function` for details about possible values and their meaning).
typeConstructor
Function that constructs an equivalent to Aesara `TensorType`.
Function that constructs a `TensorType` for the outputs.
truncate_gradient
`truncate_gradient` is the number of steps to use in truncated
back-propagation through time (BPTT). If you compute gradients through
a `Scan` `Op`, they are computed using BPTT. By providing a different
value then ``-1``, you choose to use truncated BPTT instead of classical
BPTT, where you go for only `truncate_gradient` number of steps back in
time.
name
When profiling `scan`, it is helpful to provide a name for any
instance of `scan`.
For example, the profiler will produce an overall profile of your code
as well as profiles for the computation of one step of each instance of
`Scan`. The `name` of the instance appears in those profiles and can
greatly help to disambiguate information.
gpua
If ``True``, this `Op` should run on a GPU.
as_while
Whether or not the `Scan` is a ``while``-loop.
profile
If ``True`` or a non-empty string, a profile object will be created and
attached to the inner graph of `Scan`. When `profile` is ``True``, the
profiler results will use the name of the `Scan` instance, otherwise it
will use the passed string. The profiler only collects and prints
information when running the inner graph with the `CVM` `Linker`.
allow_gc
Set the value of `allow_gc` for the internal graph of the `Scan`. If
set to ``None``, this will use the value of
`aesara.config.scan__allow_gc`.
The full `Scan` behavior related to allocation is determined by this
value and the flag `aesara.config.allow_gc`. If the flag
`allow_gc` is ``True`` (default) and this `allow_gc` is ``False``
(default), then we let `Scan` allocate all intermediate memory
on the first iteration, and they are not garbage collected
after that first iteration; this is determined by `allow_gc`. This can
speed up allocation of the subsequent iterations. All those temporary
allocations are freed at the end of all iterations; this is what the
flag `aesara.config.allow_gc` means.
If you use pre-allocation and this `Scan` is on GPU, the speed up from
`allow_gc` is small. If you are missing memory, disabling `allow_gc`
could help you run graphs that request much memory.
strict
If ``True``, all the shared variables used in the inner-graph must be provided.
Notes
-----
`typeConstructor` had been added to refactor how
Aesara deals with the GPU. If it runs on the GPU, scan needs
to construct certain outputs (those who reside in the GPU
memory) as the GPU-specific type. However we can not import
gpu code in this file (as it is in sandbox, and not available
on each machine) so the workaround is that the GPU
optimization passes to the constructor of this class a
function that is able to construct a GPU type. This way the
class `Scan` does not need to be aware of the details for the
GPU, it just constructs any tensor using this function (which
by default constructs normal tensors).
`typeConstructor` was added to refactor how Aesara deals with the
GPU. If it runs on the GPU, `Scan` needs to construct certain outputs
(those that reside in GPU memory) as the GPU-specific `Type`. Since we
cannot import GPU code here, the GPU optimizations pass the constructor
of this class a function that is able to construct a GPU `Type`. This
way the class `Scan` does not need to be aware of the GPU details--it
simply constructs tensors using this function (which by default
constructs normal tensors).
TODO: Clean up this approach and everything else related to GPUs; it's
all currently a very leaky set of abstractions.
"""
# adding properties into self
self.inputs = inputs
self.outputs = outputs
self.info = info
self.truncate_gradient = truncate_gradient
self.name = name
self.gpua = gpua
self.as_while = as_while
self.profile = profile
self.allow_gc = allow_gc
self.strict = strict
self.__dict__.update(dataclasses.asdict(info))
# Clone mode_instance, altering "allow_gc" for the linker,
......@@ -591,11 +643,6 @@ class Scan(Op, ScanMethodsMixin):
if not hasattr(self, "name") or self.name is None:
self.name = "scan_fn"
# to have a fair __eq__ comparison later on, we update the info with
# the actual mode used to compile the function and the name of the
# function that we set in case none was given
self.info = dataclasses.replace(self.info, name=self.name)
# Pre-computing some values to speed up perform
self.mintaps = [np.min(x) for x in self.tap_array]
self.mintaps += [0 for x in range(self.n_nit_sot)]
......@@ -606,8 +653,9 @@ class Scan(Op, ScanMethodsMixin):
self.nit_sot_arg_offset = self.shared_arg_offset + self.n_shared_outs
self.n_outs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot
self.n_tap_outs = self.n_mit_mot + self.n_mit_sot
if self.info.gpua:
self._hash_inner_graph = self.info.gpu_hash
if self.gpua:
self._hash_inner_graph = self.gpu_hash
else:
# Do the missing inputs check here to have the error early.
for var in graph_inputs(self.outputs, self.inputs):
......@@ -1072,6 +1120,24 @@ class Scan(Op, ScanMethodsMixin):
if self.info != other.info:
return False
if self.gpua != other.gpua:
return False
if self.as_while != other.as_while:
return False
if self.profile != other.profile:
return False
if self.truncate_gradient != other.truncate_gradient:
return False
if self.name != other.name:
return False
if self.allow_gc != other.allow_gc:
return False
# Compare inner graphs
# TODO: Use `self.inner_fgraph == other.inner_fgraph`
if len(self.inputs) != len(other.inputs):
......@@ -1115,7 +1181,19 @@ class Scan(Op, ScanMethodsMixin):
return aux_txt
def __hash__(self):
return hash((type(self), self._hash_inner_graph, self.info))
return hash(
(
type(self),
self._hash_inner_graph,
self.info,
self.gpua,
self.as_while,
self.profile,
self.truncate_gradient,
self.name,
self.allow_gc,
)
)
def make_thunk(self, node, storage_map, compute_map, no_recycling, impl=None):
"""
......@@ -2661,18 +2739,12 @@ class Scan(Op, ScanMethodsMixin):
n_seqs=len(outer_inp_seqs),
n_mit_sot=0,
tap_array=tuple(tuple(v) for v in new_tap_array),
gpua=False,
n_mit_mot=len(outer_inp_mitmot),
n_mit_mot_outs=n_mitmot_outs,
mit_mot_out_slices=tuple(tuple(v) for v in mitmot_out_taps),
truncate_gradient=self.truncate_gradient,
n_sit_sot=n_sitsot_outs,
n_shared_outs=0,
n_nit_sot=n_nit_sot,
as_while=False,
profile=self.profile,
name=f"grad_of_{self.name}" if self.name else None,
allow_gc=self.allow_gc,
)
outer_inputs = (
......@@ -2694,7 +2766,18 @@ class Scan(Op, ScanMethodsMixin):
)
inner_gfn_outs = inner_out_mitmot + inner_out_sitsot + inner_out_nitsot
local_op = Scan(inner_gfn_ins, inner_gfn_outs, info, self.mode)
local_op = Scan(
inner_gfn_ins,
inner_gfn_outs,
info,
mode=self.mode,
truncate_gradient=self.truncate_gradient,
gpua=False,
as_while=False,
profile=self.profile,
name=f"grad_of_{self.name}" if self.name else None,
allow_gc=self.allow_gc,
)
outputs = local_op(*outer_inputs)
if type(outputs) not in (list, tuple):
outputs = [outputs]
......@@ -3013,17 +3096,22 @@ class Scan(Op, ScanMethodsMixin):
n_nit_sot=self.n_nit_sot * 2,
n_shared_outs=self.n_shared_outs,
n_mit_mot_outs=n_mit_mot_outs * 2,
tap_array=tuple(tuple(v) for v in new_tap_array),
mit_mot_out_slices=tuple(tuple(v) for v in self.mit_mot_out_slices) * 2,
)
local_op = Scan(
inner_ins,
inner_outs,
info,
mode=self.mode,
gpua=False,
as_while=self.as_while,
profile=self.profile,
truncate_gradient=self.truncate_gradient,
name=f"rop_of_{self.name}" if self.name else None,
allow_gc=self.allow_gc,
tap_array=tuple(tuple(v) for v in new_tap_array),
mit_mot_out_slices=tuple(tuple(v) for v in self.mit_mot_out_slices) * 2,
)
local_op = Scan(inner_ins, inner_outs, info, self.mode)
outputs = local_op(*scan_inputs)
if type(outputs) not in (list, tuple):
outputs = [outputs]
......
......@@ -217,7 +217,19 @@ def remove_constants_and_unused_inputs_scan(fgraph, node):
if len(nw_inner) != len(op_ins):
op_outs = clone_replace(op_outs, replace=givens)
nw_info = dataclasses.replace(op.info, n_seqs=nw_n_seqs)
nwScan = Scan(nw_inner, op_outs, nw_info, op.mode)
nwScan = Scan(
nw_inner,
op_outs,
nw_info,
mode=op.mode,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
)
nw_outs = nwScan(*nw_outer, return_list=True)
return dict([("remove", [node])] + list(zip(node.outputs, nw_outs)))
else:
......@@ -396,7 +408,19 @@ class PushOutNonSeqScan(GlobalOptimizer):
op_ins = clean_inputs + nw_inner
# Reconstruct node
nwScan = Scan(op_ins, op_outs, op.info, op.mode)
nwScan = Scan(
op_ins,
op_outs,
op.info,
mode=op.mode,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
)
# Do not call make_node for test_value
nw_node = nwScan(*(node.inputs + nw_outer), return_list=True)[0].owner
......@@ -666,7 +690,19 @@ class PushOutSeqScan(GlobalOptimizer):
nw_info = dataclasses.replace(
op.info, n_seqs=op.info.n_seqs + len(nw_inner)
)
nwScan = Scan(op_ins, op_outs, nw_info, op.mode)
nwScan = Scan(
op_ins,
op_outs,
nw_info,
mode=op.mode,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
)
# Do not call make_node for test_value
nw_node = nwScan(
*(node.inputs[:1] + nw_outer + node.inputs[1:]),
......@@ -751,7 +787,9 @@ class PushOutScanOutput(GlobalOptimizer):
# Use `ScanArgs` to parse the inputs and outputs of scan for ease of
# use
args = ScanArgs(node.inputs, node.outputs, op.inputs, op.outputs, op.info)
args = ScanArgs(
node.inputs, node.outputs, op.inputs, op.outputs, op.info, op.as_while
)
new_scan_node = None
clients = {}
......@@ -921,6 +959,7 @@ class PushOutScanOutput(GlobalOptimizer):
new_scan_node.op.inputs,
new_scan_node.op.outputs,
new_scan_node.op.info,
new_scan_node.op.as_while,
)
new_outs = new_scan_args.outer_out_nit_sot[-len(add_as_nitsots) :]
......@@ -952,7 +991,14 @@ class PushOutScanOutput(GlobalOptimizer):
new_scan_args.inner_inputs,
new_scan_args.inner_outputs,
new_scan_args.info,
old_scan_node.op.mode,
mode=old_scan_node.op.mode,
gpua=old_scan_node.op.gpua,
as_while=old_scan_node.op.as_while,
profile=old_scan_node.op.profile,
truncate_gradient=old_scan_node.op.truncate_gradient,
# TODO: This seems questionable
name=old_scan_node.op.name,
allow_gc=old_scan_node.op.allow_gc,
)
# Create the Apply node for the scan op
......@@ -1059,7 +1105,18 @@ class ScanInplaceOptimizer(GlobalOptimizer):
typeConstructor = self.typeInfer(node)
new_op = Scan(
op.inputs, op.outputs, op.info, op.mode, typeConstructor=typeConstructor
op.inputs,
op.outputs,
op.info,
mode=op.mode,
typeConstructor=typeConstructor,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
)
destroy_map = op.destroy_map.copy()
......@@ -1086,9 +1143,7 @@ class ScanInplaceOptimizer(GlobalOptimizer):
alloc_ops = (Alloc, AllocEmpty)
nodes = fgraph.toposort()[::-1]
scan_nodes = [
x
for x in nodes
if (isinstance(x.op, Scan) and x.op.info.gpua == self.gpua_flag)
x for x in nodes if (isinstance(x.op, Scan) and x.op.gpua == self.gpua_flag)
]
for scan_idx in range(len(scan_nodes)):
......@@ -1593,7 +1648,20 @@ class ScanSaveMem(GlobalOptimizer):
return
# Do not call make_node for test_value
new_outs = Scan(inps, outs, info, op.mode)(*node_ins, return_list=True)
new_op = Scan(
inps,
outs,
info,
mode=op.mode,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
)
new_outs = new_op(*node_ins, return_list=True)
old_new = []
# 3.7 Get replace pairs for those outputs that do not change
......@@ -1871,15 +1939,21 @@ class ScanMerge(GlobalOptimizer):
n_sit_sot=sum([nd.op.n_sit_sot for nd in nodes]),
n_shared_outs=sum([nd.op.n_shared_outs for nd in nodes]),
n_nit_sot=sum([nd.op.n_nit_sot for nd in nodes]),
truncate_gradient=nodes[0].op.truncate_gradient,
)
old_op = nodes[0].op
new_op = Scan(
new_inner_ins,
new_inner_outs,
info,
mode=old_op.mode,
profile=old_op.profile,
truncate_gradient=old_op.truncate_gradient,
allow_gc=old_op.allow_gc,
name="&".join([nd.op.name for nd in nodes]),
gpua=False,
as_while=as_while,
profile=nodes[0].op.profile,
allow_gc=nodes[0].op.allow_gc,
)
new_op = Scan(new_inner_ins, new_inner_outs, info, nodes[0].op.mode)
new_outs = new_op(*outer_ins)
if not isinstance(new_outs, (list, tuple)):
......@@ -2005,7 +2079,12 @@ def scan_merge_inouts(fgraph, node):
# Equivalent inputs will be stored in inp_equiv, then a new
# scan node created without duplicates.
a = ScanArgs(
node.inputs, node.outputs, node.op.inputs, node.op.outputs, node.op.info
node.inputs,
node.outputs,
node.op.inputs,
node.op.outputs,
node.op.info,
node.op.as_while,
)
inp_equiv = {}
......@@ -2044,13 +2123,32 @@ def scan_merge_inouts(fgraph, node):
a_inner_outs = a.inner_outputs
inner_outputs = clone_replace(a_inner_outs, replace=inp_equiv)
op = Scan(inner_inputs, inner_outputs, info, node.op.mode)
outputs = op(*outer_inputs)
new_op = Scan(
inner_inputs,
inner_outputs,
info,
mode=node.op.mode,
gpua=node.op.gpua,
as_while=node.op.as_while,
profile=node.op.profile,
truncate_gradient=node.op.truncate_gradient,
# TODO: This seems questionable
name=node.op.name,
allow_gc=node.op.allow_gc,
)
outputs = new_op(*outer_inputs)
if not isinstance(outputs, (list, tuple)):
outputs = [outputs]
na = ScanArgs(outer_inputs, outputs, op.inputs, op.outputs, op.info)
na = ScanArgs(
outer_inputs,
outputs,
new_op.inputs,
new_op.outputs,
new_op.info,
new_op.as_while,
)
remove = [node]
else:
na = a
......@@ -2302,7 +2400,19 @@ class PushOutDot1(GlobalOptimizer):
new_inner_inps, new_inner_outs = reconstruct_graph(
_new_inner_inps, _new_inner_outs
)
new_op = Scan(new_inner_inps, new_inner_outs, new_info, op.mode)
new_op = Scan(
new_inner_inps,
new_inner_outs,
new_info,
mode=op.mode,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
)
_scan_inputs = (
[node.inputs[0]]
+ outer_seqs
......
......@@ -701,12 +701,6 @@ def compress_outs(op, not_required, inputs):
n_sit_sot=0,
n_shared_outs=0,
n_nit_sot=0,
truncate_gradient=op.info.truncate_gradient,
name=op.info.name,
gpua=op.info.gpua,
as_while=op.info.as_while,
profile=op.info.profile,
allow_gc=op.info.allow_gc,
)
op_inputs = op.inputs[: op.n_seqs]
......@@ -886,16 +880,18 @@ class ScanArgs:
_inner_inputs,
_inner_outputs,
info,
as_while,
clone=True,
):
self.n_steps = outer_inputs[0]
self.as_while = as_while
if clone:
rval = reconstruct_graph(_inner_inputs, _inner_outputs, "")
else:
rval = (_inner_inputs, _inner_outputs)
if info.as_while:
if self.as_while:
self.cond = [rval[1][-1]]
inner_outputs = rval[1][:-1]
else:
......@@ -1000,18 +996,6 @@ class ScanArgs:
assert p == len(outer_outputs)
assert q == len(inner_outputs)
self.other_info = {
k: getattr(info, k)
for k in (
"truncate_gradient",
"name",
"gpua",
"as_while",
"profile",
"allow_gc",
)
}
@staticmethod
def from_node(node, clone=False):
from aesara.scan.op import Scan
......@@ -1024,6 +1008,7 @@ class ScanArgs:
node.op.inputs,
node.op.outputs,
node.op.info,
node.op.as_while,
clone=clone,
)
......@@ -1041,14 +1026,8 @@ class ScanArgs:
n_shared_outs=0,
n_mit_mot_outs=0,
mit_mot_out_slices=(),
truncate_gradient=-1,
name=None,
gpua=False,
as_while=False,
profile=False,
allow_gc=False,
)
res = cls([1], [], [], [], info)
res = cls([1], [], [], [], info, False)
res.n_steps = None
return res
......@@ -1152,7 +1131,6 @@ class ScanArgs:
n_shared_outs=len(self.outer_in_shared),
n_mit_mot_outs=sum(len(s) for s in self.mit_mot_out_slices),
mit_mot_out_slices=tuple(self.mit_mot_out_slices),
**self.other_info,
)
def get_alt_field(self, var_info, alt_prefix):
......@@ -1341,7 +1319,6 @@ class ScanArgs:
"mit_mot_out_slices",
"mit_mot_in_slices",
"mit_sot_in_slices",
"other_info",
)
):
setattr(res, attr, copy.copy(getattr(self, attr)))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论