提交 7c4871ce authored 作者: Brandon T. Willard's avatar Brandon T. Willard 提交者: Brandon T. Willard

Remove non-sequence settings from ScanInfo

上级 6f685799
...@@ -1033,6 +1033,13 @@ def scan( ...@@ -1033,6 +1033,13 @@ def scan(
n_sit_sot=n_sit_sot, n_sit_sot=n_sit_sot,
n_shared_outs=n_shared_outs, n_shared_outs=n_shared_outs,
n_nit_sot=n_nit_sot, n_nit_sot=n_nit_sot,
)
local_op = Scan(
inner_inputs,
new_outs,
info,
mode=mode,
truncate_gradient=truncate_gradient, truncate_gradient=truncate_gradient,
name=name, name=name,
gpua=False, gpua=False,
...@@ -1042,8 +1049,6 @@ def scan( ...@@ -1042,8 +1049,6 @@ def scan(
strict=strict, strict=strict,
) )
local_op = Scan(inner_inputs, new_outs, info, mode)
## ##
# Step 8. Compute the outputs using the scan op # Step 8. Compute the outputs using the scan op
## ##
......
...@@ -100,13 +100,6 @@ class ScanInfo: ...@@ -100,13 +100,6 @@ class ScanInfo:
n_sit_sot: int n_sit_sot: int
n_shared_outs: int n_shared_outs: int
n_nit_sot: int n_nit_sot: int
truncate_gradient: bool = False
name: Optional[str] = None
gpua: bool = False
as_while: bool = False
profile: Optional[Union[str, bool]] = None
allow_gc: bool = True
strict: bool = True
TensorConstructorType = Callable[[List[bool], Union[str, np.generic]], TensorType] TensorConstructorType = Callable[[List[bool], Union[str, np.generic]], TensorType]
...@@ -466,7 +459,7 @@ class ScanMethodsMixin: ...@@ -466,7 +459,7 @@ class ScanMethodsMixin:
# output with type GpuArrayType # output with type GpuArrayType
from aesara.gpuarray import GpuArrayType from aesara.gpuarray import GpuArrayType
if not self.info.gpua: if not self.gpua:
for inp in self.inputs: for inp in self.inputs:
if isinstance(inp.type, GpuArrayType): if isinstance(inp.type, GpuArrayType):
raise TypeError( raise TypeError(
...@@ -496,6 +489,13 @@ class Scan(Op, ScanMethodsMixin): ...@@ -496,6 +489,13 @@ class Scan(Op, ScanMethodsMixin):
info: ScanInfo, info: ScanInfo,
mode: Optional[Mode] = None, mode: Optional[Mode] = None,
typeConstructor: Optional[TensorConstructorType] = None, typeConstructor: Optional[TensorConstructorType] = None,
truncate_gradient: bool = False,
name: Optional[str] = None,
gpua: bool = False,
as_while: bool = False,
profile: Optional[Union[str, bool]] = None,
allow_gc: bool = True,
strict: bool = True,
): ):
r""" r"""
...@@ -506,33 +506,85 @@ class Scan(Op, ScanMethodsMixin): ...@@ -506,33 +506,85 @@ class Scan(Op, ScanMethodsMixin):
outputs outputs
Outputs of the inner function of `Scan`. Outputs of the inner function of `Scan`.
info info
Dictionary containing different properties of the `Scan` `Op` (like A collection of information about the sequences and taps.
number of different types of arguments, name, mode, if it should run on
GPU or not, etc.).
mode mode
The compilation mode for the inner graph. The mode used to compile the inner-graph.
If you prefer the computations of one step of `scan` to be done
differently then the entire function, you can use this parameter to
describe how the computations in this loop are done (see
`aesara.function` for details about possible values and their meaning).
typeConstructor typeConstructor
Function that constructs an equivalent to Aesara `TensorType`. Function that constructs a `TensorType` for the outputs.
truncate_gradient
`truncate_gradient` is the number of steps to use in truncated
back-propagation through time (BPTT). If you compute gradients through
a `Scan` `Op`, they are computed using BPTT. By providing a different
value than ``-1``, you choose to use truncated BPTT instead of classical
BPTT, where you go for only `truncate_gradient` number of steps back in
time.
name
When profiling `scan`, it is helpful to provide a name for any
instance of `scan`.
For example, the profiler will produce an overall profile of your code
as well as profiles for the computation of one step of each instance of
`Scan`. The `name` of the instance appears in those profiles and can
greatly help to disambiguate information.
gpua
If ``True``, this `Op` should run on a GPU.
as_while
Whether or not the `Scan` is a ``while``-loop.
profile
If ``True`` or a non-empty string, a profile object will be created and
attached to the inner graph of `Scan`. When `profile` is ``True``, the
profiler results will use the name of the `Scan` instance, otherwise it
will use the passed string. The profiler only collects and prints
information when running the inner graph with the `CVM` `Linker`.
allow_gc
Set the value of `allow_gc` for the internal graph of the `Scan`. If
set to ``None``, this will use the value of
`aesara.config.scan__allow_gc`.
The full `Scan` behavior related to allocation is determined by this
value and the flag `aesara.config.allow_gc`. If the flag
`allow_gc` is ``True`` (default) and this `allow_gc` is ``False``
(default), then we let `Scan` allocate all intermediate memory
on the first iteration, and it is not garbage collected
after that first iteration; this is determined by `allow_gc`. This can
speed up allocation of the subsequent iterations. All those temporary
allocations are freed at the end of all iterations; this is what the
flag `aesara.config.allow_gc` means.
If you use pre-allocation and this `Scan` is on GPU, the speed up from
`allow_gc` is small. If you are short on memory, disabling `allow_gc`
could help you run graphs that require a lot of memory.
strict
If ``True``, all the shared variables used in the inner-graph must be provided.
Notes Notes
----- -----
`typeConstructor` had been added to refactor how `typeConstructor` had been added to refactor how Aesara deals with the
Aesara deals with the GPU. If it runs on the GPU, scan needs GPU. If it runs on the GPU, `Scan` needs to construct certain outputs
to construct certain outputs (those who reside in the GPU (those that reside in GPU memory) as the GPU-specific `Type`. Since we
memory) as the GPU-specific type. However we can not import cannot import GPU code here, the GPU optimizations pass the constructor
gpu code in this file (as it is in sandbox, and not available of this class a function that is able to construct a GPU `Type`. This
on each machine) so the workaround is that the GPU way the class `Scan` does not need to be aware of the GPU details--it
optimization passes to the constructor of this class a simply constructs tensors using this function (which by default
function that is able to construct a GPU type. This way the constructs normal tensors).
class `Scan` does not need to be aware of the details for the
GPU, it just constructs any tensor using this function (which TODO: Clean up this approach and everything else related to GPUs; it's
by default constructs normal tensors). all currently a very leaky set of abstractions.
""" """
# adding properties into self
self.inputs = inputs self.inputs = inputs
self.outputs = outputs self.outputs = outputs
self.info = info self.info = info
self.truncate_gradient = truncate_gradient
self.name = name
self.gpua = gpua
self.as_while = as_while
self.profile = profile
self.allow_gc = allow_gc
self.strict = strict
self.__dict__.update(dataclasses.asdict(info)) self.__dict__.update(dataclasses.asdict(info))
# Clone mode_instance, altering "allow_gc" for the linker, # Clone mode_instance, altering "allow_gc" for the linker,
...@@ -591,11 +643,6 @@ class Scan(Op, ScanMethodsMixin): ...@@ -591,11 +643,6 @@ class Scan(Op, ScanMethodsMixin):
if not hasattr(self, "name") or self.name is None: if not hasattr(self, "name") or self.name is None:
self.name = "scan_fn" self.name = "scan_fn"
# to have a fair __eq__ comparison later on, we update the info with
# the actual mode used to compile the function and the name of the
# function that we set in case none was given
self.info = dataclasses.replace(self.info, name=self.name)
# Pre-computing some values to speed up perform # Pre-computing some values to speed up perform
self.mintaps = [np.min(x) for x in self.tap_array] self.mintaps = [np.min(x) for x in self.tap_array]
self.mintaps += [0 for x in range(self.n_nit_sot)] self.mintaps += [0 for x in range(self.n_nit_sot)]
...@@ -606,8 +653,9 @@ class Scan(Op, ScanMethodsMixin): ...@@ -606,8 +653,9 @@ class Scan(Op, ScanMethodsMixin):
self.nit_sot_arg_offset = self.shared_arg_offset + self.n_shared_outs self.nit_sot_arg_offset = self.shared_arg_offset + self.n_shared_outs
self.n_outs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot self.n_outs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot
self.n_tap_outs = self.n_mit_mot + self.n_mit_sot self.n_tap_outs = self.n_mit_mot + self.n_mit_sot
if self.info.gpua:
self._hash_inner_graph = self.info.gpu_hash if self.gpua:
self._hash_inner_graph = self.gpu_hash
else: else:
# Do the missing inputs check here to have the error early. # Do the missing inputs check here to have the error early.
for var in graph_inputs(self.outputs, self.inputs): for var in graph_inputs(self.outputs, self.inputs):
...@@ -1072,6 +1120,24 @@ class Scan(Op, ScanMethodsMixin): ...@@ -1072,6 +1120,24 @@ class Scan(Op, ScanMethodsMixin):
if self.info != other.info: if self.info != other.info:
return False return False
if self.gpua != other.gpua:
return False
if self.as_while != other.as_while:
return False
if self.profile != other.profile:
return False
if self.truncate_gradient != other.truncate_gradient:
return False
if self.name != other.name:
return False
if self.allow_gc != other.allow_gc:
return False
# Compare inner graphs # Compare inner graphs
# TODO: Use `self.inner_fgraph == other.inner_fgraph` # TODO: Use `self.inner_fgraph == other.inner_fgraph`
if len(self.inputs) != len(other.inputs): if len(self.inputs) != len(other.inputs):
...@@ -1115,7 +1181,19 @@ class Scan(Op, ScanMethodsMixin): ...@@ -1115,7 +1181,19 @@ class Scan(Op, ScanMethodsMixin):
return aux_txt return aux_txt
def __hash__(self): def __hash__(self):
return hash((type(self), self._hash_inner_graph, self.info)) return hash(
(
type(self),
self._hash_inner_graph,
self.info,
self.gpua,
self.as_while,
self.profile,
self.truncate_gradient,
self.name,
self.allow_gc,
)
)
def make_thunk(self, node, storage_map, compute_map, no_recycling, impl=None): def make_thunk(self, node, storage_map, compute_map, no_recycling, impl=None):
""" """
...@@ -2661,18 +2739,12 @@ class Scan(Op, ScanMethodsMixin): ...@@ -2661,18 +2739,12 @@ class Scan(Op, ScanMethodsMixin):
n_seqs=len(outer_inp_seqs), n_seqs=len(outer_inp_seqs),
n_mit_sot=0, n_mit_sot=0,
tap_array=tuple(tuple(v) for v in new_tap_array), tap_array=tuple(tuple(v) for v in new_tap_array),
gpua=False,
n_mit_mot=len(outer_inp_mitmot), n_mit_mot=len(outer_inp_mitmot),
n_mit_mot_outs=n_mitmot_outs, n_mit_mot_outs=n_mitmot_outs,
mit_mot_out_slices=tuple(tuple(v) for v in mitmot_out_taps), mit_mot_out_slices=tuple(tuple(v) for v in mitmot_out_taps),
truncate_gradient=self.truncate_gradient,
n_sit_sot=n_sitsot_outs, n_sit_sot=n_sitsot_outs,
n_shared_outs=0, n_shared_outs=0,
n_nit_sot=n_nit_sot, n_nit_sot=n_nit_sot,
as_while=False,
profile=self.profile,
name=f"grad_of_{self.name}" if self.name else None,
allow_gc=self.allow_gc,
) )
outer_inputs = ( outer_inputs = (
...@@ -2694,7 +2766,18 @@ class Scan(Op, ScanMethodsMixin): ...@@ -2694,7 +2766,18 @@ class Scan(Op, ScanMethodsMixin):
) )
inner_gfn_outs = inner_out_mitmot + inner_out_sitsot + inner_out_nitsot inner_gfn_outs = inner_out_mitmot + inner_out_sitsot + inner_out_nitsot
local_op = Scan(inner_gfn_ins, inner_gfn_outs, info, self.mode) local_op = Scan(
inner_gfn_ins,
inner_gfn_outs,
info,
mode=self.mode,
truncate_gradient=self.truncate_gradient,
gpua=False,
as_while=False,
profile=self.profile,
name=f"grad_of_{self.name}" if self.name else None,
allow_gc=self.allow_gc,
)
outputs = local_op(*outer_inputs) outputs = local_op(*outer_inputs)
if type(outputs) not in (list, tuple): if type(outputs) not in (list, tuple):
outputs = [outputs] outputs = [outputs]
...@@ -3013,17 +3096,22 @@ class Scan(Op, ScanMethodsMixin): ...@@ -3013,17 +3096,22 @@ class Scan(Op, ScanMethodsMixin):
n_nit_sot=self.n_nit_sot * 2, n_nit_sot=self.n_nit_sot * 2,
n_shared_outs=self.n_shared_outs, n_shared_outs=self.n_shared_outs,
n_mit_mot_outs=n_mit_mot_outs * 2, n_mit_mot_outs=n_mit_mot_outs * 2,
tap_array=tuple(tuple(v) for v in new_tap_array),
mit_mot_out_slices=tuple(tuple(v) for v in self.mit_mot_out_slices) * 2,
)
local_op = Scan(
inner_ins,
inner_outs,
info,
mode=self.mode,
gpua=False, gpua=False,
as_while=self.as_while, as_while=self.as_while,
profile=self.profile, profile=self.profile,
truncate_gradient=self.truncate_gradient, truncate_gradient=self.truncate_gradient,
name=f"rop_of_{self.name}" if self.name else None, name=f"rop_of_{self.name}" if self.name else None,
allow_gc=self.allow_gc, allow_gc=self.allow_gc,
tap_array=tuple(tuple(v) for v in new_tap_array),
mit_mot_out_slices=tuple(tuple(v) for v in self.mit_mot_out_slices) * 2,
) )
local_op = Scan(inner_ins, inner_outs, info, self.mode)
outputs = local_op(*scan_inputs) outputs = local_op(*scan_inputs)
if type(outputs) not in (list, tuple): if type(outputs) not in (list, tuple):
outputs = [outputs] outputs = [outputs]
......
...@@ -217,7 +217,19 @@ def remove_constants_and_unused_inputs_scan(fgraph, node): ...@@ -217,7 +217,19 @@ def remove_constants_and_unused_inputs_scan(fgraph, node):
if len(nw_inner) != len(op_ins): if len(nw_inner) != len(op_ins):
op_outs = clone_replace(op_outs, replace=givens) op_outs = clone_replace(op_outs, replace=givens)
nw_info = dataclasses.replace(op.info, n_seqs=nw_n_seqs) nw_info = dataclasses.replace(op.info, n_seqs=nw_n_seqs)
nwScan = Scan(nw_inner, op_outs, nw_info, op.mode) nwScan = Scan(
nw_inner,
op_outs,
nw_info,
mode=op.mode,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
)
nw_outs = nwScan(*nw_outer, return_list=True) nw_outs = nwScan(*nw_outer, return_list=True)
return dict([("remove", [node])] + list(zip(node.outputs, nw_outs))) return dict([("remove", [node])] + list(zip(node.outputs, nw_outs)))
else: else:
...@@ -396,7 +408,19 @@ class PushOutNonSeqScan(GlobalOptimizer): ...@@ -396,7 +408,19 @@ class PushOutNonSeqScan(GlobalOptimizer):
op_ins = clean_inputs + nw_inner op_ins = clean_inputs + nw_inner
# Reconstruct node # Reconstruct node
nwScan = Scan(op_ins, op_outs, op.info, op.mode) nwScan = Scan(
op_ins,
op_outs,
op.info,
mode=op.mode,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
)
# Do not call make_node for test_value # Do not call make_node for test_value
nw_node = nwScan(*(node.inputs + nw_outer), return_list=True)[0].owner nw_node = nwScan(*(node.inputs + nw_outer), return_list=True)[0].owner
...@@ -666,7 +690,19 @@ class PushOutSeqScan(GlobalOptimizer): ...@@ -666,7 +690,19 @@ class PushOutSeqScan(GlobalOptimizer):
nw_info = dataclasses.replace( nw_info = dataclasses.replace(
op.info, n_seqs=op.info.n_seqs + len(nw_inner) op.info, n_seqs=op.info.n_seqs + len(nw_inner)
) )
nwScan = Scan(op_ins, op_outs, nw_info, op.mode) nwScan = Scan(
op_ins,
op_outs,
nw_info,
mode=op.mode,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
)
# Do not call make_node for test_value # Do not call make_node for test_value
nw_node = nwScan( nw_node = nwScan(
*(node.inputs[:1] + nw_outer + node.inputs[1:]), *(node.inputs[:1] + nw_outer + node.inputs[1:]),
...@@ -751,7 +787,9 @@ class PushOutScanOutput(GlobalOptimizer): ...@@ -751,7 +787,9 @@ class PushOutScanOutput(GlobalOptimizer):
# Use `ScanArgs` to parse the inputs and outputs of scan for ease of # Use `ScanArgs` to parse the inputs and outputs of scan for ease of
# use # use
args = ScanArgs(node.inputs, node.outputs, op.inputs, op.outputs, op.info) args = ScanArgs(
node.inputs, node.outputs, op.inputs, op.outputs, op.info, op.as_while
)
new_scan_node = None new_scan_node = None
clients = {} clients = {}
...@@ -921,6 +959,7 @@ class PushOutScanOutput(GlobalOptimizer): ...@@ -921,6 +959,7 @@ class PushOutScanOutput(GlobalOptimizer):
new_scan_node.op.inputs, new_scan_node.op.inputs,
new_scan_node.op.outputs, new_scan_node.op.outputs,
new_scan_node.op.info, new_scan_node.op.info,
new_scan_node.op.as_while,
) )
new_outs = new_scan_args.outer_out_nit_sot[-len(add_as_nitsots) :] new_outs = new_scan_args.outer_out_nit_sot[-len(add_as_nitsots) :]
...@@ -952,7 +991,14 @@ class PushOutScanOutput(GlobalOptimizer): ...@@ -952,7 +991,14 @@ class PushOutScanOutput(GlobalOptimizer):
new_scan_args.inner_inputs, new_scan_args.inner_inputs,
new_scan_args.inner_outputs, new_scan_args.inner_outputs,
new_scan_args.info, new_scan_args.info,
old_scan_node.op.mode, mode=old_scan_node.op.mode,
gpua=old_scan_node.op.gpua,
as_while=old_scan_node.op.as_while,
profile=old_scan_node.op.profile,
truncate_gradient=old_scan_node.op.truncate_gradient,
# TODO: This seems questionable
name=old_scan_node.op.name,
allow_gc=old_scan_node.op.allow_gc,
) )
# Create the Apply node for the scan op # Create the Apply node for the scan op
...@@ -1059,7 +1105,18 @@ class ScanInplaceOptimizer(GlobalOptimizer): ...@@ -1059,7 +1105,18 @@ class ScanInplaceOptimizer(GlobalOptimizer):
typeConstructor = self.typeInfer(node) typeConstructor = self.typeInfer(node)
new_op = Scan( new_op = Scan(
op.inputs, op.outputs, op.info, op.mode, typeConstructor=typeConstructor op.inputs,
op.outputs,
op.info,
mode=op.mode,
typeConstructor=typeConstructor,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
) )
destroy_map = op.destroy_map.copy() destroy_map = op.destroy_map.copy()
...@@ -1086,9 +1143,7 @@ class ScanInplaceOptimizer(GlobalOptimizer): ...@@ -1086,9 +1143,7 @@ class ScanInplaceOptimizer(GlobalOptimizer):
alloc_ops = (Alloc, AllocEmpty) alloc_ops = (Alloc, AllocEmpty)
nodes = fgraph.toposort()[::-1] nodes = fgraph.toposort()[::-1]
scan_nodes = [ scan_nodes = [
x x for x in nodes if (isinstance(x.op, Scan) and x.op.gpua == self.gpua_flag)
for x in nodes
if (isinstance(x.op, Scan) and x.op.info.gpua == self.gpua_flag)
] ]
for scan_idx in range(len(scan_nodes)): for scan_idx in range(len(scan_nodes)):
...@@ -1593,7 +1648,20 @@ class ScanSaveMem(GlobalOptimizer): ...@@ -1593,7 +1648,20 @@ class ScanSaveMem(GlobalOptimizer):
return return
# Do not call make_node for test_value # Do not call make_node for test_value
new_outs = Scan(inps, outs, info, op.mode)(*node_ins, return_list=True) new_op = Scan(
inps,
outs,
info,
mode=op.mode,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
)
new_outs = new_op(*node_ins, return_list=True)
old_new = [] old_new = []
# 3.7 Get replace pairs for those outputs that do not change # 3.7 Get replace pairs for those outputs that do not change
...@@ -1871,15 +1939,21 @@ class ScanMerge(GlobalOptimizer): ...@@ -1871,15 +1939,21 @@ class ScanMerge(GlobalOptimizer):
n_sit_sot=sum([nd.op.n_sit_sot for nd in nodes]), n_sit_sot=sum([nd.op.n_sit_sot for nd in nodes]),
n_shared_outs=sum([nd.op.n_shared_outs for nd in nodes]), n_shared_outs=sum([nd.op.n_shared_outs for nd in nodes]),
n_nit_sot=sum([nd.op.n_nit_sot for nd in nodes]), n_nit_sot=sum([nd.op.n_nit_sot for nd in nodes]),
truncate_gradient=nodes[0].op.truncate_gradient, )
old_op = nodes[0].op
new_op = Scan(
new_inner_ins,
new_inner_outs,
info,
mode=old_op.mode,
profile=old_op.profile,
truncate_gradient=old_op.truncate_gradient,
allow_gc=old_op.allow_gc,
name="&".join([nd.op.name for nd in nodes]), name="&".join([nd.op.name for nd in nodes]),
gpua=False, gpua=False,
as_while=as_while, as_while=as_while,
profile=nodes[0].op.profile,
allow_gc=nodes[0].op.allow_gc,
) )
new_op = Scan(new_inner_ins, new_inner_outs, info, nodes[0].op.mode)
new_outs = new_op(*outer_ins) new_outs = new_op(*outer_ins)
if not isinstance(new_outs, (list, tuple)): if not isinstance(new_outs, (list, tuple)):
...@@ -2005,7 +2079,12 @@ def scan_merge_inouts(fgraph, node): ...@@ -2005,7 +2079,12 @@ def scan_merge_inouts(fgraph, node):
# Equivalent inputs will be stored in inp_equiv, then a new # Equivalent inputs will be stored in inp_equiv, then a new
# scan node created without duplicates. # scan node created without duplicates.
a = ScanArgs( a = ScanArgs(
node.inputs, node.outputs, node.op.inputs, node.op.outputs, node.op.info node.inputs,
node.outputs,
node.op.inputs,
node.op.outputs,
node.op.info,
node.op.as_while,
) )
inp_equiv = {} inp_equiv = {}
...@@ -2044,13 +2123,32 @@ def scan_merge_inouts(fgraph, node): ...@@ -2044,13 +2123,32 @@ def scan_merge_inouts(fgraph, node):
a_inner_outs = a.inner_outputs a_inner_outs = a.inner_outputs
inner_outputs = clone_replace(a_inner_outs, replace=inp_equiv) inner_outputs = clone_replace(a_inner_outs, replace=inp_equiv)
op = Scan(inner_inputs, inner_outputs, info, node.op.mode) new_op = Scan(
outputs = op(*outer_inputs) inner_inputs,
inner_outputs,
info,
mode=node.op.mode,
gpua=node.op.gpua,
as_while=node.op.as_while,
profile=node.op.profile,
truncate_gradient=node.op.truncate_gradient,
# TODO: This seems questionable
name=node.op.name,
allow_gc=node.op.allow_gc,
)
outputs = new_op(*outer_inputs)
if not isinstance(outputs, (list, tuple)): if not isinstance(outputs, (list, tuple)):
outputs = [outputs] outputs = [outputs]
na = ScanArgs(outer_inputs, outputs, op.inputs, op.outputs, op.info) na = ScanArgs(
outer_inputs,
outputs,
new_op.inputs,
new_op.outputs,
new_op.info,
new_op.as_while,
)
remove = [node] remove = [node]
else: else:
na = a na = a
...@@ -2302,7 +2400,19 @@ class PushOutDot1(GlobalOptimizer): ...@@ -2302,7 +2400,19 @@ class PushOutDot1(GlobalOptimizer):
new_inner_inps, new_inner_outs = reconstruct_graph( new_inner_inps, new_inner_outs = reconstruct_graph(
_new_inner_inps, _new_inner_outs _new_inner_inps, _new_inner_outs
) )
new_op = Scan(new_inner_inps, new_inner_outs, new_info, op.mode) new_op = Scan(
new_inner_inps,
new_inner_outs,
new_info,
mode=op.mode,
gpua=op.gpua,
as_while=op.as_while,
profile=op.profile,
truncate_gradient=op.truncate_gradient,
# TODO: This seems questionable
name=op.name,
allow_gc=op.allow_gc,
)
_scan_inputs = ( _scan_inputs = (
[node.inputs[0]] [node.inputs[0]]
+ outer_seqs + outer_seqs
......
...@@ -701,12 +701,6 @@ def compress_outs(op, not_required, inputs): ...@@ -701,12 +701,6 @@ def compress_outs(op, not_required, inputs):
n_sit_sot=0, n_sit_sot=0,
n_shared_outs=0, n_shared_outs=0,
n_nit_sot=0, n_nit_sot=0,
truncate_gradient=op.info.truncate_gradient,
name=op.info.name,
gpua=op.info.gpua,
as_while=op.info.as_while,
profile=op.info.profile,
allow_gc=op.info.allow_gc,
) )
op_inputs = op.inputs[: op.n_seqs] op_inputs = op.inputs[: op.n_seqs]
...@@ -886,16 +880,18 @@ class ScanArgs: ...@@ -886,16 +880,18 @@ class ScanArgs:
_inner_inputs, _inner_inputs,
_inner_outputs, _inner_outputs,
info, info,
as_while,
clone=True, clone=True,
): ):
self.n_steps = outer_inputs[0] self.n_steps = outer_inputs[0]
self.as_while = as_while
if clone: if clone:
rval = reconstruct_graph(_inner_inputs, _inner_outputs, "") rval = reconstruct_graph(_inner_inputs, _inner_outputs, "")
else: else:
rval = (_inner_inputs, _inner_outputs) rval = (_inner_inputs, _inner_outputs)
if info.as_while: if self.as_while:
self.cond = [rval[1][-1]] self.cond = [rval[1][-1]]
inner_outputs = rval[1][:-1] inner_outputs = rval[1][:-1]
else: else:
...@@ -1000,18 +996,6 @@ class ScanArgs: ...@@ -1000,18 +996,6 @@ class ScanArgs:
assert p == len(outer_outputs) assert p == len(outer_outputs)
assert q == len(inner_outputs) assert q == len(inner_outputs)
self.other_info = {
k: getattr(info, k)
for k in (
"truncate_gradient",
"name",
"gpua",
"as_while",
"profile",
"allow_gc",
)
}
@staticmethod @staticmethod
def from_node(node, clone=False): def from_node(node, clone=False):
from aesara.scan.op import Scan from aesara.scan.op import Scan
...@@ -1024,6 +1008,7 @@ class ScanArgs: ...@@ -1024,6 +1008,7 @@ class ScanArgs:
node.op.inputs, node.op.inputs,
node.op.outputs, node.op.outputs,
node.op.info, node.op.info,
node.op.as_while,
clone=clone, clone=clone,
) )
...@@ -1041,14 +1026,8 @@ class ScanArgs: ...@@ -1041,14 +1026,8 @@ class ScanArgs:
n_shared_outs=0, n_shared_outs=0,
n_mit_mot_outs=0, n_mit_mot_outs=0,
mit_mot_out_slices=(), mit_mot_out_slices=(),
truncate_gradient=-1,
name=None,
gpua=False,
as_while=False,
profile=False,
allow_gc=False,
) )
res = cls([1], [], [], [], info) res = cls([1], [], [], [], info, False)
res.n_steps = None res.n_steps = None
return res return res
...@@ -1152,7 +1131,6 @@ class ScanArgs: ...@@ -1152,7 +1131,6 @@ class ScanArgs:
n_shared_outs=len(self.outer_in_shared), n_shared_outs=len(self.outer_in_shared),
n_mit_mot_outs=sum(len(s) for s in self.mit_mot_out_slices), n_mit_mot_outs=sum(len(s) for s in self.mit_mot_out_slices),
mit_mot_out_slices=tuple(self.mit_mot_out_slices), mit_mot_out_slices=tuple(self.mit_mot_out_slices),
**self.other_info,
) )
def get_alt_field(self, var_info, alt_prefix): def get_alt_field(self, var_info, alt_prefix):
...@@ -1341,7 +1319,6 @@ class ScanArgs: ...@@ -1341,7 +1319,6 @@ class ScanArgs:
"mit_mot_out_slices", "mit_mot_out_slices",
"mit_mot_in_slices", "mit_mot_in_slices",
"mit_sot_in_slices", "mit_sot_in_slices",
"other_info",
) )
): ):
setattr(res, attr, copy.copy(getattr(self, attr))) setattr(res, attr, copy.copy(getattr(self, attr)))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论