提交 d8e47500 作者: Pierre Luc Carrier 提交者: Pierre Luc Carrier

Correct checks for reuse of preallocated output

上级 b2570e99
...@@ -927,11 +927,14 @@ class Scan(PureOp): ...@@ -927,11 +927,14 @@ class Scan(PureOp):
offset += 1 offset += 1
# 4. collecting slices where the output should be stored # 4. collecting slices where the output should be stored
# 4.1. Collect slices for mitmots
for idx in xrange(self.n_mit_mot_outs): for idx in xrange(self.n_mit_mot_outs):
output_storage[idx].storage[0] = None output_storage[idx].storage[0] = None
# 4.2. Collect slices for mitsots, sitsots and nitsots
offset = self.n_mit_mot_outs offset = self.n_mit_mot_outs
if i != 0 and self.n_nit_sot > 0: if i != 0:
for idx in xrange(self.n_outs + self.n_nit_sot - for idx in xrange(self.n_outs + self.n_nit_sot -
self.n_mit_mot): self.n_mit_mot):
if (store_steps[idx + self.n_mit_mot] == 1 or if (store_steps[idx + self.n_mit_mot] == 1 or
...@@ -946,15 +949,24 @@ class Scan(PureOp): ...@@ -946,15 +949,24 @@ class Scan(PureOp):
self.n_mit_mot): self.n_mit_mot):
output_storage[idx + offset].storage[0] = None output_storage[idx + offset].storage[0] = None
# 4.3. Collect slices for shared outputs
offset += self.n_outs + self.n_nit_sot - self.n_mit_mot offset += self.n_outs + self.n_nit_sot - self.n_mit_mot
for idx in xrange(self.n_shared_outs): for idx in xrange(self.n_shared_outs):
output_storage[idx + offset].storage[0] = None output_storage[idx + offset].storage[0] = None
# If condition add it to the mix
# 4.4. If there is a condition add it to the mix
if self.as_while: if self.as_while:
pdx = offset + self.n_shared_outs pdx = offset + self.n_shared_outs
output_storage[pdx].storage[0] = None output_storage[pdx].storage[0] = None
# 4.5. Keep a reference to the variables currently in the
# output_storage to be able to compare them with the actual
# outputs of the inner function after its execution
old_output_storage = [o.storage[0] for o in output_storage]
# 5. compute outputs # 5. compute outputs
t0_fn = time.time() t0_fn = time.time()
try: try:
fn() fn()
except Exception: except Exception:
...@@ -974,11 +986,18 @@ class Scan(PureOp): ...@@ -974,11 +986,18 @@ class Scan(PureOp):
else: else:
# old-style linkers raise their own exceptions # old-style linkers raise their own exceptions
raise raise
dt_fn = time.time() - t0_fn dt_fn = time.time() - t0_fn
if self.as_while: if self.as_while:
pdx = offset + self.n_shared_outs pdx = offset + self.n_shared_outs
cond = output_storage[pdx].storage[0] == 0 cond = output_storage[pdx].storage[0] == 0
# Check which of the pre-allocated outputs (if applicable) have
# been reused by the inner function
output_reused = [old_output_storage[o] is
output_storage[o].storage[0]
for o in range(len(output_storage))]
t_fn += dt_fn t_fn += dt_fn
offset_out = 0 offset_out = 0
# 5.1 Copy over the values for mit_mot outputs # 5.1 Copy over the values for mit_mot outputs
...@@ -995,8 +1014,7 @@ class Scan(PureOp): ...@@ -995,8 +1014,7 @@ class Scan(PureOp):
for j in xrange(begin, end): for j in xrange(begin, end):
if (store_steps[j] == 1 or self.vector_outs[j] or if (store_steps[j] == 1 or self.vector_outs[j] or
outs[j][0][pos[j]] is not not output_reused[offset_out + j]):
output_storage[offset_out + j].storage[0]):
outs[j][0][pos[j]] = \ outs[j][0][pos[j]] = \
output_storage[offset_out + j].storage[0] output_storage[offset_out + j].storage[0]
...@@ -1020,8 +1038,7 @@ class Scan(PureOp): ...@@ -1020,8 +1038,7 @@ class Scan(PureOp):
outs[j][0] = outs[j][0][:store_steps[j]] outs[j][0] = outs[j][0][:store_steps[j]]
outs[j][0][pos[j]] = output_storage[jout].storage[0] outs[j][0][pos[j]] = output_storage[jout].storage[0]
elif (store_steps[j] == 1 or self.vector_outs[j] or elif (store_steps[j] == 1 or self.vector_outs[j] or
outs[j][0][pos[j]] is not not output_reused[offset_out + j]):
output_storage[j + offset_out].storage[0]):
outs[j][0][pos[j]] = \ outs[j][0][pos[j]] = \
output_storage[j + offset_out].storage[0] output_storage[j + offset_out].storage[0]
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论