提交 d2b623a9 作者:--global

Apply memory reuse in Scan's Cython backend

上级 11964f0a
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -62,7 +62,7 @@ import copy ...@@ -62,7 +62,7 @@ import copy
def get_version(): def get_version():
return 0.285 return 0.286
@cython.boundscheck(False) @cython.boundscheck(False)
def perform( def perform(
...@@ -255,6 +255,8 @@ def perform( ...@@ -255,6 +255,8 @@ def perform(
other_args = args[offset:] other_args = args[offset:]
input_storage = fnct.input_storage input_storage = fnct.input_storage
output_storage = fnct.output_storage output_storage = fnct.output_storage
old_output_storage = [None] * len_output_storage
old_output_data = [None] * len_output_storage
offset = n_seqs offset = n_seqs
for idx in range(n_outs): for idx in range(n_outs):
offset += tap_array_len[idx] offset += tap_array_len[idx]
...@@ -339,9 +341,19 @@ def perform( ...@@ -339,9 +341,19 @@ def perform(
output_storage[<unsigned int>pdx].storage[0] = None output_storage[<unsigned int>pdx].storage[0] = None
# 4.5. Keep a reference to the variables currently in the # 4.5. Keep a reference to the variables currently in the
# output_storage to be able to compare them with the actual # output_storage, and their data, to be able to compare them with
# outputs of the inner function after its execution # the actual outputs of the inner function after its execution
old_output_storage = [o.storage[0] for o in output_storage] for idx in range(len_output_storage):
var = output_storage[idx].storage[0]
old_output_storage[idx] = var
if hasattr(var, 'gpudata'):
old_output_data[idx] = var.gpudata
elif hasattr(var, 'data'):
old_output_data[idx] = var.data
else:
old_output_data[idx] = None
# 5. compute outputs # 5. compute outputs
t0_fn = time.time() t0_fn = time.time()
...@@ -366,9 +378,26 @@ def perform( ...@@ -366,9 +378,26 @@ def perform(
# Check which of the pre-allocated outputs (if applicable) have # Check which of the pre-allocated outputs (if applicable) have
# been reused by the inner function # been reused by the inner function
for j in range(len_output_storage): for idx in range(len_output_storage):
output_reused[j] = (old_output_storage[j] is # If the storage map does not contain the same object, then
output_storage[j].storage[0]) # the pre-allocated output has not been reused
new_var = output_storage[idx].storage[0]
if old_output_storage[idx] is new_var:
# The pre-allocated output is only considered as having
# been reused if it still points to the same data as it
# did before the execution of the inner function
if old_output_data[idx] is None:
output_reused[idx] = False
else:
if hasattr(new_var, 'gpudata'):
output_reused[idx] = (new_var.gpudata ==
old_output_data[idx])
elif hasattr(new_var, 'data'):
output_reused[idx] = (new_var.data ==
old_output_data[idx])
else:
output_reused[idx] = False
offset_out = 0 offset_out = 0
# 5.1 Copy over the values for mit_mot outputs # 5.1 Copy over the values for mit_mot outputs
......
...@@ -16,7 +16,7 @@ from theano.gof import cmodule ...@@ -16,7 +16,7 @@ from theano.gof import cmodule
_logger = logging.getLogger('theano.scan_module.scan_perform') _logger = logging.getLogger('theano.scan_module.scan_perform')
version = 0.285 # must match constant returned in function get_version() version = 0.286 # must match constant returned in function get_version()
need_reload = False need_reload = False
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论