提交 60e0ed1c 作者: Frédéric Bastien

Merge pull request #3660 from carriepl/scan_cleanup

Clean up (outs|inps)_on_gpu to (outs|inps)_is_tensor
...@@ -321,16 +321,14 @@ class Scan(PureOp): ...@@ -321,16 +321,14 @@ class Scan(PureOp):
# not having been preallocated # not having been preallocated
self.mitmots_preallocated = [False] * self.n_mit_mot_outs self.mitmots_preallocated = [False] * self.n_mit_mot_outs
if not hasattr(self, 'outs_on_gpu'): if not hasattr(self, 'outs_is_tensor'):
# The thunk has been compiled before the analysis, at # The thunk has been compiled before the analysis, at
# compilation time, of the location of the inputs and outputs. # compilation time, of the location of the inputs and outputs.
# Perform this analysis here. # Perform this analysis here.
self.inps_on_gpu = [not isinstance(out, self.inps_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
theano.tensor.TensorVariable) for out in self.fn.maker.fgraph.inputs]
for out in self.fn.maker.fgraph.inputs] self.outs_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
self.outs_on_gpu = [not isinstance(out, for out in self.fn.maker.fgraph.outputs]
theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.outputs]
# Ensure that the graph associated with the inner function is valid. # Ensure that the graph associated with the inner function is valid.
self.validate_inner_graph() self.validate_inner_graph()
...@@ -871,10 +869,10 @@ class Scan(PureOp): ...@@ -871,10 +869,10 @@ class Scan(PureOp):
# Analyse the compile inner function to determine which inputs and # Analyse the compile inner function to determine which inputs and
# outputs are on the gpu and speed up some checks during the execution # outputs are on the gpu and speed up some checks during the execution
self.inps_on_gpu = [not isinstance(out, theano.tensor.TensorVariable) self.inps_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.inputs] for out in self.fn.maker.fgraph.inputs]
self.outs_on_gpu = [not isinstance(out, theano.tensor.TensorVariable) self.outs_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.outputs] for out in self.fn.maker.fgraph.outputs]
try: try:
cython_mintaps = numpy.asarray(self.mintaps, dtype='int32') cython_mintaps = numpy.asarray(self.mintaps, dtype='int32')
...@@ -912,8 +910,10 @@ class Scan(PureOp): ...@@ -912,8 +910,10 @@ class Scan(PureOp):
cython_mitmots_preallocated = numpy.asarray(self.mitmots_preallocated, cython_mitmots_preallocated = numpy.asarray(self.mitmots_preallocated,
dtype='int32') dtype='int32')
cython_inps_on_gpu = numpy.asarray(self.inps_on_gpu, dtype='int32') cython_inps_is_tensor = numpy.asarray(self.inps_is_tensor,
cython_outs_on_gpu = numpy.asarray(self.outs_on_gpu, dtype='int32') dtype='int32')
cython_outs_is_tensor = numpy.asarray(self.outs_is_tensor,
dtype='int32')
if hasattr(self, 'destroy_map'): if hasattr(self, 'destroy_map'):
cython_destroy_map = [x in self.destroy_map cython_destroy_map = [x in self.destroy_map
...@@ -942,8 +942,8 @@ class Scan(PureOp): ...@@ -942,8 +942,8 @@ class Scan(PureOp):
cython_mit_mot_out_slices, cython_mit_mot_out_slices,
cython_mit_mot_out_nslices, cython_mit_mot_out_nslices,
cython_mitmots_preallocated, cython_mitmots_preallocated,
cython_inps_on_gpu, cython_inps_is_tensor,
cython_outs_on_gpu, cython_outs_is_tensor,
self.fn.fn, self.fn.fn,
self.fn, self.fn,
cython_destroy_map, cython_destroy_map,
...@@ -1305,10 +1305,10 @@ class Scan(PureOp): ...@@ -1305,10 +1305,10 @@ class Scan(PureOp):
if var is None: if var is None:
old_output_data[idx] = None old_output_data[idx] = None
elif self.outs_on_gpu[idx]: elif self.outs_is_tensor[idx]:
old_output_data[idx] = var.gpudata
else:
old_output_data[idx] = var.data old_output_data[idx] = var.data
else:
old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays, # 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays,
# etc) associated with mitmot inputs currently in the # etc) associated with mitmot inputs currently in the
...@@ -1323,10 +1323,10 @@ class Scan(PureOp): ...@@ -1323,10 +1323,10 @@ class Scan(PureOp):
if var is None: if var is None:
old_mitmot_input_data[idx] = None old_mitmot_input_data[idx] = None
elif self.inps_on_gpu[idx]: elif self.inps_is_tensor[idx]:
old_mitmot_input_data[idx] = var.gpudata
else:
old_mitmot_input_data[idx] = var.data old_mitmot_input_data[idx] = var.data
else:
old_mitmot_input_data[idx] = var.gpudata
# 5.1 compute outputs # 5.1 compute outputs
t0_fn = time.time() t0_fn = time.time()
...@@ -1388,10 +1388,10 @@ class Scan(PureOp): ...@@ -1388,10 +1388,10 @@ class Scan(PureOp):
new_var = input_storage[self.n_seqs + inp_idx].storage[0] new_var = input_storage[self.n_seqs + inp_idx].storage[0]
if old_var is new_var: if old_var is new_var:
old_data = old_mitmot_input_data[inp_idx] old_data = old_mitmot_input_data[inp_idx]
if self.inps_on_gpu[self.n_seqs + inp_idx]: if self.inps_is_tensor[self.n_seqs + inp_idx]:
same_data = (new_var.gpudata == old_data)
else:
same_data = (new_var.data == old_data) same_data = (new_var.data == old_data)
else:
same_data = (new_var.gpudata == old_data)
else: else:
same_data = False same_data = False
...@@ -1434,10 +1434,10 @@ class Scan(PureOp): ...@@ -1434,10 +1434,10 @@ class Scan(PureOp):
old_data = old_output_data[offset_out + j] old_data = old_output_data[offset_out + j]
if old_data is None: if old_data is None:
output_reused = False output_reused = False
elif self.outs_on_gpu[offset_out + j]: elif self.outs_is_tensor[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = (new_var.data == old_data) output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else: else:
output_reused = False output_reused = False
...@@ -1477,10 +1477,10 @@ class Scan(PureOp): ...@@ -1477,10 +1477,10 @@ class Scan(PureOp):
if old_var is new_var: if old_var is new_var:
if old_data is None: if old_data is None:
output_reused = False output_reused = False
elif self.outs_on_gpu[offset_out + j]: elif self.outs_is_tensor[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = (new_var.data == old_data) output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else: else:
output_reused = False output_reused = False
......
...@@ -62,7 +62,7 @@ import copy ...@@ -62,7 +62,7 @@ import copy
def get_version(): def get_version():
return 0.291 return 0.292
@cython.boundscheck(False) @cython.boundscheck(False)
def perform( def perform(
...@@ -83,8 +83,8 @@ def perform( ...@@ -83,8 +83,8 @@ def perform(
numpy.ndarray[numpy.int32_t,ndim=2] mit_mot_out_slices, numpy.ndarray[numpy.int32_t,ndim=2] mit_mot_out_slices,
numpy.ndarray[numpy.int32_t,ndim=1] mit_mot_out_nslices, numpy.ndarray[numpy.int32_t,ndim=1] mit_mot_out_nslices,
numpy.ndarray[numpy.int32_t,ndim=1] mitmots_preallocated, numpy.ndarray[numpy.int32_t,ndim=1] mitmots_preallocated,
numpy.ndarray[numpy.int32_t,ndim=1] inps_on_gpu, numpy.ndarray[numpy.int32_t,ndim=1] inps_is_tensor,
numpy.ndarray[numpy.int32_t,ndim=1] outs_on_gpu, numpy.ndarray[numpy.int32_t,ndim=1] outs_is_tensor,
fn, fn,
fnct, fnct,
numpy.ndarray[numpy.int32_t,ndim=1] destroy_map, numpy.ndarray[numpy.int32_t,ndim=1] destroy_map,
...@@ -138,11 +138,11 @@ def perform( ...@@ -138,11 +138,11 @@ def perform(
mit_mot_out_nslices: int32 ndarray (Can be replaced by a list) mit_mot_out_nslices: int32 ndarray (Can be replaced by a list)
Same as tap_array_len, but is the number of output taps of the Same as tap_array_len, but is the number of output taps of the
mit_mot sequences (i.e. it corresponds to mit_mot_out_slices) mit_mot sequences (i.e. it corresponds to mit_mot_out_slices)
inps_on_gpu : int32 ndarray (Can be replaced by a list) inps_is_tensor : int32 ndarray (Can be replaced by a list)
Array of boolean indicating, for every input, whether it is on the GPU Array of boolean indicating, for every input, whether it is a tensor
or not or not
outs_on_gpu : int32 ndarray (Can be replaced by a list) outs_is_tensor : int32 ndarray (Can be replaced by a list)
Array of boolean indicating, for every output, whether it is on the GPU Array of boolean indicating, for every output, whether it is a tensor
or not or not
fn: callable fn: callable
This is the linker, i.e. the function that will loop over the This is the linker, i.e. the function that will loop over the
...@@ -368,10 +368,10 @@ def perform( ...@@ -368,10 +368,10 @@ def perform(
if var is None: if var is None:
old_output_data[idx] = None old_output_data[idx] = None
elif outs_on_gpu[idx]: elif outs_is_tensor[idx]:
old_output_data[idx] = var.gpudata
else:
old_output_data[idx] = var.data old_output_data[idx] = var.data
else:
old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays, # 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays,
# etc) associated with mitmot inputs currently in the input_storage to # etc) associated with mitmot inputs currently in the input_storage to
...@@ -385,10 +385,10 @@ def perform( ...@@ -385,10 +385,10 @@ def perform(
if var is None: if var is None:
old_mitmot_input_data[idx] = None old_mitmot_input_data[idx] = None
elif inps_on_gpu[idx]: elif inps_is_tensor[idx]:
old_mitmot_input_data[idx] = var.gpudata
else:
old_mitmot_input_data[idx] = var.data old_mitmot_input_data[idx] = var.data
else:
old_mitmot_input_data[idx] = var.gpudata
# 5.1 compute outputs # 5.1 compute outputs
t0_fn = time.time() t0_fn = time.time()
...@@ -450,10 +450,10 @@ def perform( ...@@ -450,10 +450,10 @@ def perform(
new_var = input_storage[n_seqs + inp_idx].storage[0] new_var = input_storage[n_seqs + inp_idx].storage[0]
if old_var is new_var: if old_var is new_var:
old_data = old_mitmot_input_data[inp_idx] old_data = old_mitmot_input_data[inp_idx]
if inps_on_gpu[n_seqs + inp_idx]: if inps_is_tensor[n_seqs + inp_idx]:
same_data = (new_var.gpudata == old_data)
else:
same_data = (new_var.data == old_data) same_data = (new_var.data == old_data)
else:
same_data = (new_var.gpudata == old_data)
else: else:
same_data = False same_data = False
...@@ -494,10 +494,10 @@ def perform( ...@@ -494,10 +494,10 @@ def perform(
if old_var is new_var: if old_var is new_var:
if old_data is None: if old_data is None:
output_reused = False output_reused = False
elif outs_on_gpu[offset_out + j]: elif outs_is_tensor[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = (new_var.data == old_data) output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else: else:
output_reused = False output_reused = False
...@@ -536,10 +536,10 @@ def perform( ...@@ -536,10 +536,10 @@ def perform(
if old_var is new_var: if old_var is new_var:
if old_data is None: if old_data is None:
output_reused = False output_reused = False
elif outs_on_gpu[offset_out + j]: elif outs_is_tensor[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = (new_var.data == old_data) output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else: else:
output_reused = False output_reused = False
......
...@@ -17,7 +17,7 @@ from theano.gof import cmodule ...@@ -17,7 +17,7 @@ from theano.gof import cmodule
_logger = logging.getLogger('theano.scan_module.scan_perform') _logger = logging.getLogger('theano.scan_module.scan_perform')
version = 0.291 # must match constant returned in function get_version() version = 0.292 # must match constant returned in function get_version()
need_reload = False need_reload = False
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论