Commit 60e0ed1c, authored by Frédéric Bastien

Merge pull request #3660 from carriepl/scan_cleanup

Clean up (outs|inps)_on_gpu to (outs|inps)_is_tensor
......@@ -321,16 +321,14 @@ class Scan(PureOp):
# not having been preallocated
self.mitmots_preallocated = [False] * self.n_mit_mot_outs
if not hasattr(self, 'outs_on_gpu'):
if not hasattr(self, 'outs_is_tensor'):
# The thunk has been compiled before the analysis, at
# compilation time, of the location of the inputs and outputs.
# Perform this analysis here.
self.inps_on_gpu = [not isinstance(out,
theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.inputs]
self.outs_on_gpu = [not isinstance(out,
theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.outputs]
self.inps_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.inputs]
self.outs_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.outputs]
# Ensure that the graph associated with the inner function is valid.
self.validate_inner_graph()
......@@ -871,10 +869,10 @@ class Scan(PureOp):
# Analyse the compile inner function to determine which inputs and
# outputs are on the gpu and speed up some checks during the execution
self.inps_on_gpu = [not isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.inputs]
self.outs_on_gpu = [not isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.outputs]
self.inps_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.inputs]
self.outs_is_tensor = [isinstance(out, theano.tensor.TensorVariable)
for out in self.fn.maker.fgraph.outputs]
try:
cython_mintaps = numpy.asarray(self.mintaps, dtype='int32')
......@@ -912,8 +910,10 @@ class Scan(PureOp):
cython_mitmots_preallocated = numpy.asarray(self.mitmots_preallocated,
dtype='int32')
cython_inps_on_gpu = numpy.asarray(self.inps_on_gpu, dtype='int32')
cython_outs_on_gpu = numpy.asarray(self.outs_on_gpu, dtype='int32')
cython_inps_is_tensor = numpy.asarray(self.inps_is_tensor,
dtype='int32')
cython_outs_is_tensor = numpy.asarray(self.outs_is_tensor,
dtype='int32')
if hasattr(self, 'destroy_map'):
cython_destroy_map = [x in self.destroy_map
......@@ -942,8 +942,8 @@ class Scan(PureOp):
cython_mit_mot_out_slices,
cython_mit_mot_out_nslices,
cython_mitmots_preallocated,
cython_inps_on_gpu,
cython_outs_on_gpu,
cython_inps_is_tensor,
cython_outs_is_tensor,
self.fn.fn,
self.fn,
cython_destroy_map,
......@@ -1305,10 +1305,10 @@ class Scan(PureOp):
if var is None:
old_output_data[idx] = None
elif self.outs_on_gpu[idx]:
old_output_data[idx] = var.gpudata
else:
elif self.outs_is_tensor[idx]:
old_output_data[idx] = var.data
else:
old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays,
# etc) associated with mitmot inputs currently in the
......@@ -1323,10 +1323,10 @@ class Scan(PureOp):
if var is None:
old_mitmot_input_data[idx] = None
elif self.inps_on_gpu[idx]:
old_mitmot_input_data[idx] = var.gpudata
else:
elif self.inps_is_tensor[idx]:
old_mitmot_input_data[idx] = var.data
else:
old_mitmot_input_data[idx] = var.gpudata
# 5.1 compute outputs
t0_fn = time.time()
......@@ -1388,10 +1388,10 @@ class Scan(PureOp):
new_var = input_storage[self.n_seqs + inp_idx].storage[0]
if old_var is new_var:
old_data = old_mitmot_input_data[inp_idx]
if self.inps_on_gpu[self.n_seqs + inp_idx]:
same_data = (new_var.gpudata == old_data)
else:
if self.inps_is_tensor[self.n_seqs + inp_idx]:
same_data = (new_var.data == old_data)
else:
same_data = (new_var.gpudata == old_data)
else:
same_data = False
......@@ -1434,10 +1434,10 @@ class Scan(PureOp):
old_data = old_output_data[offset_out + j]
if old_data is None:
output_reused = False
elif self.outs_on_gpu[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
elif self.outs_is_tensor[offset_out + j]:
output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = False
......@@ -1477,10 +1477,10 @@ class Scan(PureOp):
if old_var is new_var:
if old_data is None:
output_reused = False
elif self.outs_on_gpu[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
elif self.outs_is_tensor[offset_out + j]:
output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = False
......
......@@ -62,7 +62,7 @@ import copy
def get_version():
return 0.291
return 0.292
@cython.boundscheck(False)
def perform(
......@@ -83,8 +83,8 @@ def perform(
numpy.ndarray[numpy.int32_t,ndim=2] mit_mot_out_slices,
numpy.ndarray[numpy.int32_t,ndim=1] mit_mot_out_nslices,
numpy.ndarray[numpy.int32_t,ndim=1] mitmots_preallocated,
numpy.ndarray[numpy.int32_t,ndim=1] inps_on_gpu,
numpy.ndarray[numpy.int32_t,ndim=1] outs_on_gpu,
numpy.ndarray[numpy.int32_t,ndim=1] inps_is_tensor,
numpy.ndarray[numpy.int32_t,ndim=1] outs_is_tensor,
fn,
fnct,
numpy.ndarray[numpy.int32_t,ndim=1] destroy_map,
......@@ -138,11 +138,11 @@ def perform(
mit_mot_out_nslices: int32 ndarray (Can be replaced by a list)
Same as tap_array_len, but is the number of output taps of the
mit_mot sequences (i.e. it corresponds to mit_mot_out_slices)
inps_on_gpu : int32 ndarray (Can be replaced by a list)
Array of boolean indicating, for every input, whether it is on the GPU
inps_is_tensor : int32 ndarray (Can be replaced by a list)
Array of boolean indicating, for every input, whether it is a tensor
or not
outs_on_gpu : int32 ndarray (Can be replaced by a list)
Array of boolean indicating, for every output, whether it is on the GPU
outs_is_tensor : int32 ndarray (Can be replaced by a list)
Array of boolean indicating, for every output, whether it is a tensor
or not
fn: callable
This is the linker, i.e. the function that will loop over the
......@@ -368,10 +368,10 @@ def perform(
if var is None:
old_output_data[idx] = None
elif outs_on_gpu[idx]:
old_output_data[idx] = var.gpudata
else:
elif outs_is_tensor[idx]:
old_output_data[idx] = var.data
else:
old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays,
# etc) associated with mitmot inputs currently in the input_storage to
......@@ -385,10 +385,10 @@ def perform(
if var is None:
old_mitmot_input_data[idx] = None
elif inps_on_gpu[idx]:
old_mitmot_input_data[idx] = var.gpudata
else:
elif inps_is_tensor[idx]:
old_mitmot_input_data[idx] = var.data
else:
old_mitmot_input_data[idx] = var.gpudata
# 5.1 compute outputs
t0_fn = time.time()
......@@ -450,10 +450,10 @@ def perform(
new_var = input_storage[n_seqs + inp_idx].storage[0]
if old_var is new_var:
old_data = old_mitmot_input_data[inp_idx]
if inps_on_gpu[n_seqs + inp_idx]:
same_data = (new_var.gpudata == old_data)
else:
if inps_is_tensor[n_seqs + inp_idx]:
same_data = (new_var.data == old_data)
else:
same_data = (new_var.gpudata == old_data)
else:
same_data = False
......@@ -494,10 +494,10 @@ def perform(
if old_var is new_var:
if old_data is None:
output_reused = False
elif outs_on_gpu[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
elif outs_is_tensor[offset_out + j]:
output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = False
......@@ -536,10 +536,10 @@ def perform(
if old_var is new_var:
if old_data is None:
output_reused = False
elif outs_on_gpu[offset_out + j]:
output_reused = (new_var.gpudata == old_data)
else:
elif outs_is_tensor[offset_out + j]:
output_reused = (new_var.data == old_data)
else:
output_reused = (new_var.gpudata == old_data)
else:
output_reused = False
......
......@@ -17,7 +17,7 @@ from theano.gof import cmodule
_logger = logging.getLogger('theano.scan_module.scan_perform')
version = 0.291 # must match constant returned in function get_version()
version = 0.292 # must match constant returned in function get_version()
need_reload = False
......
Markdown is supported
0%
You are about to add 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to post a comment