提交 df95d9a9 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Remove tentacles in scan.

上级 a388d94d
...@@ -998,22 +998,20 @@ def scan(fn, ...@@ -998,22 +998,20 @@ def scan(fn,
shared_inner_outputs) shared_inner_outputs)
if condition is not None: if condition is not None:
inner_outs.append(condition) inner_outs.append(condition)
# Cuda and Gpuarray are imported here, instead of being imported on top of # gpuarray is imported here, instead of being imported on top of
# the file because that would force on the user some dependencies that we # the file because that would force on the user some dependencies that we
# might not want. Currently we are working on removing the # might not want. Currently we are working on removing the
# dependencies on sandbox code completely. # dependencies on sandbox code completely.
from theano.sandbox import cuda
from theano import gpuarray from theano import gpuarray
if cuda.cuda_available or gpuarray.pygpu_activated: if gpuarray.pygpu_activated:
# very often we end up in this situation when we want to # very often we end up in this situation when we want to
# replace w with w_copy, where w is a GPU variable # replace w with w_copy, where w is a GPU variable
# and w_copy is TensorType. This is caused because shared # and w_copy is TensorType. This is caused because shared
# variables are put on GPU right aways >:| , # variables are put on GPU right away >:| ,
new_givens = OrderedDict() new_givens = OrderedDict()
for w, w_copy in iteritems(givens): for w, w_copy in iteritems(givens):
if ((isinstance(w.type, cuda.CudaNdarrayType) or if (isinstance(w.type, gpuarray.GpuArrayType) and
isinstance(w.type, gpuarray.GpuArrayType)) and
isinstance(w_copy.type, tensor.TensorType)): isinstance(w_copy.type, tensor.TensorType)):
for o in inner_outs: for o in inner_outs:
new_givens = traverse(o, w, w_copy, new_givens) new_givens = traverse(o, w, w_copy, new_givens)
...@@ -1046,7 +1044,7 @@ def scan(fn, ...@@ -1046,7 +1044,7 @@ def scan(fn,
info['name'] = name info['name'] = name
info['mode'] = mode info['mode'] = mode
info['destroy_map'] = OrderedDict() info['destroy_map'] = OrderedDict()
info['gpu'] = False info['gpua'] = False
info['as_while'] = as_while info['as_while'] = as_while
info['profile'] = profile info['profile'] = profile
info['allow_gc'] = allow_gc info['allow_gc'] = allow_gc
...@@ -1072,7 +1070,7 @@ def scan(fn, ...@@ -1072,7 +1070,7 @@ def scan(fn,
arg = tensor.as_tensor_variable(arg) arg = tensor.as_tensor_variable(arg)
except TypeError: except TypeError:
# This happens for Random States for e.g. but it is a good way # This happens for Random States for e.g. but it is a good way
# to make sure no input is a cuda ndarrays # to make sure all inputs are tensors.
pass pass
scan_inputs += [arg] scan_inputs += [arg]
scan_outs = local_op(*scan_inputs) scan_outs = local_op(*scan_inputs)
......
...@@ -125,8 +125,6 @@ class Scan(PureOp): ...@@ -125,8 +125,6 @@ class Scan(PureOp):
info, info,
typeConstructor=None, typeConstructor=None,
): ):
if 'gpua' not in info:
info['gpua'] = False
# adding properties into self # adding properties into self
self.inputs = inputs self.inputs = inputs
self.outputs = outputs self.outputs = outputs
...@@ -204,7 +202,7 @@ class Scan(PureOp): ...@@ -204,7 +202,7 @@ class Scan(PureOp):
self.n_shared_outs) self.n_shared_outs)
self.n_outs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot self.n_outs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot
self.n_tap_outs = self.n_mit_mot + self.n_mit_sot self.n_tap_outs = self.n_mit_mot + self.n_mit_sot
if self.info['gpu'] or self.info['gpua']: if self.info['gpua']:
self._hash_inner_graph = self.info['gpu_hash'] self._hash_inner_graph = self.info['gpu_hash']
else: else:
# Do the missing inputs check here to have the error early. # Do the missing inputs check here to have the error early.
...@@ -250,27 +248,6 @@ class Scan(PureOp): ...@@ -250,27 +248,6 @@ class Scan(PureOp):
"type '%s' and '%s' respectively." % "type '%s' and '%s' respectively." %
(self.name, type_input, type_output)) (self.name, type_input, type_output))
# If scan has the flag 'gpu' set to false (meaning that is shouldn't
# use the CUDA gpu backend ), ensure that is has no input and no
# output with type CudaNdarrayType
from theano.sandbox.cuda import CudaNdarrayType
if not self.info.get("gpu", False):
for inp in self.inputs:
if isinstance(inp.type, CudaNdarrayType):
raise TypeError("Inconsistency in the inner graph of "
"scan '%s' : one of the inputs to the "
"inner graph is of type CudaNdarray but "
"the attributes of the scan op indicate "
"that it shouldn't be the case")
for out in self.outputs:
if isinstance(out.type, CudaNdarrayType):
raise TypeError("Inconsistency in the inner graph of "
"scan '%s' : one of the outputs to the "
"inner graph is of type CudaNdarray but "
"the attributes of the scan op indicate "
"that it shouldn't be the case")
# If scan has the flag 'gpua' set to false (meaning that it shouldn't # If scan has the flag 'gpua' set to false (meaning that it shouldn't
# use the gpuarray gpu backend ), ensure that it has no input and no # use the gpuarray gpu backend ), ensure that it has no input and no
# output with type GpuArrayType # output with type GpuArrayType
...@@ -297,9 +274,6 @@ class Scan(PureOp): ...@@ -297,9 +274,6 @@ class Scan(PureOp):
if "allow_gc" not in self.__dict__: if "allow_gc" not in self.__dict__:
self.allow_gc = True self.allow_gc = True
self.info['allow_gc'] = True self.info['allow_gc'] = True
if not hasattr(self, 'gpua'):
self.gpua = False
self.info['gpua'] = False
if not hasattr(self, 'var_mappings'): if not hasattr(self, 'var_mappings'):
# Generate the mappings between inner and outer inputs and outputs # Generate the mappings between inner and outer inputs and outputs
# if they haven't already been generated. # if they haven't already been generated.
...@@ -436,9 +410,9 @@ class Scan(PureOp): ...@@ -436,9 +410,9 @@ class Scan(PureOp):
def format(var, as_var): def format(var, as_var):
""" """
This function ensures that ``out`` has the same dtype as This function ensures that ``out`` has the same dtype as
``inp`` as well as calling filter_variable to make sure they are ``inp`` as well as calling filter_variable to make sure
both TensorType or CudaNdarrayType. It internally deals with the they are both TensorType or GpuArrayType. It internally
corner case where inp.ndim + 1 = out.ndim deals with the corner case where inp.ndim + 1 = out.ndim
""" """
if not hasattr(var, 'dtype'): if not hasattr(var, 'dtype'):
...@@ -672,7 +646,7 @@ class Scan(PureOp): ...@@ -672,7 +646,7 @@ class Scan(PureOp):
'n_seqs', 'tap_array', 'n_seqs', 'tap_array',
'as_while', 'n_mit_sot', 'destroy_map', 'as_while', 'n_mit_sot', 'destroy_map',
'n_nit_sot', 'n_shared_outs', 'n_nit_sot', 'n_shared_outs',
'n_sit_sot', 'gpu', 'gpua', 'n_mit_mot_outs', 'n_sit_sot', 'gpua', 'n_mit_mot_outs',
'n_mit_mot', 'mit_mot_out_slices'] 'n_mit_mot', 'mit_mot_out_slices']
# These are some safety checks ( namely that the inner graph has the # These are some safety checks ( namely that the inner graph has the
# same number of inputs and same number of outputs ) # same number of inputs and same number of outputs )
...@@ -696,7 +670,7 @@ class Scan(PureOp): ...@@ -696,7 +670,7 @@ class Scan(PureOp):
other.inputs) other.inputs)
def __str__(self): def __str__(self):
if self.gpu: if self.gpua:
gpu_str = 'gpu' gpu_str = 'gpu'
else: else:
gpu_str = 'cpu' gpu_str = 'cpu'
...@@ -1318,7 +1292,7 @@ class Scan(PureOp): ...@@ -1318,7 +1292,7 @@ class Scan(PureOp):
pdx = offset + self.n_shared_outs pdx = offset + self.n_shared_outs
output_storage[pdx].storage[0] = None output_storage[pdx].storage[0] = None
# 4.5. Keep a reference to the variables (ndarrays, CudaNdarrays, # 4.5. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) currently in the output_storage to be able to compare them # etc) currently in the output_storage to be able to compare them
# with the actual outputs of the inner function after its # with the actual outputs of the inner function after its
# execution. Also keep pointers to their data to be able to detect # execution. Also keep pointers to their data to be able to detect
...@@ -1336,7 +1310,7 @@ class Scan(PureOp): ...@@ -1336,7 +1310,7 @@ class Scan(PureOp):
else: else:
old_output_data[idx] = var.gpudata old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays, # 4.6. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) associated with mitmot inputs currently in the # etc) associated with mitmot inputs currently in the
# input_storage to be able to compare them with the content of the # input_storage to be able to compare them with the content of the
# input_storage after the execution of the function. Also keep # input_storage after the execution of the function. Also keep
...@@ -2514,7 +2488,7 @@ class Scan(PureOp): ...@@ -2514,7 +2488,7 @@ class Scan(PureOp):
info['n_seqs'] = len(outer_inp_seqs) info['n_seqs'] = len(outer_inp_seqs)
info['n_mit_sot'] = 0 info['n_mit_sot'] = 0
info['tap_array'] = new_tap_array info['tap_array'] = new_tap_array
info['gpu'] = False info['gpua'] = False
info['n_mit_mot'] = len(outer_inp_mitmot) info['n_mit_mot'] = len(outer_inp_mitmot)
info['n_mit_mot_outs'] = n_mitmot_outs info['n_mit_mot_outs'] = n_mitmot_outs
info['mit_mot_out_slices'] = mitmot_out_taps info['mit_mot_out_slices'] = mitmot_out_taps
...@@ -2683,7 +2657,7 @@ class Scan(PureOp): ...@@ -2683,7 +2657,7 @@ class Scan(PureOp):
info['n_mit_mot'] = self.n_mit_mot * 2 info['n_mit_mot'] = self.n_mit_mot * 2
info['n_nit_sot'] = self.n_nit_sot * 2 info['n_nit_sot'] = self.n_nit_sot * 2
info['n_shared_outs'] = self.n_shared_outs info['n_shared_outs'] = self.n_shared_outs
info['gpu'] = False info['gpua'] = False
info['as_while'] = self.as_while info['as_while'] = self.as_while
info['profile'] = self.profile info['profile'] = self.profile
info['truncate_gradient'] = self.truncate_gradient info['truncate_gradient'] = self.truncate_gradient
......
...@@ -914,10 +914,9 @@ class ScanInplaceOptimizer(Optimizer): ...@@ -914,10 +914,9 @@ class ScanInplaceOptimizer(Optimizer):
""" """
def __init__(self, typeInfer=None, gpu_flag=False, gpua_flag=False): def __init__(self, typeInfer=None, gpua_flag=False):
Optimizer.__init__(self) Optimizer.__init__(self)
self.typeInfer = typeInfer self.typeInfer = typeInfer
self.gpu_flag = gpu_flag
self.gpua_flag = gpua_flag self.gpua_flag = gpua_flag
def add_requirements(self, fgraph): def add_requirements(self, fgraph):
...@@ -998,12 +997,10 @@ class ScanInplaceOptimizer(Optimizer): ...@@ -998,12 +997,10 @@ class ScanInplaceOptimizer(Optimizer):
def apply(self, fgraph): def apply(self, fgraph):
# Depending on the values of gpu_flag and gpua_flag, get the list of # Depending on the value of gpua_flag, get the list of memory
# memory allocation ops that the optimization should be able to handle # allocation ops that the optimization should be able to
# handle
alloc_ops = (Alloc, AllocEmpty) alloc_ops = (Alloc, AllocEmpty)
if self.gpu_flag:
alloc_ops += (theano.sandbox.cuda.GpuAlloc,
theano.sandbox.cuda.GpuAllocEmpty)
if self.gpua_flag: if self.gpua_flag:
# gpuarray might be imported but not its GpuAlloc and # gpuarray might be imported but not its GpuAlloc and
# GpuAllocEmpty ops. # GpuAllocEmpty ops.
...@@ -1016,7 +1013,6 @@ class ScanInplaceOptimizer(Optimizer): ...@@ -1016,7 +1013,6 @@ class ScanInplaceOptimizer(Optimizer):
nodes = fgraph.toposort()[::-1] nodes = fgraph.toposort()[::-1]
scan_nodes = [x for x in nodes scan_nodes = [x for x in nodes
if (isinstance(x.op, scan_op.Scan) and if (isinstance(x.op, scan_op.Scan) and
x.op.info['gpu'] == self.gpu_flag and
x.op.info['gpua'] == self.gpua_flag)] x.op.info['gpua'] == self.gpua_flag)]
for scan_idx in xrange(len(scan_nodes)): for scan_idx in xrange(len(scan_nodes)):
...@@ -2263,8 +2259,7 @@ optdb.register('scan_eqopt2', scan_eqopt2, 1.6, 'fast_run', 'scan') ...@@ -2263,8 +2259,7 @@ optdb.register('scan_eqopt2', scan_eqopt2, 1.6, 'fast_run', 'scan')
# ScanSaveMem should execute only once per node. # ScanSaveMem should execute only once per node.
optdb.register('scanOp_save_mem', ScanSaveMem(), 1.61, 'fast_run', 'scan') optdb.register('scanOp_save_mem', ScanSaveMem(), 1.61, 'fast_run', 'scan')
optdb.register('scanOp_make_inplace', optdb.register('scanOp_make_inplace',
ScanInplaceOptimizer(typeInfer=None, ScanInplaceOptimizer(typeInfer=None),
gpu_flag=False),
75, 75,
'fast_run', 'fast_run',
'inplace', 'inplace',
......
...@@ -355,7 +355,7 @@ def perform( ...@@ -355,7 +355,7 @@ def perform(
pdx = offset + n_shared_outs pdx = offset + n_shared_outs
output_storage[<unsigned int>pdx].storage[0] = None output_storage[<unsigned int>pdx].storage[0] = None
# 4.5. Keep a reference to the variables (ndarrays, CudaNdarrays, # 4.5. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) currently in the output_storage to be able to compare them # etc) currently in the output_storage to be able to compare them
# with the actual outputs of the inner function after its # with the actual outputs of the inner function after its
# execution. Also keep pointers to their data to be able to detect # execution. Also keep pointers to their data to be able to detect
...@@ -373,7 +373,7 @@ def perform( ...@@ -373,7 +373,7 @@ def perform(
else: else:
old_output_data[idx] = var.gpudata old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays, # 4.6. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) associated with mitmot inputs currently in the input_storage to # etc) associated with mitmot inputs currently in the input_storage to
# be able to compare them with the content of the input_storage after # be able to compare them with the content of the input_storage after
# the execution of the function. Also keep pointers to their data to # the execution of the function. Also keep pointers to their data to
......
...@@ -84,9 +84,7 @@ def safe_new(x, tag='', dtype=None): ...@@ -84,9 +84,7 @@ def safe_new(x, tag='', dtype=None):
try: try:
x = tensor.as_tensor_variable(x) x = tensor.as_tensor_variable(x)
except TypeError: except TypeError:
# This could happen for example for random states, and I really # This could happen for example for random states
# want to avoid the convoluted logic that checks for cuda
# ndarrays
pass pass
# Cast x if needed. If x has a test value, this will also cast it. # Cast x if needed. If x has a test value, this will also cast it.
...@@ -151,24 +149,15 @@ def traverse(out, x, x_copy, d, visited=None): ...@@ -151,24 +149,15 @@ def traverse(out, x, x_copy, d, visited=None):
if out in visited: if out in visited:
return d return d
visited.add(out) visited.add(out)
from theano.sandbox import cuda
from theano.gpuarray.basic_ops import GpuFromHost, host_from_gpu from theano.gpuarray.basic_ops import GpuFromHost, host_from_gpu
from theano.gpuarray import pygpu_activated from theano.gpuarray import pygpu_activated
from theano.gpuarray.type import GpuArrayType from theano.gpuarray.type import GpuArrayType
if out == x: if out == x:
if isinstance(x.type, cuda.CudaNdarrayType): assert isinstance(x.type, GpuArrayType)
d[out] = cuda.gpu_from_host(x_copy) d[out] = GpuFromHost(x.type.context_name)(x_copy)
else:
assert isinstance(x.type, GpuArrayType)
d[out] = GpuFromHost(x.type.context_name)(x_copy)
return d return d
elif out.owner is None: elif out.owner is None:
return d return d
elif (cuda.cuda_available and
out.owner.op == cuda.host_from_gpu and
out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy)
return d
elif (pygpu_activated and elif (pygpu_activated and
out.owner.op == host_from_gpu and out.owner.op == host_from_gpu and
out.owner.inputs == [x]): out.owner.inputs == [x]):
...@@ -994,7 +983,6 @@ def compress_outs(op, not_required, inputs): ...@@ -994,7 +983,6 @@ def compress_outs(op, not_required, inputs):
info['n_nit_sot'] = 0 info['n_nit_sot'] = 0
info['truncate_gradient'] = op.info['truncate_gradient'] info['truncate_gradient'] = op.info['truncate_gradient']
info['name'] = op.info['name'] info['name'] = op.info['name']
info['gpu'] = op.info['gpu']
info['gpua'] = op.info['gpua'] info['gpua'] = op.info['gpua']
info['mode'] = op.info['mode'] info['mode'] = op.info['mode']
info['as_while'] = op.info['as_while'] info['as_while'] = op.info['as_while']
...@@ -1257,7 +1245,7 @@ class scan_args(object): ...@@ -1257,7 +1245,7 @@ class scan_args(object):
self.other_info = OrderedDict() self.other_info = OrderedDict()
for k in ('truncate_gradient', 'name', 'mode', 'destroy_map', for k in ('truncate_gradient', 'name', 'mode', 'destroy_map',
'gpu', 'gpua', 'as_while', 'profile', 'allow_gc'): 'gpua', 'as_while', 'profile', 'allow_gc'):
if k in info: if k in info:
self.other_info[k] = info[k] self.other_info[k] = info[k]
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -4872,68 +4872,6 @@ class ScanGpuTests: ...@@ -4872,68 +4872,6 @@ class ScanGpuTests:
utt.assert_allclose(output, expected_output) utt.assert_allclose(output, expected_output)
class T_Scan_Cuda(unittest.TestCase, ScanGpuTests):
"""This class takes the gpu tests for scan that are defined in
class ScanGpuTests and runs them using the cuda backend. It also adds
tests specific to the cuda backend
"""
def __init__(self, *args, **kwargs):
from theano.sandbox import cuda
self.gpu_backend = cuda
self.mode_with_gpu = mode_with_gpu
self.mode_with_gpu_nodebug = mode_with_gpu_nodebug
super(T_Scan_Cuda, self).__init__(*args, **kwargs)
def setUp(self):
# Skip the test if cuda is not available
if not self.gpu_backend.cuda_available:
raise SkipTest('Optional package cuda disabled')
utt.seed_rng()
super(T_Scan_Cuda, self).setUp()
def is_scan_on_gpu(self, node):
return node.op.info.get('gpu', False)
def test_inconsistent_inner_fct(self):
# Test that scan can detect inconsistencies in the inner graph and
# raises an appropriate exception. The pickled file used in this test
# relies on the cuda backend.
# This test has not been extensively tested for Python 3 so it should
# be skipped if python version is >=3
version = sys.version_info
if version >= (3,):
raise SkipTest("This test relies on a pickled file produced with "
"Python 2. The current python version "
"(%i.%i.%i.%i) is >= 3 so the test will be "
"skipped." % (version.major, version.minor,
version.micro, version.serial))
# When unpickled, the scan op should perform validation on its inner
# graph, detect the inconsistencies and raise a TypeError
folder = os.path.dirname(os.path.abspath(__file__))
path = os.path.join(folder, "inconsistent_scan.pkl")
assert_raises(TypeError, pickle.load, open(path, "r"))
def test_consistent_inner_fct(self):
# Test that scan does not falsely detect inconsistencies in a valid
# inner graph
rs = theano.sandbox.rng_mrg.MRG_RandomStreams(use_cuda=True)
output, _ = theano.scan(lambda : rs.uniform((3,), dtype="float32"),
n_steps=3)
pickle.loads(pickle.dumps(output))
# Also ensure that, after compilation, the Scan has been moved
# on the gpu
fct = theano.function([], output, mode=self.mode_with_gpu)
scan_nodes = scan_nodes_from_fct(fct)
assert len(scan_nodes) == 1
assert self.is_scan_on_gpu(scan_nodes[0])
class T_Scan_Gpuarray(unittest.TestCase, ScanGpuTests): class T_Scan_Gpuarray(unittest.TestCase, ScanGpuTests):
"""This class takes the gpu tests for scan that are defined in """This class takes the gpu tests for scan that are defined in
class ScanGpuTests and runs them using the gpuarray backend. class ScanGpuTests and runs them using the gpuarray backend.
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论