提交 df95d9a9 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Remove tentacles in scan.

上级 a388d94d
...@@ -998,22 +998,20 @@ def scan(fn, ...@@ -998,22 +998,20 @@ def scan(fn,
shared_inner_outputs) shared_inner_outputs)
if condition is not None: if condition is not None:
inner_outs.append(condition) inner_outs.append(condition)
# Cuda and Gpuarray are imported here, instead of being imported on top of # gpuarray is imported here, instead of being imported on top of
# the file because that would force on the user some dependencies that we # the file because that would force on the user some dependencies that we
# might not want to. Currently we are working on removing the # might not want to. Currently we are working on removing the
# dependencies on sandbox code completely. # dependencies on sandbox code completely.
from theano.sandbox import cuda
from theano import gpuarray from theano import gpuarray
if cuda.cuda_available or gpuarray.pygpu_activated: if gpuarray.pygpu_activated:
# very often we end up in this situation when we want to # very often we end up in this situation when we want to
# replace w with w_copy, where w is a GPU variable # replace w with w_copy, where w is a GPU variable
# and w_copy is TensorType. This is caused because shared # and w_copy is TensorType. This is caused because shared
# variables are put on GPU right aways >:| , # variables are put on GPU right away >:| ,
new_givens = OrderedDict() new_givens = OrderedDict()
for w, w_copy in iteritems(givens): for w, w_copy in iteritems(givens):
if ((isinstance(w.type, cuda.CudaNdarrayType) or if (isinstance(w.type, gpuarray.GpuArrayType) and
isinstance(w.type, gpuarray.GpuArrayType)) and
isinstance(w_copy.type, tensor.TensorType)): isinstance(w_copy.type, tensor.TensorType)):
for o in inner_outs: for o in inner_outs:
new_givens = traverse(o, w, w_copy, new_givens) new_givens = traverse(o, w, w_copy, new_givens)
...@@ -1046,7 +1044,7 @@ def scan(fn, ...@@ -1046,7 +1044,7 @@ def scan(fn,
info['name'] = name info['name'] = name
info['mode'] = mode info['mode'] = mode
info['destroy_map'] = OrderedDict() info['destroy_map'] = OrderedDict()
info['gpu'] = False info['gpua'] = False
info['as_while'] = as_while info['as_while'] = as_while
info['profile'] = profile info['profile'] = profile
info['allow_gc'] = allow_gc info['allow_gc'] = allow_gc
...@@ -1072,7 +1070,7 @@ def scan(fn, ...@@ -1072,7 +1070,7 @@ def scan(fn,
arg = tensor.as_tensor_variable(arg) arg = tensor.as_tensor_variable(arg)
except TypeError: except TypeError:
# This happens for Random States for e.g. but it is a good way # This happens for Random States for e.g. but it is a good way
# to make sure no input is a cuda ndarrays # to make sure all inputs are tensors.
pass pass
scan_inputs += [arg] scan_inputs += [arg]
scan_outs = local_op(*scan_inputs) scan_outs = local_op(*scan_inputs)
......
...@@ -125,8 +125,6 @@ class Scan(PureOp): ...@@ -125,8 +125,6 @@ class Scan(PureOp):
info, info,
typeConstructor=None, typeConstructor=None,
): ):
if 'gpua' not in info:
info['gpua'] = False
# adding properties into self # adding properties into self
self.inputs = inputs self.inputs = inputs
self.outputs = outputs self.outputs = outputs
...@@ -204,7 +202,7 @@ class Scan(PureOp): ...@@ -204,7 +202,7 @@ class Scan(PureOp):
self.n_shared_outs) self.n_shared_outs)
self.n_outs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot self.n_outs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot
self.n_tap_outs = self.n_mit_mot + self.n_mit_sot self.n_tap_outs = self.n_mit_mot + self.n_mit_sot
if self.info['gpu'] or self.info['gpua']: if self.info['gpua']:
self._hash_inner_graph = self.info['gpu_hash'] self._hash_inner_graph = self.info['gpu_hash']
else: else:
# Do the missing inputs check here to have the error early. # Do the missing inputs check here to have the error early.
...@@ -250,27 +248,6 @@ class Scan(PureOp): ...@@ -250,27 +248,6 @@ class Scan(PureOp):
"type '%s' and '%s' respectively." % "type '%s' and '%s' respectively." %
(self.name, type_input, type_output)) (self.name, type_input, type_output))
# If scan has the flag 'gpu' set to false (meaning that is shouldn't
# use the CUDA gpu backend ), ensure that is has no input and no
# output with type CudaNdarrayType
from theano.sandbox.cuda import CudaNdarrayType
if not self.info.get("gpu", False):
for inp in self.inputs:
if isinstance(inp.type, CudaNdarrayType):
raise TypeError("Inconsistency in the inner graph of "
"scan '%s' : one of the inputs to the "
"inner graph is of type CudaNdarray but "
"the attributes of the scan op indicate "
"that it shouldn't be the case")
for out in self.outputs:
if isinstance(out.type, CudaNdarrayType):
raise TypeError("Inconsistency in the inner graph of "
"scan '%s' : one of the outputs to the "
"inner graph is of type CudaNdarray but "
"the attributes of the scan op indicate "
"that it shouldn't be the case")
# If scan has the flag 'gpua' set to false (meaning that it shouldn't # If scan has the flag 'gpua' set to false (meaning that it shouldn't
# use the gpuarray gpu backend ), ensure that it has no input and no # use the gpuarray gpu backend ), ensure that it has no input and no
# output with type GpuArrayType # output with type GpuArrayType
...@@ -297,9 +274,6 @@ class Scan(PureOp): ...@@ -297,9 +274,6 @@ class Scan(PureOp):
if "allow_gc" not in self.__dict__: if "allow_gc" not in self.__dict__:
self.allow_gc = True self.allow_gc = True
self.info['allow_gc'] = True self.info['allow_gc'] = True
if not hasattr(self, 'gpua'):
self.gpua = False
self.info['gpua'] = False
if not hasattr(self, 'var_mappings'): if not hasattr(self, 'var_mappings'):
# Generate the mappings between inner and outer inputs and outputs # Generate the mappings between inner and outer inputs and outputs
# if they haven't already been generated. # if they haven't already been generated.
...@@ -436,9 +410,9 @@ class Scan(PureOp): ...@@ -436,9 +410,9 @@ class Scan(PureOp):
def format(var, as_var): def format(var, as_var):
""" """
This function ensures that ``out`` has the same dtype as This function ensures that ``out`` has the same dtype as
``inp`` as well as calling filter_variable to make sure they are ``inp`` as well as calling filter_variable to make sure
both TensorType or CudaNdarrayType. It internally deals with the they are both TensorType or GpuArrayType. It internally
corner case where inp.ndim + 1 = out.ndim deals with the corner case where inp.ndim + 1 = out.ndim
""" """
if not hasattr(var, 'dtype'): if not hasattr(var, 'dtype'):
...@@ -672,7 +646,7 @@ class Scan(PureOp): ...@@ -672,7 +646,7 @@ class Scan(PureOp):
'n_seqs', 'tap_array', 'n_seqs', 'tap_array',
'as_while', 'n_mit_sot', 'destroy_map', 'as_while', 'n_mit_sot', 'destroy_map',
'n_nit_sot', 'n_shared_outs', 'n_nit_sot', 'n_shared_outs',
'n_sit_sot', 'gpu', 'gpua', 'n_mit_mot_outs', 'n_sit_sot', 'gpua', 'n_mit_mot_outs',
'n_mit_mot', 'mit_mot_out_slices'] 'n_mit_mot', 'mit_mot_out_slices']
# These are some safety checks ( namely that the inner graph has the # These are some safety checks ( namely that the inner graph has the
# same number of inputs and same number of outputs ) # same number of inputs and same number of outputs )
...@@ -696,7 +670,7 @@ class Scan(PureOp): ...@@ -696,7 +670,7 @@ class Scan(PureOp):
other.inputs) other.inputs)
def __str__(self): def __str__(self):
if self.gpu: if self.gpua:
gpu_str = 'gpu' gpu_str = 'gpu'
else: else:
gpu_str = 'cpu' gpu_str = 'cpu'
...@@ -1318,7 +1292,7 @@ class Scan(PureOp): ...@@ -1318,7 +1292,7 @@ class Scan(PureOp):
pdx = offset + self.n_shared_outs pdx = offset + self.n_shared_outs
output_storage[pdx].storage[0] = None output_storage[pdx].storage[0] = None
# 4.5. Keep a reference to the variables (ndarrays, CudaNdarrays, # 4.5. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) currently in the output_storage to be able to compare them # etc) currently in the output_storage to be able to compare them
# with the actual outputs of the inner function after its # with the actual outputs of the inner function after its
# execution. Also keep pointers to their data to be able to detect # execution. Also keep pointers to their data to be able to detect
...@@ -1336,7 +1310,7 @@ class Scan(PureOp): ...@@ -1336,7 +1310,7 @@ class Scan(PureOp):
else: else:
old_output_data[idx] = var.gpudata old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays, # 4.6. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) associated with mitmot inputs currently in the # etc) associated with mitmot inputs currently in the
# input_storage to be able to compare them with the content of the # input_storage to be able to compare them with the content of the
# input_storage after the execution of the function. Also keep # input_storage after the execution of the function. Also keep
...@@ -2514,7 +2488,7 @@ class Scan(PureOp): ...@@ -2514,7 +2488,7 @@ class Scan(PureOp):
info['n_seqs'] = len(outer_inp_seqs) info['n_seqs'] = len(outer_inp_seqs)
info['n_mit_sot'] = 0 info['n_mit_sot'] = 0
info['tap_array'] = new_tap_array info['tap_array'] = new_tap_array
info['gpu'] = False info['gpua'] = False
info['n_mit_mot'] = len(outer_inp_mitmot) info['n_mit_mot'] = len(outer_inp_mitmot)
info['n_mit_mot_outs'] = n_mitmot_outs info['n_mit_mot_outs'] = n_mitmot_outs
info['mit_mot_out_slices'] = mitmot_out_taps info['mit_mot_out_slices'] = mitmot_out_taps
...@@ -2683,7 +2657,7 @@ class Scan(PureOp): ...@@ -2683,7 +2657,7 @@ class Scan(PureOp):
info['n_mit_mot'] = self.n_mit_mot * 2 info['n_mit_mot'] = self.n_mit_mot * 2
info['n_nit_sot'] = self.n_nit_sot * 2 info['n_nit_sot'] = self.n_nit_sot * 2
info['n_shared_outs'] = self.n_shared_outs info['n_shared_outs'] = self.n_shared_outs
info['gpu'] = False info['gpua'] = False
info['as_while'] = self.as_while info['as_while'] = self.as_while
info['profile'] = self.profile info['profile'] = self.profile
info['truncate_gradient'] = self.truncate_gradient info['truncate_gradient'] = self.truncate_gradient
......
...@@ -914,10 +914,9 @@ class ScanInplaceOptimizer(Optimizer): ...@@ -914,10 +914,9 @@ class ScanInplaceOptimizer(Optimizer):
""" """
def __init__(self, typeInfer=None, gpu_flag=False, gpua_flag=False): def __init__(self, typeInfer=None, gpua_flag=False):
Optimizer.__init__(self) Optimizer.__init__(self)
self.typeInfer = typeInfer self.typeInfer = typeInfer
self.gpu_flag = gpu_flag
self.gpua_flag = gpua_flag self.gpua_flag = gpua_flag
def add_requirements(self, fgraph): def add_requirements(self, fgraph):
...@@ -998,12 +997,10 @@ class ScanInplaceOptimizer(Optimizer): ...@@ -998,12 +997,10 @@ class ScanInplaceOptimizer(Optimizer):
def apply(self, fgraph): def apply(self, fgraph):
# Depending on the values of gpu_flag and gpua_flag, get the list of # Depending on the value of gpua_flag, get the list of memory
# memory allocation ops that the optimization should be able to handle # allocation ops that the optimization should be able to
# handle
alloc_ops = (Alloc, AllocEmpty) alloc_ops = (Alloc, AllocEmpty)
if self.gpu_flag:
alloc_ops += (theano.sandbox.cuda.GpuAlloc,
theano.sandbox.cuda.GpuAllocEmpty)
if self.gpua_flag: if self.gpua_flag:
# gpuarray might be imported but not its GpuAlloc and # gpuarray might be imported but not its GpuAlloc and
# GpuAllocEmpty ops. # GpuAllocEmpty ops.
...@@ -1016,7 +1013,6 @@ class ScanInplaceOptimizer(Optimizer): ...@@ -1016,7 +1013,6 @@ class ScanInplaceOptimizer(Optimizer):
nodes = fgraph.toposort()[::-1] nodes = fgraph.toposort()[::-1]
scan_nodes = [x for x in nodes scan_nodes = [x for x in nodes
if (isinstance(x.op, scan_op.Scan) and if (isinstance(x.op, scan_op.Scan) and
x.op.info['gpu'] == self.gpu_flag and
x.op.info['gpua'] == self.gpua_flag)] x.op.info['gpua'] == self.gpua_flag)]
for scan_idx in xrange(len(scan_nodes)): for scan_idx in xrange(len(scan_nodes)):
...@@ -2263,8 +2259,7 @@ optdb.register('scan_eqopt2', scan_eqopt2, 1.6, 'fast_run', 'scan') ...@@ -2263,8 +2259,7 @@ optdb.register('scan_eqopt2', scan_eqopt2, 1.6, 'fast_run', 'scan')
# ScanSaveMem should execute only once per node. # ScanSaveMem should execute only once per node.
optdb.register('scanOp_save_mem', ScanSaveMem(), 1.61, 'fast_run', 'scan') optdb.register('scanOp_save_mem', ScanSaveMem(), 1.61, 'fast_run', 'scan')
optdb.register('scanOp_make_inplace', optdb.register('scanOp_make_inplace',
ScanInplaceOptimizer(typeInfer=None, ScanInplaceOptimizer(typeInfer=None),
gpu_flag=False),
75, 75,
'fast_run', 'fast_run',
'inplace', 'inplace',
......
...@@ -355,7 +355,7 @@ def perform( ...@@ -355,7 +355,7 @@ def perform(
pdx = offset + n_shared_outs pdx = offset + n_shared_outs
output_storage[<unsigned int>pdx].storage[0] = None output_storage[<unsigned int>pdx].storage[0] = None
# 4.5. Keep a reference to the variables (ndarrays, CudaNdarrays, # 4.5. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) currently in the output_storage to be able to compare them # etc) currently in the output_storage to be able to compare them
# with the actual outputs of the inner function after its # with the actual outputs of the inner function after its
# execution. Also keep pointers to their data to be able to detect # execution. Also keep pointers to their data to be able to detect
...@@ -373,7 +373,7 @@ def perform( ...@@ -373,7 +373,7 @@ def perform(
else: else:
old_output_data[idx] = var.gpudata old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays, # 4.6. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) associated with mitmot inputs currently in the input_storage to # etc) associated with mitmot inputs currently in the input_storage to
# be able to compare them with the content of the input_storage after # be able to compare them with the content of the input_storage after
# the execution of the function. Also keep pointers to their data to # the execution of the function. Also keep pointers to their data to
......
...@@ -84,9 +84,7 @@ def safe_new(x, tag='', dtype=None): ...@@ -84,9 +84,7 @@ def safe_new(x, tag='', dtype=None):
try: try:
x = tensor.as_tensor_variable(x) x = tensor.as_tensor_variable(x)
except TypeError: except TypeError:
# This could happen for example for random states, and I really # This could happen for example for random states
# want to avoid the convoluted logic that checks for cuda
# ndarrays
pass pass
# Cast x if needed. If x has a test value, this will also cast it. # Cast x if needed. If x has a test value, this will also cast it.
...@@ -151,24 +149,15 @@ def traverse(out, x, x_copy, d, visited=None): ...@@ -151,24 +149,15 @@ def traverse(out, x, x_copy, d, visited=None):
if out in visited: if out in visited:
return d return d
visited.add(out) visited.add(out)
from theano.sandbox import cuda
from theano.gpuarray.basic_ops import GpuFromHost, host_from_gpu from theano.gpuarray.basic_ops import GpuFromHost, host_from_gpu
from theano.gpuarray import pygpu_activated from theano.gpuarray import pygpu_activated
from theano.gpuarray.type import GpuArrayType from theano.gpuarray.type import GpuArrayType
if out == x: if out == x:
if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy)
else:
assert isinstance(x.type, GpuArrayType) assert isinstance(x.type, GpuArrayType)
d[out] = GpuFromHost(x.type.context_name)(x_copy) d[out] = GpuFromHost(x.type.context_name)(x_copy)
return d return d
elif out.owner is None: elif out.owner is None:
return d return d
elif (cuda.cuda_available and
out.owner.op == cuda.host_from_gpu and
out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy)
return d
elif (pygpu_activated and elif (pygpu_activated and
out.owner.op == host_from_gpu and out.owner.op == host_from_gpu and
out.owner.inputs == [x]): out.owner.inputs == [x]):
...@@ -994,7 +983,6 @@ def compress_outs(op, not_required, inputs): ...@@ -994,7 +983,6 @@ def compress_outs(op, not_required, inputs):
info['n_nit_sot'] = 0 info['n_nit_sot'] = 0
info['truncate_gradient'] = op.info['truncate_gradient'] info['truncate_gradient'] = op.info['truncate_gradient']
info['name'] = op.info['name'] info['name'] = op.info['name']
info['gpu'] = op.info['gpu']
info['gpua'] = op.info['gpua'] info['gpua'] = op.info['gpua']
info['mode'] = op.info['mode'] info['mode'] = op.info['mode']
info['as_while'] = op.info['as_while'] info['as_while'] = op.info['as_while']
...@@ -1257,7 +1245,7 @@ class scan_args(object): ...@@ -1257,7 +1245,7 @@ class scan_args(object):
self.other_info = OrderedDict() self.other_info = OrderedDict()
for k in ('truncate_gradient', 'name', 'mode', 'destroy_map', for k in ('truncate_gradient', 'name', 'mode', 'destroy_map',
'gpu', 'gpua', 'as_while', 'profile', 'allow_gc'): 'gpua', 'as_while', 'profile', 'allow_gc'):
if k in info: if k in info:
self.other_info[k] = info[k] self.other_info[k] = info[k]
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论