提交 df95d9a9 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Remove tentacles in scan.

上级 a388d94d
......@@ -998,22 +998,20 @@ def scan(fn,
shared_inner_outputs)
if condition is not None:
inner_outs.append(condition)
# Cuda and Gpuarray are imported here, instead of being imported on top of
# gpuarray is imported here, instead of being imported on top of
# the file because that would force on the user some dependencies that we
# might not want to. Currently we are working on removing the
# dependencies on sandbox code completely.
from theano.sandbox import cuda
from theano import gpuarray
if cuda.cuda_available or gpuarray.pygpu_activated:
if gpuarray.pygpu_activated:
# very often we end up in this situation when we want to
# replace w with w_copy, where w is a GPU variable
# and w_copy is TensorType. This is caused because shared
# variables are put on GPU right aways >:| ,
# variables are put on GPU right away >:| ,
new_givens = OrderedDict()
for w, w_copy in iteritems(givens):
if ((isinstance(w.type, cuda.CudaNdarrayType) or
isinstance(w.type, gpuarray.GpuArrayType)) and
if (isinstance(w.type, gpuarray.GpuArrayType) and
isinstance(w_copy.type, tensor.TensorType)):
for o in inner_outs:
new_givens = traverse(o, w, w_copy, new_givens)
......@@ -1046,7 +1044,7 @@ def scan(fn,
info['name'] = name
info['mode'] = mode
info['destroy_map'] = OrderedDict()
info['gpu'] = False
info['gpua'] = False
info['as_while'] = as_while
info['profile'] = profile
info['allow_gc'] = allow_gc
......@@ -1072,7 +1070,7 @@ def scan(fn,
arg = tensor.as_tensor_variable(arg)
except TypeError:
# This happens for Random States for e.g. but it is a good way
# to make sure no input is a cuda ndarrays
# to make sure all inputs are tensors.
pass
scan_inputs += [arg]
scan_outs = local_op(*scan_inputs)
......
......@@ -125,8 +125,6 @@ class Scan(PureOp):
info,
typeConstructor=None,
):
if 'gpua' not in info:
info['gpua'] = False
# adding properties into self
self.inputs = inputs
self.outputs = outputs
......@@ -204,7 +202,7 @@ class Scan(PureOp):
self.n_shared_outs)
self.n_outs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot
self.n_tap_outs = self.n_mit_mot + self.n_mit_sot
if self.info['gpu'] or self.info['gpua']:
if self.info['gpua']:
self._hash_inner_graph = self.info['gpu_hash']
else:
# Do the missing inputs check here to have the error early.
......@@ -250,27 +248,6 @@ class Scan(PureOp):
"type '%s' and '%s' respectively." %
(self.name, type_input, type_output))
# If scan has the flag 'gpu' set to false (meaning that is shouldn't
# use the CUDA gpu backend ), ensure that is has no input and no
# output with type CudaNdarrayType
from theano.sandbox.cuda import CudaNdarrayType
if not self.info.get("gpu", False):
for inp in self.inputs:
if isinstance(inp.type, CudaNdarrayType):
raise TypeError("Inconsistency in the inner graph of "
"scan '%s' : one of the inputs to the "
"inner graph is of type CudaNdarray but "
"the attributes of the scan op indicate "
"that it shouldn't be the case")
for out in self.outputs:
if isinstance(out.type, CudaNdarrayType):
raise TypeError("Inconsistency in the inner graph of "
"scan '%s' : one of the outputs to the "
"inner graph is of type CudaNdarray but "
"the attributes of the scan op indicate "
"that it shouldn't be the case")
# If scan has the flag 'gpua' set to false (meaning that it shouldn't
# use the gpuarray gpu backend ), ensure that it has no input and no
# output with type GpuArrayType
......@@ -297,9 +274,6 @@ class Scan(PureOp):
if "allow_gc" not in self.__dict__:
self.allow_gc = True
self.info['allow_gc'] = True
if not hasattr(self, 'gpua'):
self.gpua = False
self.info['gpua'] = False
if not hasattr(self, 'var_mappings'):
# Generate the mappings between inner and outer inputs and outputs
# if they haven't already been generated.
......@@ -436,9 +410,9 @@ class Scan(PureOp):
def format(var, as_var):
"""
This functions ensures that ``out`` has the same dtype as
``inp`` as well as calling filter_variable to make sure they are
both TensorType or CudaNdarrayType. It internally deals with the
corner case where inp.ndim + 1 = out.ndim
``inp`` as well as calling filter_variable to make sure
they are both TensorType or GpuArrayType. It internally
deals with the corner case where inp.ndim + 1 = out.ndim
"""
if not hasattr(var, 'dtype'):
......@@ -672,7 +646,7 @@ class Scan(PureOp):
'n_seqs', 'tap_array',
'as_while', 'n_mit_sot', 'destroy_map',
'n_nit_sot', 'n_shared_outs',
'n_sit_sot', 'gpu', 'gpua', 'n_mit_mot_outs',
'n_sit_sot', 'gpua', 'n_mit_mot_outs',
'n_mit_mot', 'mit_mot_out_slices']
# This are some safety checks ( namely that the inner graph has the
# same number of inputs and same number of outputs )
......@@ -696,7 +670,7 @@ class Scan(PureOp):
other.inputs)
def __str__(self):
if self.gpu:
if self.gpua:
gpu_str = 'gpu'
else:
gpu_str = 'cpu'
......@@ -1318,7 +1292,7 @@ class Scan(PureOp):
pdx = offset + self.n_shared_outs
output_storage[pdx].storage[0] = None
# 4.5. Keep a reference to the variables (ndarrays, CudaNdarrays,
# 4.5. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) currently in the output_storage to be able to compare them
# with the actual outputs of the inner function after its
# execution. Also keep pointers to their data to be able to detect
......@@ -1336,7 +1310,7 @@ class Scan(PureOp):
else:
old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays,
# 4.6. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) associated with mitmot inputs currently in the
# input_storage to be able to compare them with the content of the
# input_storage after the execution of the function. Also keep
......@@ -2514,7 +2488,7 @@ class Scan(PureOp):
info['n_seqs'] = len(outer_inp_seqs)
info['n_mit_sot'] = 0
info['tap_array'] = new_tap_array
info['gpu'] = False
info['gpua'] = False
info['n_mit_mot'] = len(outer_inp_mitmot)
info['n_mit_mot_outs'] = n_mitmot_outs
info['mit_mot_out_slices'] = mitmot_out_taps
......@@ -2683,7 +2657,7 @@ class Scan(PureOp):
info['n_mit_mot'] = self.n_mit_mot * 2
info['n_nit_sot'] = self.n_nit_sot * 2
info['n_shared_outs'] = self.n_shared_outs
info['gpu'] = False
info['gpua'] = False
info['as_while'] = self.as_while
info['profile'] = self.profile
info['truncate_gradient'] = self.truncate_gradient
......
......@@ -914,10 +914,9 @@ class ScanInplaceOptimizer(Optimizer):
"""
def __init__(self, typeInfer=None, gpu_flag=False, gpua_flag=False):
def __init__(self, typeInfer=None, gpua_flag=False):
Optimizer.__init__(self)
self.typeInfer = typeInfer
self.gpu_flag = gpu_flag
self.gpua_flag = gpua_flag
def add_requirements(self, fgraph):
......@@ -998,12 +997,10 @@ class ScanInplaceOptimizer(Optimizer):
def apply(self, fgraph):
# Depending on the values of gpu_flag and gpua_flag, get the list of
# memory allocation ops that the optimization should be able to handle
# Depending on the value of gpua_flag, get the list of memory
# allocation ops that the optimization should be able to
# handle
alloc_ops = (Alloc, AllocEmpty)
if self.gpu_flag:
alloc_ops += (theano.sandbox.cuda.GpuAlloc,
theano.sandbox.cuda.GpuAllocEmpty)
if self.gpua_flag:
# gpuarray might be imported but not its GpuAlloc and
# GpuAllocEmpty ops.
......@@ -1016,7 +1013,6 @@ class ScanInplaceOptimizer(Optimizer):
nodes = fgraph.toposort()[::-1]
scan_nodes = [x for x in nodes
if (isinstance(x.op, scan_op.Scan) and
x.op.info['gpu'] == self.gpu_flag and
x.op.info['gpua'] == self.gpua_flag)]
for scan_idx in xrange(len(scan_nodes)):
......@@ -2263,8 +2259,7 @@ optdb.register('scan_eqopt2', scan_eqopt2, 1.6, 'fast_run', 'scan')
# ScanSaveMem should execute only once per node.
optdb.register('scanOp_save_mem', ScanSaveMem(), 1.61, 'fast_run', 'scan')
optdb.register('scanOp_make_inplace',
ScanInplaceOptimizer(typeInfer=None,
gpu_flag=False),
ScanInplaceOptimizer(typeInfer=None),
75,
'fast_run',
'inplace',
......
......@@ -355,7 +355,7 @@ def perform(
pdx = offset + n_shared_outs
output_storage[<unsigned int>pdx].storage[0] = None
# 4.5. Keep a reference to the variables (ndarrays, CudaNdarrays,
# 4.5. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) currently in the output_storage to be able to compare them
# with the actual outputs of the inner function after its
# execution. Also keep pointers to their data to be able to detect
......@@ -373,7 +373,7 @@ def perform(
else:
old_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, CudaNdarrays,
# 4.6. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) associated with mitmot inputs currently in the input_storage to
# be able to compare them with the content of the input_storage after
# the execution of the function. Also keep pointers to their data to
......
......@@ -84,9 +84,7 @@ def safe_new(x, tag='', dtype=None):
try:
x = tensor.as_tensor_variable(x)
except TypeError:
# This could happen for example for random states, and I really
# want to avoid the convoluted logic that checks for cuda
# ndarrays
# This could happen for example for random states
pass
# Cast x if needed. If x has a test value, this will also cast it.
......@@ -151,24 +149,15 @@ def traverse(out, x, x_copy, d, visited=None):
if out in visited:
return d
visited.add(out)
from theano.sandbox import cuda
from theano.gpuarray.basic_ops import GpuFromHost, host_from_gpu
from theano.gpuarray import pygpu_activated
from theano.gpuarray.type import GpuArrayType
if out == x:
if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy)
else:
assert isinstance(x.type, GpuArrayType)
d[out] = GpuFromHost(x.type.context_name)(x_copy)
assert isinstance(x.type, GpuArrayType)
d[out] = GpuFromHost(x.type.context_name)(x_copy)
return d
elif out.owner is None:
return d
elif (cuda.cuda_available and
out.owner.op == cuda.host_from_gpu and
out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy)
return d
elif (pygpu_activated and
out.owner.op == host_from_gpu and
out.owner.inputs == [x]):
......@@ -994,7 +983,6 @@ def compress_outs(op, not_required, inputs):
info['n_nit_sot'] = 0
info['truncate_gradient'] = op.info['truncate_gradient']
info['name'] = op.info['name']
info['gpu'] = op.info['gpu']
info['gpua'] = op.info['gpua']
info['mode'] = op.info['mode']
info['as_while'] = op.info['as_while']
......@@ -1257,7 +1245,7 @@ class scan_args(object):
self.other_info = OrderedDict()
for k in ('truncate_gradient', 'name', 'mode', 'destroy_map',
'gpu', 'gpua', 'as_while', 'profile', 'allow_gc'):
'gpua', 'as_while', 'profile', 'allow_gc'):
if k in info:
self.other_info[k] = info[k]
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论