提交 7d70c3a4 authored 作者: Arnaud Bergeron

Get rid of typeConstructor in Scan.

上级 0f2fd4b4
......@@ -2000,12 +2000,6 @@ def local_gpu_extract_diagonal(node):
gpu_from_host(diag_node.inputs[0]))]
return False
def typeConstructor(broadcastable, dtype):
    """Construct an output type for Scan on the GPU.

    float32 outputs are built as ``CudaNdarrayType`` (GPU-resident);
    any other dtype falls back to a host ``TensorType``.

    :param broadcastable: tuple of bools, the broadcastable pattern of
        the type to build.
    :param dtype: dtype string of the output.
    :return: a ``CudaNdarrayType`` for float32, otherwise a
        ``tensor.TensorType``.
    """
    # Anything that is not float32 cannot live on the (old) CUDA backend,
    # so hand it back as an ordinary host tensor type.
    if dtype != 'float32':
        return tensor.TensorType(broadcastable=broadcastable, dtype=dtype)
    return CudaNdarrayType(broadcastable=broadcastable)
@register_opt('scan')
@local_optimizer([gpu_from_host, scan_op.Scan])
def gpuScanOptimization(node):
......@@ -2065,9 +2059,7 @@ def gpuScanOptimization(node):
nw_op = scan_op.Scan(scan_ins,
scan_outs,
info,
typeConstructor=typeConstructor).make_node(
*nw_ins)
info).make_node(*nw_ins)
_outputs = nw_op.outputs
return _outputs
......@@ -2113,8 +2105,7 @@ def gpuScanOptimization(node):
_outputs = scan_op.Scan(
scan_ins,
scan_outs,
info,
typeConstructor=typeConstructor).make_node(*nw_ins).outputs
info).make_node(*nw_ins).outputs
outputs = []
for x, y in zip(_outputs, node.outputs):
if isinstance(y.type, CudaNdarrayType):
......@@ -2126,8 +2117,7 @@ def gpuScanOptimization(node):
optdb.register('gpu_scanOp_make_inplace',
scan_opt.ScanInplaceOptimizer(typeConstructor=typeConstructor,
gpu_flag=True),
scan_opt.ScanInplaceOptimizer(gpu_flag=True),
75,
'gpu',
'fast_run',
......
......@@ -716,13 +716,11 @@ def local_scan_to_gpua(node):
_cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, [])
info['gpu_hash'] = hash(_cmodule_key)
nw_op = scan_op.Scan(scan_ins, scan_outs, info,
typeConstructor=GpuArrayType).make_node(*nw_ins)
nw_op = scan_op.Scan(scan_ins, scan_outs, info).make_node(*nw_ins)
return nw_op.outputs
optdb.register('gpua_scanOp_make_inplace',
scan_opt.ScanInplaceOptimizer(typeConstructor=GpuArrayType,
gpua_flag=True),
scan_opt.ScanInplaceOptimizer(gpua_flag=True),
75,
'gpua',
'fast_run',
......
......@@ -15,6 +15,7 @@ from theano.sandbox.gpuarray.tests.test_basic_ops import mode_with_gpu
class T_Scan(TestCase):
def setUp(self):
    """Per-test fixture: reseed Theano's test RNG, then defer to TestCase."""
    # Seed first so every test starts from a deterministic RNG state,
    # independent of test execution order.
    utt.seed_rng()
    # Run the base-class setup as well (old-style super call; file predates
    # the zero-argument super() form).
    super(T_Scan, self).setUp()
def test_one_sequence_one_output_weights_gpu1(self):
def f_rnn(u_t, x_tm1, W_in, W):
......
......@@ -47,7 +47,6 @@ class Scan(PureOp):
inputs,
outputs,
info,
typeConstructor=None,
):
"""
:param inputs: inputs of the inner function of scan
......@@ -56,21 +55,6 @@ class Scan(PureOp):
the scan op (like number of different types of
arguments, name, mode, if it should run on GPU or
not, etc.)
:param typeConstructor: function that constructs an equivalent
to Theano TensorType
Note: ``typeConstructor`` had been added to refactor how
Theano deals with the GPU. If it runs on the GPU, scan needs
to construct certain outputs (those who reside in the GPU
memory) as the GPU-specific type. However we can not import
gpu code in this file (as it is in sandbox, and not available
on each machine) so the workaround is that the GPU
optimization passes to the constructor of this class a
function that is able to construct a GPU type. This way the
class Scan does not need to be aware of the details for the
GPU, it just constructs any tensor using this function (which
by default constructs normal tensors).
"""
if 'gpua' not in info:
info['gpua'] = False
......@@ -87,19 +71,13 @@ class Scan(PureOp):
self.output_types = []
idx = 0
jdx = 0
tensorConstructor = lambda broadcastable, dtype: TensorType(
broadcastable=broadcastable, dtype=dtype)
if typeConstructor is None:
typeConstructor = tensorConstructor
while idx < self.n_mit_mot_outs:
# Not that for mit_mot there are several output slices per
# output sequence
o = outputs[idx]
self.output_types.append(
typeConstructor(
broadcastable=(False,) + o.type.broadcastable,
dtype=o.type.dtype))
o.type.clone(broadcastable=(False,) + o.type.broadcastable))
idx += len(self.mit_mot_out_slices[jdx])
jdx += 1
......@@ -109,9 +87,7 @@ class Scan(PureOp):
for o in outputs[idx:end]:
self.output_types.append(
typeConstructor(
broadcastable=(False,) + o.type.broadcastable,
dtype=o.type.dtype))
o.type.clone(broadcastable=(False,) + o.type.broadcastable))
# shared outputs + possibly the ending condition
for o in outputs[end:]:
......@@ -232,10 +208,9 @@ class Scan(PureOp):
if rval.ndim == as_var.ndim:
rval = as_var.type.filter_variable(rval)
else:
tmp = as_var.type.__class__(
tmp = as_var.type.clone(
broadcastable=tuple(var.broadcastable[:1])+\
tuple(as_var.broadcastable),
dtype=as_var.dtype)
tuple(as_var.broadcastable))
rval = tmp.filter_variable(rval)
return rval
......
......@@ -916,9 +916,8 @@ class PushOutScanOutput(gof.Optimizer):
class ScanInplaceOptimizer(Optimizer):
"""Graph optimizer for Scan(makes it run inplace)"""
def __init__(self, typeConstructor=None, gpu_flag=False, gpua_flag=False):
def __init__(self, gpu_flag=False, gpua_flag=False):
Optimizer.__init__(self)
self.typeConstructor = typeConstructor
self.gpu_flag = gpu_flag
self.gpua_flag = gpua_flag
......@@ -960,8 +959,7 @@ class ScanInplaceOptimizer(Optimizer):
inputs = ls_begin + ls + ls_end
new_op = scan_op.Scan(op.inputs,
op.outputs,
info,
typeConstructor=self.typeConstructor)
info)
# Do not call make_node for test_value
new_outs = new_op(*inputs, **dict(return_list=True))
......@@ -2086,8 +2084,7 @@ scan_eqopt2 = theano.gof.EquilibriumDB()
optdb.register('scan_eqopt1', scan_eqopt1, .1, 'fast_run', 'scan')
optdb.register('scan_eqopt2', scan_eqopt2, 1.6, 'fast_run', 'scan')
optdb.register('scanOp_make_inplace',
ScanInplaceOptimizer(typeConstructor=None,
gpu_flag=False),
ScanInplaceOptimizer(),
75,
'fast_run',
'inplace',
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论