Commit 67115fbb authored by Frederic

Revert "Get rid of typeConstructor in Scan."

This reverts commit be12a5cf.
Parent ed20a5cb
...@@ -2055,6 +2055,12 @@ def local_gpu_extract_diagonal(node): ...@@ -2055,6 +2055,12 @@ def local_gpu_extract_diagonal(node):
gpu_from_host(diag_node.inputs[0]))] gpu_from_host(diag_node.inputs[0]))]
return False return False
def typeConstructor(broadcastable, dtype):
    """Build an output type for Scan outputs moved to the GPU.

    float32 outputs live in GPU memory and get a ``CudaNdarrayType``;
    every other dtype stays a plain Theano ``TensorType``.
    """
    if dtype != 'float32':
        return tensor.TensorType(broadcastable=broadcastable, dtype=dtype)
    return CudaNdarrayType(broadcastable=broadcastable)
@register_opt('scan') @register_opt('scan')
@local_optimizer([gpu_from_host, scan_op.Scan]) @local_optimizer([gpu_from_host, scan_op.Scan])
def gpuScanOptimization(node): def gpuScanOptimization(node):
...@@ -2114,7 +2120,9 @@ def gpuScanOptimization(node): ...@@ -2114,7 +2120,9 @@ def gpuScanOptimization(node):
nw_op = scan_op.Scan(scan_ins, nw_op = scan_op.Scan(scan_ins,
scan_outs, scan_outs,
info).make_node(*nw_ins) info,
typeConstructor=typeConstructor).make_node(
*nw_ins)
_outputs = nw_op.outputs _outputs = nw_op.outputs
return _outputs return _outputs
...@@ -2160,7 +2168,8 @@ def gpuScanOptimization(node): ...@@ -2160,7 +2168,8 @@ def gpuScanOptimization(node):
_outputs = scan_op.Scan( _outputs = scan_op.Scan(
scan_ins, scan_ins,
scan_outs, scan_outs,
info).make_node(*nw_ins).outputs info,
typeConstructor=typeConstructor).make_node(*nw_ins).outputs
outputs = [] outputs = []
for x, y in zip(_outputs, node.outputs): for x, y in zip(_outputs, node.outputs):
if isinstance(y.type, CudaNdarrayType): if isinstance(y.type, CudaNdarrayType):
...@@ -2172,7 +2181,8 @@ def gpuScanOptimization(node): ...@@ -2172,7 +2181,8 @@ def gpuScanOptimization(node):
optdb.register('gpu_scanOp_make_inplace', optdb.register('gpu_scanOp_make_inplace',
scan_opt.ScanInplaceOptimizer(gpu_flag=True), scan_opt.ScanInplaceOptimizer(typeConstructor=typeConstructor,
gpu_flag=True),
75, 75,
'gpu', 'gpu',
'fast_run', 'fast_run',
......
...@@ -716,11 +716,13 @@ def local_scan_to_gpua(node): ...@@ -716,11 +716,13 @@ def local_scan_to_gpua(node):
_cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, []) _cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, [])
info['gpu_hash'] = hash(_cmodule_key) info['gpu_hash'] = hash(_cmodule_key)
nw_op = scan_op.Scan(scan_ins, scan_outs, info).make_node(*nw_ins) nw_op = scan_op.Scan(scan_ins, scan_outs, info,
typeConstructor=GpuArrayType).make_node(*nw_ins)
return nw_op.outputs return nw_op.outputs
optdb.register('gpua_scanOp_make_inplace', optdb.register('gpua_scanOp_make_inplace',
scan_opt.ScanInplaceOptimizer(gpua_flag=True), scan_opt.ScanInplaceOptimizer(typeConstructor=GpuArrayType,
gpua_flag=True),
75, 75,
'gpua', 'gpua',
'fast_run', 'fast_run',
......
...@@ -15,7 +15,6 @@ from theano.sandbox.gpuarray.tests.test_basic_ops import mode_with_gpu ...@@ -15,7 +15,6 @@ from theano.sandbox.gpuarray.tests.test_basic_ops import mode_with_gpu
class T_Scan(TestCase): class T_Scan(TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
super(T_Scan, self).setUp()
def test_one_sequence_one_output_weights_gpu1(self): def test_one_sequence_one_output_weights_gpu1(self):
def f_rnn(u_t, x_tm1, W_in, W): def f_rnn(u_t, x_tm1, W_in, W):
......
...@@ -49,6 +49,7 @@ class Scan(PureOp): ...@@ -49,6 +49,7 @@ class Scan(PureOp):
inputs, inputs,
outputs, outputs,
info, info,
typeConstructor=None,
): ):
""" """
:param inputs: inputs of the inner function of scan :param inputs: inputs of the inner function of scan
...@@ -57,6 +58,21 @@ class Scan(PureOp): ...@@ -57,6 +58,21 @@ class Scan(PureOp):
the scan op (like number of different types of the scan op (like number of different types of
arguments, name, mode, if it should run on GPU or arguments, name, mode, if it should run on GPU or
not, etc.) not, etc.)
:param typeConstructor: function that constructs an equivalent
to Theano TensorType
Note: ``typeConstructor`` had been added to refactor how
Theano deals with the GPU. If it runs on the GPU, scan needs
to construct certain outputs (those who reside in the GPU
memory) as the GPU-specific type. However we can not import
gpu code in this file (as it is in sandbox, and not available
on each machine) so the workaround is that the GPU
optimization passes to the constructor of this class a
function that is able to construct a GPU type. This way the
class Scan does not need to be aware of the details for the
GPU, it just constructs any tensor using this function (which
by default constructs normal tensors).
""" """
if 'gpua' not in info: if 'gpua' not in info:
info['gpua'] = False info['gpua'] = False
...@@ -72,13 +88,19 @@ class Scan(PureOp): ...@@ -72,13 +88,19 @@ class Scan(PureOp):
self.output_types = [] self.output_types = []
idx = 0 idx = 0
jdx = 0 jdx = 0
tensorConstructor = lambda broadcastable, dtype: TensorType(
broadcastable=broadcastable, dtype=dtype)
if typeConstructor is None:
typeConstructor = tensorConstructor
while idx < self.n_mit_mot_outs: while idx < self.n_mit_mot_outs:
# Not that for mit_mot there are several output slices per # Not that for mit_mot there are several output slices per
# output sequence # output sequence
o = outputs[idx] o = outputs[idx]
self.output_types.append( self.output_types.append(
o.type.clone(broadcastable=(False,) + o.type.broadcastable)) typeConstructor(
broadcastable=(False,) + o.type.broadcastable,
dtype=o.type.dtype))
idx += len(self.mit_mot_out_slices[jdx]) idx += len(self.mit_mot_out_slices[jdx])
jdx += 1 jdx += 1
...@@ -88,7 +110,9 @@ class Scan(PureOp): ...@@ -88,7 +110,9 @@ class Scan(PureOp):
for o in outputs[idx:end]: for o in outputs[idx:end]:
self.output_types.append( self.output_types.append(
o.type.clone(broadcastable=(False,) + o.type.broadcastable)) typeConstructor(
broadcastable=(False,) + o.type.broadcastable,
dtype=o.type.dtype))
# shared outputs + possibly the ending condition # shared outputs + possibly the ending condition
for o in outputs[end:]: for o in outputs[end:]:
......
...@@ -916,8 +916,9 @@ class PushOutScanOutput(gof.Optimizer): ...@@ -916,8 +916,9 @@ class PushOutScanOutput(gof.Optimizer):
class ScanInplaceOptimizer(Optimizer): class ScanInplaceOptimizer(Optimizer):
"""Graph optimizer for Scan(makes it run inplace)""" """Graph optimizer for Scan(makes it run inplace)"""
def __init__(self, gpu_flag=False, gpua_flag=False): def __init__(self, typeConstructor=None, gpu_flag=False, gpua_flag=False):
Optimizer.__init__(self) Optimizer.__init__(self)
self.typeConstructor = typeConstructor
self.gpu_flag = gpu_flag self.gpu_flag = gpu_flag
self.gpua_flag = gpua_flag self.gpua_flag = gpua_flag
...@@ -959,7 +960,8 @@ class ScanInplaceOptimizer(Optimizer): ...@@ -959,7 +960,8 @@ class ScanInplaceOptimizer(Optimizer):
inputs = ls_begin + ls + ls_end inputs = ls_begin + ls + ls_end
new_op = scan_op.Scan(op.inputs, new_op = scan_op.Scan(op.inputs,
op.outputs, op.outputs,
info) info,
typeConstructor=self.typeConstructor)
# Do not call make_node for test_value # Do not call make_node for test_value
new_outs = new_op(*inputs, **dict(return_list=True)) new_outs = new_op(*inputs, **dict(return_list=True))
...@@ -2085,7 +2087,8 @@ scan_eqopt2 = theano.gof.EquilibriumDB() ...@@ -2085,7 +2087,8 @@ scan_eqopt2 = theano.gof.EquilibriumDB()
optdb.register('scan_eqopt1', scan_eqopt1, .1, 'fast_run', 'scan') optdb.register('scan_eqopt1', scan_eqopt1, .1, 'fast_run', 'scan')
optdb.register('scan_eqopt2', scan_eqopt2, 1.6, 'fast_run', 'scan') optdb.register('scan_eqopt2', scan_eqopt2, 1.6, 'fast_run', 'scan')
optdb.register('scanOp_make_inplace', optdb.register('scanOp_make_inplace',
ScanInplaceOptimizer(), ScanInplaceOptimizer(typeConstructor=None,
gpu_flag=False),
75, 75,
'fast_run', 'fast_run',
'inplace', 'inplace',
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment