Commit 88c6d32a authored by Frédéric Bastien, committed by GitHub

Merge pull request #5533 from nouiz/scan

Scan compile/grad speed up
@@ -207,10 +207,10 @@ class Scan(PureOp):
         if self.info['gpu'] or self.info['gpua']:
             self._hash_inner_graph = self.info['gpu_hash']
         else:
-            tmp_in, tmp_out = scan_utils.reconstruct_graph(self.inputs,
-                                                           self.outputs)
-            # This is actually required for the line just after.
-            gof.FunctionGraph(tmp_in, tmp_out, clone=False)
+            # Do the missing inputs check here to have the error early.
+            for var in theano.gof.graph.inputs(self.outputs, self.inputs):
+                if var not in self.inputs and not isinstance(var, theano.Constant):
+                    raise theano.gof.MissingInputError("ScanOp is missing an input.")
             self._cmodule_key = gof.CLinker().cmodule_key_variables(self.inputs,
                                                                     self.outputs,
                                                                     [])
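The new branch replaces the graph clone with a direct traversal: theano.gof.graph.inputs walks back from the inner outputs and returns the leaf variables, so any non-constant leaf that is not a declared input is reported right away. A minimal standalone sketch of the same check (the helper name and the toy variables below are illustrative, not part of the patch):

    import theano
    import theano.tensor as tt

    def check_inner_graph(inputs, outputs):
        # Same check as in the patch: every non-constant leaf reachable
        # from the outputs must be one of the declared inputs.
        for var in theano.gof.graph.inputs(outputs, inputs):
            if var not in inputs and not isinstance(var, theano.Constant):
                raise theano.gof.MissingInputError("ScanOp is missing an input.")

    x = tt.scalar('x')
    y = tt.scalar('y')
    check_inner_graph([x, y], [x + y])   # passes: every leaf is declared
    try:
        check_inner_graph([x], [x + y])  # y is reachable but not declared
    except theano.gof.MissingInputError as e:
        print("caught: %s" % e)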
@@ -1987,10 +1987,8 @@ class Scan(PureOp):
         if self.truncate_gradient != -1:
             grad_steps = tensor.minimum(grad_steps, self.truncate_gradient)
-        rval = scan_utils.reconstruct_graph(self.inputs,
-                                            self.outputs)
-        self_inputs = rval[0]
-        self_outputs = rval[1]
+        self_inputs = self.inputs
+        self_outputs = self.outputs
         # differentiable inputs
         diff_inputs = (self.inner_seqs(self_inputs) +
                        self.inner_mitmot(self_inputs) +
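scan_utils.reconstruct_graph returns a cloned copy of the inner inputs and outputs, so using self.inputs and self.outputs directly avoids one full copy of the inner graph every time the gradient of a scan is built. A short example of the kind of code that goes through this path (the scan itself is illustrative, not from the patch):

    import theano
    import theano.tensor as tt

    x0 = tt.dscalar('x0')
    # Inner function doubles the carried state at each step.
    outputs, updates = theano.scan(fn=lambda prev: prev * 2,
                                   outputs_info=x0,
                                   n_steps=5)
    cost = outputs[-1]            # equals x0 * 2**5
    g = theano.grad(cost, x0)     # dispatches to Scan.grad internally
    f = theano.function([x0], g)
    print(f(3.0))                 # 32.0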
@@ -2649,13 +2647,13 @@ class Scan(PureOp):
         return gradients

     def R_op(self, inputs, eval_points):
-        # Step 0. Don't work on the orignal tensor variables
-        rval = scan_utils.reconstruct_graph(self.inputs,
-                                            self.outputs, '_rop')
-        self_inputs = rval[0]
-        rop_of_inputs = rval[0][:self.n_seqs + self.n_outs] + \
-            rval[0][self.n_seqs + self.n_outs + self.n_shared_outs:]
-        self_outputs = rval[1]
+        # Step 0. Prepare some shortcut variable
+        self_inputs = self.inputs
+        rop_of_inputs = (self_inputs[:self.n_seqs + self.n_outs] +
+                         self_inputs[self.n_seqs + self.n_outs +
+                                     self.n_shared_outs:])
+        self_outputs = self.outputs
         # Step 1. Compute the R_op of the inner function
         inner_eval_points = [scan_utils.safe_new(x, '_evalpoint')
                              for x in rop_of_inputs]
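The same substitution happens in R_op: the slices that used to index the cloned rval[0] now index self.inputs directly. A sketch of a call that exercises Scan.R_op (the scan and variable names are illustrative, not from the patch):

    import theano
    import theano.tensor as tt
    from theano.gradient import Rop

    x0 = tt.dscalar('x0')
    v = tt.dscalar('v')
    outputs, updates = theano.scan(fn=lambda prev: prev * 2,
                                   outputs_info=x0,
                                   n_steps=5)
    # Jacobian-vector product of the scan outputs w.r.t. x0, evaluated
    # at v; building this expression calls Scan.R_op.
    jv = Rop(outputs, x0, v)
    f = theano.function([x0, v], jv)
    print(f(3.0, 1.0))            # [  2.   4.   8.  16.  32.]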
@@ -4488,9 +4488,6 @@ class T_Scan(unittest.TestCase):
                                  n_steps=n,
                                  strict=True)
-        f_strict = theano.function([x0_], ret_strict[0][-1])
-        result_strict = f_strict(x0)

     def test_monitor_mode(self):
         # Test that it is possible to pass an instance of MonitorMode
         # to the inner function
@@ -693,8 +693,8 @@ def test_scan_debugprint5():
     for{cpu,scan_fn} [id F] ''
      >Elemwise{mul,no_inplace} [id CR] ''
-     > |<TensorType(float64, vector)> [id CS] -> [id H]
-     > |A_copy [id CT] -> [id P]
+     > |<TensorType(float64, vector)> [id CP] -> [id H]
+     > |A_copy [id CL] -> [id P]
     for{cpu,scan_fn} [id F] ''
      >Elemwise{mul,no_inplace} [id CR] ''