提交 542b35a8 authored 作者: Sander Dieleman's avatar Sander Dieleman

Moved consider_constant from theano.tensor.extra_ops to theano.gradient, moved…

Moved consider_constant from theano.tensor.extra_ops to theano.gradient, moved the optimization that removes it to theano.tensor.opt. Modified theano.compile.builders so it doesn't import theano.gradient, because theano.gradient now imports ViewOp from theano.compile and this would lead to a circular dependency.
上级 34864fab
from theano import gof from theano import gof
from theano import gradient as G
from theano.compile.function_module import orig_function from theano.compile.function_module import orig_function
from theano.compile import SharedVariable, rebuild_collect_shared from theano.compile import SharedVariable, rebuild_collect_shared
from theano.gof import ops_with_inner_function from theano.gof import ops_with_inner_function
...@@ -142,7 +141,7 @@ class OpFromGraph(gof.Op): ...@@ -142,7 +141,7 @@ class OpFromGraph(gof.Op):
if hasattr(self, "grad_ops"): if hasattr(self, "grad_ops"):
grad_ops = self.grad_ops grad_ops = self.grad_ops
else: else:
gs = G.grad(cost=None, gs = theano.gradient.grad(cost=None,
known_grads=dict(zip(self.new_outputs, output_grads)), known_grads=dict(zip(self.new_outputs, output_grads)),
wrt=self.new_inputs, wrt=self.new_inputs,
disconnected_inputs='ignore') disconnected_inputs='ignore')
......
...@@ -23,6 +23,7 @@ from theano.gof import Variable ...@@ -23,6 +23,7 @@ from theano.gof import Variable
from theano.gof.python25 import OrderedDict from theano.gof.python25 import OrderedDict
from theano.gof.null_type import NullType from theano.gof.null_type import NullType
from theano.gof.op import get_debug_values from theano.gof.op import get_debug_values
from theano.compile import ViewOp
# we can't do "import theano.tensor" # we can't do "import theano.tensor"
# tensor depends on theano.compile # tensor depends on theano.compile
...@@ -1685,3 +1686,29 @@ def _is_zero(x): ...@@ -1685,3 +1686,29 @@ def _is_zero(x):
return 'no' return 'no'
return 'yes' return 'yes'
class ConsiderConstant(ViewOp):
    """Identity-like op whose gradient is defined as zero.

    Wrapping an expression in this op leaves its runtime value
    untouched, but blocks backpropagation through it: every gradient
    flowing in is replaced by zeros of the same shape/dtype.
    """

    def grad(self, args, g_outs):
        # Zero out each incoming output gradient so nothing propagates
        # back to the wrapped expression.
        zeroed = []
        for g_out in g_outs:
            zeroed.append(g_out.zeros_like(g_out))
        return zeroed
# Shared singleton instance of the op; graph-building code should
# normally go through the consider_constant() wrapper instead.
consider_constant_ = ConsiderConstant()
# A function wrapper exists only so the documentation renders well.
def consider_constant(x):
    """Treat an expression as a constant when computing gradients.

    The value of the expression is unaffected; however, when the
    gradient of this expression — or of any expression containing it as
    a sub-expression — is computed, no gradient is backpropagated
    through it. In other words, its gradient is truncated to 0.

    :param x: A Theano expression whose gradient should be truncated.

    :return: The expression is returned unmodified, but its gradient
        is now truncated to 0.

    .. versionadded:: 0.6.1
    """
    result = consider_constant_(x)
    return result
...@@ -58,9 +58,8 @@ def shared(*args, **kw): ...@@ -58,9 +58,8 @@ def shared(*args, **kw):
from theano.tensor import nnet # used for softmax, sigmoid, etc. from theano.tensor import nnet # used for softmax, sigmoid, etc.
from theano.gradient import Rop, Lop, grad, numeric_grad, verify_grad, \ from theano.gradient import Rop, Lop, grad, numeric_grad, verify_grad, \
jacobian, hessian jacobian, hessian, consider_constant
from theano.tensor.sort import sort, argsort from theano.tensor.sort import sort, argsort
from theano.tensor.extra_ops import (DiffOp, bincount, squeeze, from theano.tensor.extra_ops import (DiffOp, bincount, squeeze,
repeat, bartlett, fill_diagonal, cumsum, cumprod, repeat, bartlett, fill_diagonal, cumsum, cumprod)
consider_constant)
...@@ -6,8 +6,6 @@ from theano.tensor import basic ...@@ -6,8 +6,6 @@ from theano.tensor import basic
from theano import gof, scalar from theano import gof, scalar
tensor = basic tensor = basic
from theano.gradient import DisconnectedType from theano.gradient import DisconnectedType
from theano.compile import ViewOp
from theano.tensor.opt import register_canonicalize
class CumsumOp(theano.Op): class CumsumOp(theano.Op):
...@@ -725,35 +723,3 @@ def fill_diagonal(a, val): ...@@ -725,35 +723,3 @@ def fill_diagonal(a, val):
.. versionadded:: 0.6 .. versionadded:: 0.6
""" """
return fill_diagonal_(a, val) return fill_diagonal_(a, val)
# Identity op: forwards its input unchanged, but its gradient is
# defined as zero, cutting the wrapped expression out of backprop.
class ConsiderConstant(ViewOp):
    def grad(self, args, g_outs):
        # One zero tensor per output gradient, matching shape/dtype.
        return [g_out.zeros_like(g_out) for g_out in g_outs]
# Shared singleton instance; use the consider_constant() wrapper below.
consider_constant_ = ConsiderConstant()

# Although the op just returns its input, it should be removed from
# the graph to make sure all possible optimizations can be applied.
register_canonicalize(gof.OpRemove(consider_constant_),
                      'fast_compile', name='remove_consider_constant')
# A function wrapper exists only so the documentation renders well.
def consider_constant(x):
    """Consider an expression constant when computing gradients.

    The expression itself is unaffected, but when its gradient is
    computed, or the gradient of another expression that this
    expression is a subexpression of, it will not be backpropagated
    through. In other words, the gradient of the expression is
    truncated to 0.

    :param x: A Theano expression whose gradient should be truncated.

    :return: The expression is returned unmodified, but its gradient
        is now truncated to 0.

    .. versionadded:: 0.6.1
    """
    return consider_constant_(x)
...@@ -4809,3 +4809,13 @@ else: ...@@ -4809,3 +4809,13 @@ else:
FusionOptimizer(local_elemwise_fusion), 71.00, FusionOptimizer(local_elemwise_fusion), 71.00,
'fusion', 'local_elemwise_fusion', 'fusion', 'local_elemwise_fusion',
'FusionOptimizer') 'FusionOptimizer')
# ############################
# # Remove consider_constant #
# ############################
# Although the op just returns its input, it should be removed from
# the graph to make sure all possible optimizations can be applied.
# NOTE(review): registered here rather than in theano.gradient,
# presumably to avoid a circular import — confirm.
register_canonicalize(gof.OpRemove(theano.gradient.consider_constant_),
                      'fast_compile', name='remove_consider_constant')
\ No newline at end of file
...@@ -8,7 +8,7 @@ from theano.tests import unittest_tools as utt ...@@ -8,7 +8,7 @@ from theano.tests import unittest_tools as utt
from theano.tensor.extra_ops import (CumsumOp, cumsum, CumprodOp, cumprod, from theano.tensor.extra_ops import (CumsumOp, cumsum, CumprodOp, cumprod,
BinCountOp, bincount, DiffOp, diff, BinCountOp, bincount, DiffOp, diff,
squeeze, RepeatOp, repeat, Bartlett, bartlett, squeeze, RepeatOp, repeat, Bartlett, bartlett,
FillDiagonal, fill_diagonal, consider_constant) FillDiagonal, fill_diagonal)
from theano import tensor as T from theano import tensor as T
from theano import config, tensor, function from theano import config, tensor, function
...@@ -464,40 +464,3 @@ class TestFillDiagonal(utt.InferShapeTester): ...@@ -464,40 +464,3 @@ class TestFillDiagonal(utt.InferShapeTester):
numpy.random.rand()], numpy.random.rand()],
self.op_class, self.op_class,
warn=False) warn=False)
class TestConsiderConstant(unittest.TestCase):
    """Tests for consider_constant: the op must vanish from compiled
    graphs, and gradients must not flow through the wrapped term."""

    def setUp(self):
        utt.seed_rng()
        self.rng = np.random.RandomState(seed=utt.fetch_seed())

    def test_op_removed(self):
        # consider_constant is an identity at run time, so the
        # canonicalization pass should strip it from the graph.
        x = T.matrix('x')
        y = x * consider_constant(x)
        f = theano.function([x], y)
        # need to refer to T.extra_ops.consider_constant_ here,
        # T.consider_constant is a wrapper function!
        assert T.extra_ops.consider_constant_ not in \
            [node.op for node in f.maker.fgraph.toposort()]

    def test_grad(self):
        # Fix: the original mixed `numpy.asarray` with the `np` alias
        # used everywhere else in this class; use `np` consistently.
        a = np.asarray(self.rng.randn(5, 5),
                       dtype=config.floatX)

        x = T.matrix('x')

        # Pairs of (expression, expected gradient w.r.t. x): the
        # consider_constant factor contributes nothing to the gradient.
        expressions_gradients = [
            (x * consider_constant(x), x),
            (x * consider_constant(T.exp(x)), T.exp(x)),
            (consider_constant(x), T.constant(0.)),
            (x ** 2 * consider_constant(x), 2 * x ** 2),
        ]

        for expr, expr_grad in expressions_gradients:
            g = T.grad(expr.sum(), x)
            # gradient according to theano
            f = theano.function([x], g, on_unused_input='ignore')
            # desired gradient
            f2 = theano.function([x], expr_grad, on_unused_input='ignore')
            assert np.allclose(f(a), f2(a))
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
import unittest import unittest
import theano import theano
from theano import gof from theano import gof
from theano.tests import unittest_tools as utt
from theano import gradient from theano import gradient
from theano.tensor.nnet.Conv3D import conv3D from theano.tensor.nnet.Conv3D import conv3D
...@@ -554,5 +555,45 @@ def test_disconnected_cost_grad(): ...@@ -554,5 +555,45 @@ def test_disconnected_cost_grad():
return return
raise AssertionError("A disconnected gradient has been ignored.") raise AssertionError("A disconnected gradient has been ignored.")
class TestConsiderConstant(unittest.TestCase):
    """Check that consider_constant disappears from compiled graphs and
    yields a zero gradient for the wrapped sub-expression."""

    def setUp(self):
        utt.seed_rng()
        self.rng = np.random.RandomState(seed=utt.fetch_seed())

    def test_op_removed(self):
        x = theano.tensor.matrix('x')
        y = x * gradient.consider_constant(x)
        f = theano.function([x], y)
        # need to refer to theano.gradient.consider_constant_ here,
        # theano.gradient.consider_constant is a wrapper function!
        ops_in_graph = [node.op for node in f.maker.fgraph.toposort()]
        assert gradient.consider_constant_ not in ops_in_graph

    def test_grad(self):
        T = theano.tensor
        a = np.asarray(self.rng.randn(5, 5), dtype=config.floatX)
        x = T.matrix('x')

        # Pairs of (expression, expected gradient w.r.t. x).
        cases = [
            (x * gradient.consider_constant(x), x),
            (x * gradient.consider_constant(T.exp(x)), T.exp(x)),
            (gradient.consider_constant(x), T.constant(0.)),
            (x ** 2 * gradient.consider_constant(x), 2 * x ** 2),
        ]
        for expr, expected_grad in cases:
            g = gradient.grad(expr.sum(), x)
            # gradient according to theano
            f = theano.function([x], g, on_unused_input='ignore')
            # desired gradient
            f2 = theano.function([x], expected_grad,
                                 on_unused_input='ignore')
            assert np.allclose(f(a), f2(a))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论