提交 6acf4e67 authored 作者: abergeron's avatar abergeron

Merge pull request #2403 from nouiz/gradient_clipping

add GradClip op
...@@ -1888,3 +1888,46 @@ def consider_constant(x):
    .. versionadded:: 0.6.1
    """
    return consider_constant_(x)
class GradClip(theano.compile.ViewOp):
    """Identity (view) op in the forward pass whose gradient is clipped.

    See the user-facing function :func:`grad_clip` for usage.
    """
    # The clip bounds are deliberately NOT part of __props__ (so they do
    # not enter __eq__/__hash__): they do not influence the perform of
    # this op, only its gradient.
    __props__ = ()

    def __init__(self, clip_lower_bound, clip_upper_bound):
        """
        :param clip_lower_bound: lower bound applied to the gradient.
        :param clip_upper_bound: upper bound applied to the gradient.
            Must be >= ``clip_lower_bound``.

        :raises ValueError: if ``clip_upper_bound < clip_lower_bound``.
        """
        # Raise explicitly rather than assert: asserts are stripped
        # under python -O, which would silently accept inverted bounds.
        if clip_upper_bound < clip_lower_bound:
            raise ValueError(
                "clip_upper_bound (%s) must be >= clip_lower_bound (%s)" %
                (clip_upper_bound, clip_lower_bound))
        self.clip_lower_bound = clip_lower_bound
        self.clip_upper_bound = clip_upper_bound

    def grad(self, args, g_outs):
        # Clip each output gradient elementwise into [lower, upper];
        # the forward pass itself is untouched (ViewOp).
        return [theano.tensor.clip(g_out, self.clip_lower_bound,
                                   self.clip_upper_bound)
                for g_out in g_outs]
def grad_clip(x, lower_bound, upper_bound):
    """
    Return a view of ``x`` whose gradient is elementwise clipped to
    ``[lower_bound, upper_bound]`` during backpropagation.

    The forward pass is the identity (a view), so only the gradient is
    affected. This is an elemwise operation.

    :param x: the variable we want its gradient inputs clipped.
    :param lower_bound: the lower bound of the gradient value.
    :param upper_bound: the upper bound of the gradient value.
        Must be >= ``lower_bound``.

    :examples:

        x = theano.tensor.scalar()

        z = theano.tensor.grad(grad_clip(x, -1, 1)**2, x)
        z2 = theano.tensor.grad(x**2, x)

        f = theano.function([x], outputs = [z, z2])

        print(f(2.0))  # output (1.0, 4.0)

    :note: We register an opt in tensor/opt.py that removes the GradClip.
       So it has 0 cost in the forward and only does work in the grad.
    """
    return GradClip(lower_bound, upper_bound)(x)
...@@ -5551,3 +5551,10 @@ else:
# the graph to make sure all possible optimizations can be applied.
register_canonicalize(gof.OpRemove(theano.gradient.consider_constant_),
                      'fast_compile', 'fast_run', name='remove_consider_constant')
@register_canonicalize
@gof.local_optimizer([theano.gradient.GradClip])
def local_grad_clip(node):
    """Remove GradClip nodes: the op is the identity in the forward
    pass, so replacing the node by its input is free and leaves only
    the gradient-clipping effect (already baked into the grad graph)."""
    if not isinstance(node.op, theano.gradient.GradClip):
        return None
    return node.inputs
...@@ -3,6 +3,9 @@
# UNIT TEST
#
import unittest

import numpy as np

import theano
from theano import gof
from theano.tests import unittest_tools as utt
...@@ -10,7 +13,6 @@ from theano.tests import unittest_tools as utt
from theano import gradient
from theano.tensor.nnet.Conv3D import conv3D
from theano import config
from theano.gof.null_type import NullType

one = theano.tensor.as_tensor_variable(1.)
...@@ -641,5 +643,21 @@ class TestConsiderConstant(unittest.TestCase):
        assert np.allclose(f(a), f2(a))
def test_grad_clip():
    """Check that grad_clip is optimized away in the forward graph
    while its gradient is clipped to the requested bounds."""
    x = theano.tensor.scalar()

    clipped_grad = theano.tensor.grad(gradient.grad_clip(x, -1, 1) ** 2, x)
    plain_grad = theano.tensor.grad(x ** 2, x)

    f = theano.function([x], outputs=[clipped_grad, plain_grad])

    # The canonicalization opt should have removed every GradClip node.
    if theano.config.mode != "FAST_COMPILE":
        assert all(not isinstance(node.op, gradient.GradClip)
                   for node in f.maker.fgraph.toposort())

    out = f(2.)
    # d/dx x**2 at x=2 is 4; clipped to [-1, 1] it becomes 1.
    assert np.allclose(out, (1, 4))
    assert not np.allclose(out[0], out[1])
if __name__ == '__main__':
    unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论