Commit e987d935, authored by Pascal Lamblin, committed via GitHub

Merge pull request #5538 from SinaHonari/issue4078

Setting gradient for MRG_RandomStreams distribution variables to be null
...@@ -5,6 +5,7 @@ import numpy as np ...@@ -5,6 +5,7 @@ import numpy as np
import theano import theano
from theano import tensor from theano import tensor
from theano.configparser import change_flags
from theano.sandbox import rng_mrg from theano.sandbox import rng_mrg
from theano.sandbox.rng_mrg import MRG_RandomStreams from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.sandbox.tests.test_rng_mrg import java_samples, rng_mrg_overflow from theano.sandbox.tests.test_rng_mrg import java_samples, rng_mrg_overflow
...@@ -115,8 +116,6 @@ def test_consistency_GPUA_parallel(): ...@@ -115,8 +116,6 @@ def test_consistency_GPUA_parallel():
def test_GPUA_full_fill(): def test_GPUA_full_fill():
# Make sure the whole sample buffer is filled. Also make sure # Make sure the whole sample buffer is filled. Also make sure
# large samples are consistent with CPU results. # large samples are consistent with CPU results.
import theano.gpuarray.tests.config
from theano.gpuarray.type import gpuarray_shared_constructor
# This needs to be large to trigger the problem on GPU # This needs to be large to trigger the problem on GPU
size = (10, 1000) size = (10, 1000)
...@@ -136,9 +135,6 @@ def test_GPUA_full_fill(): ...@@ -136,9 +135,6 @@ def test_GPUA_full_fill():
def test_overflow_gpu_new_backend(): def test_overflow_gpu_new_backend():
from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode
from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345 seed = 12345
n_substreams = 7 n_substreams = 7
curr_rstate = np.array([seed] * 6, dtype='int32') curr_rstate = np.array([seed] * 6, dtype='int32')
...@@ -162,11 +158,7 @@ def test_overflow_gpu_new_backend(): ...@@ -162,11 +158,7 @@ def test_overflow_gpu_new_backend():
def test_validate_input_types_gpuarray_backend(): def test_validate_input_types_gpuarray_backend():
from theano.sandbox.rng_mrg import mrg_uniform
from theano.gpuarray.type import gpuarray_shared_constructor
from theano.configparser import change_flags
with change_flags(compute_test_value="raise"): with change_flags(compute_test_value="raise"):
rstate = np.zeros((7, 6), dtype="int32") rstate = np.zeros((7, 6), dtype="int32")
rstate = gpuarray_shared_constructor(rstate) rstate = gpuarray_shared_constructor(rstate)
mrg_uniform.new(rstate, ndim=None, dtype="float32", size=(3,)) rng_mrg.mrg_uniform.new(rstate, ndim=None, dtype="float32", size=(3,))
...@@ -1994,6 +1994,38 @@ def zero_grad(x): ...@@ -1994,6 +1994,38 @@ def zero_grad(x):
return zero_grad_(x) return zero_grad_(x)
class UndefinedGrad(ViewOp):
    """Identity op whose gradient is explicitly undefined.

    The forward computation is a view of the input (inherited from
    ``ViewOp``); requesting the gradient of any input yields an
    undefined-gradient marker via ``grad_undefined``, so differentiating
    through this op produces an explicit error instead of a silent,
    meaningless gradient.
    """

    def grad(self, args, g_outs):
        # Mark every input's gradient as undefined.
        undefined = []
        for idx, inp in enumerate(args):
            undefined.append(grad_undefined(self, idx, inp))
        return undefined

    def R_op(self, inputs, eval_points):
        # No R-operator is provided for this op.
        return [None]

    def connection_pattern(self, node):
        # The single output depends on the single input.
        return [[True]]


# Shared instance used by the ``undefined_grad`` helper below.
undefined_grad_ = UndefinedGrad()
def undefined_grad(x):
    """Mark the gradient of *x* as undefined.

    The returned expression computes exactly the same value as ``x``,
    but any attempt to differentiate through it -- directly, or as part
    of a larger expression that contains it -- generates an error
    message stating that the gradient is not defined.

    :param x: A Theano expression whose gradient should be undefined.
    :return: The expression, unmodified in value, but with its gradient
        now undefined.
    """
    return undefined_grad_(x)
class DisconnectedGrad(ViewOp): class DisconnectedGrad(ViewOp):
def grad(self, args, g_outs): def grad(self, args, g_outs):
return [disconnected_type() for g_out in g_outs] return [disconnected_type() for g_out in g_outs]
......
...@@ -15,6 +15,7 @@ from six.moves import xrange ...@@ -15,6 +15,7 @@ from six.moves import xrange
import theano import theano
from theano import Op, Apply, shared, config, Variable from theano import Op, Apply, shared, config, Variable
from theano import gradient, function from theano import gradient, function
from theano.gradient import undefined_grad
from theano import tensor from theano import tensor
from theano.tensor import (TensorType, as_tensor_variable, get_vector_length, from theano.tensor import (TensorType, as_tensor_variable, get_vector_length,
cast, opt, scal) cast, opt, scal)
...@@ -773,7 +774,9 @@ class MRG_RandomStreams(object): ...@@ -773,7 +774,9 @@ class MRG_RandomStreams(object):
""" """
low = as_tensor_variable(low) low = as_tensor_variable(low)
low = undefined_grad(low)
high = as_tensor_variable(high) high = as_tensor_variable(high)
high = undefined_grad(high)
if dtype is None: if dtype is None:
dtype = scal.upcast(config.floatX, low.dtype, high.dtype) dtype = scal.upcast(config.floatX, low.dtype, high.dtype)
...@@ -821,6 +824,7 @@ class MRG_RandomStreams(object): ...@@ -821,6 +824,7 @@ class MRG_RandomStreams(object):
nstreams=None): nstreams=None):
# TODO : need description for method, parameter and return # TODO : need description for method, parameter and return
if n == 1: if n == 1:
p = undefined_grad(as_tensor_variable(p))
x = self.uniform(size=size, nstreams=nstreams) x = self.uniform(size=size, nstreams=nstreams)
return cast(x < p, dtype) return cast(x < p, dtype)
else: else:
...@@ -852,6 +856,7 @@ class MRG_RandomStreams(object): ...@@ -852,6 +856,7 @@ class MRG_RandomStreams(object):
if pvals is None: if pvals is None:
raise TypeError("You have to specify pvals") raise TypeError("You have to specify pvals")
pvals = as_tensor_variable(pvals) pvals = as_tensor_variable(pvals)
pvals = undefined_grad(pvals)
if size is not None: if size is not None:
if any([isinstance(i, integer_types) and i <= 0 for i in size]): if any([isinstance(i, integer_types) and i <= 0 for i in size]):
raise ValueError( raise ValueError(
...@@ -932,6 +937,7 @@ class MRG_RandomStreams(object): ...@@ -932,6 +937,7 @@ class MRG_RandomStreams(object):
raise TypeError("For now, p has to be specified in " raise TypeError("For now, p has to be specified in "
"MRG_RandomStreams.choice.") "MRG_RandomStreams.choice.")
p = as_tensor_variable(p) p = as_tensor_variable(p)
p = undefined_grad(p)
if ndim is not None: if ndim is not None:
raise ValueError("ndim argument to " raise ValueError("ndim argument to "
...@@ -978,7 +984,9 @@ class MRG_RandomStreams(object): ...@@ -978,7 +984,9 @@ class MRG_RandomStreams(object):
# second half our U2's. See Wikipedia page: # second half our U2's. See Wikipedia page:
# http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform # http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
avg = as_tensor_variable(avg) avg = as_tensor_variable(avg)
avg = undefined_grad(avg)
std = as_tensor_variable(std) std = as_tensor_variable(std)
std = undefined_grad(std)
if dtype is None: if dtype is None:
dtype = scal.upcast(config.floatX, avg.dtype, std.dtype) dtype = scal.upcast(config.floatX, avg.dtype, std.dtype)
......
...@@ -687,6 +687,70 @@ def test_overflow_cpu(): ...@@ -687,6 +687,70 @@ def test_overflow_cpu():
rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=False) rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=False)
def test_undefined_grad():
    """Gradients w.r.t. MRG_RandomStreams distribution parameters must fail."""
    srng = MRG_RandomStreams(seed=1234)

    def check_null_grad(cost, wrt):
        # Differentiating a sampled expression w.r.t. a distribution
        # parameter should raise NullTypeGradError.
        assert_raises(theano.gradient.NullTypeGradError,
                      theano.grad, cost, wrt)

    # uniform distribution
    low = tensor.scalar()
    high = tensor.scalar()
    check_null_grad(srng.uniform((), low=low), low)
    check_null_grad(srng.uniform((), low=0, high=high), high)
    check_null_grad(srng.uniform((), low=low, high=high), (low, high))

    # binomial distribution
    prob = tensor.scalar()
    check_null_grad(srng.binomial((), p=prob), prob)

    # multinomial distribution
    prob1 = tensor.scalar()
    prob2 = tensor.scalar()
    pvals = [theano.tensor.as_tensor_variable([prob1, 0.5, 0.25])]
    sample = srng.multinomial(size=None, pvals=pvals, n=4)[0]
    check_null_grad(theano.tensor.sum(sample), prob1)
    pvals = [theano.tensor.as_tensor_variable([prob1, prob2])]
    sample = srng.multinomial(size=None, pvals=pvals, n=4)[0]
    check_null_grad(theano.tensor.sum(sample), (prob1, prob2))

    # choice
    pvals = [theano.tensor.as_tensor_variable([prob1, prob2, 0.1, 0.2])]
    sample = srng.choice(a=None, size=1, p=pvals, replace=False)[0]
    check_null_grad(sample[0], (prob1, prob2))
    pvals = [theano.tensor.as_tensor_variable([prob1, prob2])]
    sample = srng.choice(a=None, size=1, p=pvals, replace=False)[0]
    check_null_grad(sample[0], (prob1, prob2))
    pvals = [theano.tensor.as_tensor_variable([prob1, 0.2, 0.3])]
    sample = srng.choice(a=None, size=1, p=pvals, replace=False)[0]
    check_null_grad(sample[0], prob1)

    # normal distribution
    avg = tensor.scalar()
    std = tensor.scalar()
    check_null_grad(srng.normal((), avg=avg), avg)
    check_null_grad(srng.normal((), avg=0, std=std), std)
    check_null_grad(srng.normal((), avg=avg, std=std), (avg, std))
if __name__ == "__main__": if __name__ == "__main__":
rng = MRG_RandomStreams(np.random.randint(2147462579)) rng = MRG_RandomStreams(np.random.randint(2147462579))
print(theano.__file__) print(theano.__file__)
......
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment