Commit e987d935, authored by Pascal Lamblin, committed via GitHub

Merge pull request #5538 from SinaHonari/issue4078

Setting gradient for MRG_RandomStreams distribution variables to be null
...@@ -5,6 +5,7 @@ import numpy as np ...@@ -5,6 +5,7 @@ import numpy as np
import theano import theano
from theano import tensor from theano import tensor
from theano.configparser import change_flags
from theano.sandbox import rng_mrg from theano.sandbox import rng_mrg
from theano.sandbox.rng_mrg import MRG_RandomStreams from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.sandbox.tests.test_rng_mrg import java_samples, rng_mrg_overflow from theano.sandbox.tests.test_rng_mrg import java_samples, rng_mrg_overflow
...@@ -115,8 +116,6 @@ def test_consistency_GPUA_parallel(): ...@@ -115,8 +116,6 @@ def test_consistency_GPUA_parallel():
def test_GPUA_full_fill(): def test_GPUA_full_fill():
# Make sure the whole sample buffer is filled. Also make sure # Make sure the whole sample buffer is filled. Also make sure
# large samples are consistent with CPU results. # large samples are consistent with CPU results.
import theano.gpuarray.tests.config
from theano.gpuarray.type import gpuarray_shared_constructor
# This needs to be large to trigger the problem on GPU # This needs to be large to trigger the problem on GPU
size = (10, 1000) size = (10, 1000)
...@@ -136,9 +135,6 @@ def test_GPUA_full_fill(): ...@@ -136,9 +135,6 @@ def test_GPUA_full_fill():
def test_overflow_gpu_new_backend(): def test_overflow_gpu_new_backend():
from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode
from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345 seed = 12345
n_substreams = 7 n_substreams = 7
curr_rstate = np.array([seed] * 6, dtype='int32') curr_rstate = np.array([seed] * 6, dtype='int32')
...@@ -162,11 +158,7 @@ def test_overflow_gpu_new_backend(): ...@@ -162,11 +158,7 @@ def test_overflow_gpu_new_backend():
def test_validate_input_types_gpuarray_backend(): def test_validate_input_types_gpuarray_backend():
from theano.sandbox.rng_mrg import mrg_uniform
from theano.gpuarray.type import gpuarray_shared_constructor
from theano.configparser import change_flags
with change_flags(compute_test_value="raise"): with change_flags(compute_test_value="raise"):
rstate = np.zeros((7, 6), dtype="int32") rstate = np.zeros((7, 6), dtype="int32")
rstate = gpuarray_shared_constructor(rstate) rstate = gpuarray_shared_constructor(rstate)
mrg_uniform.new(rstate, ndim=None, dtype="float32", size=(3,)) rng_mrg.mrg_uniform.new(rstate, ndim=None, dtype="float32", size=(3,))
...@@ -1994,6 +1994,38 @@ def zero_grad(x): ...@@ -1994,6 +1994,38 @@ def zero_grad(x):
return zero_grad_(x) return zero_grad_(x)
class UndefinedGrad(ViewOp):
    """Identity op whose gradient is explicitly undefined.

    The forward computation is a view of the input (inherited from
    ``ViewOp``); requesting the gradient of any input yields an
    undefined-gradient marker via ``grad_undefined``, so differentiating
    through this op produces an explicit error instead of a silent,
    meaningless gradient.
    """

    def grad(self, args, g_outs):
        # Mark every input's gradient as undefined.
        undefined = []
        for idx, inp in enumerate(args):
            undefined.append(grad_undefined(self, idx, inp))
        return undefined

    def R_op(self, inputs, eval_points):
        # No R-operator is provided for this op.
        return [None]

    def connection_pattern(self, node):
        # The single output depends on the single input.
        return [[True]]


# Shared instance used by the ``undefined_grad`` helper below.
undefined_grad_ = UndefinedGrad()
def undefined_grad(x):
    """Mark the gradient of *x* as undefined.

    The returned expression computes exactly the same value as ``x``,
    but any attempt to differentiate through it -- directly, or as part
    of a larger expression that contains it -- generates an error
    message stating that the gradient is not defined.

    :param x: A Theano expression whose gradient should be undefined.
    :return: The expression, unmodified in value, but with its gradient
        now undefined.
    """
    return undefined_grad_(x)
class DisconnectedGrad(ViewOp): class DisconnectedGrad(ViewOp):
def grad(self, args, g_outs): def grad(self, args, g_outs):
return [disconnected_type() for g_out in g_outs] return [disconnected_type() for g_out in g_outs]
......
...@@ -15,6 +15,7 @@ from six.moves import xrange ...@@ -15,6 +15,7 @@ from six.moves import xrange
import theano import theano
from theano import Op, Apply, shared, config, Variable from theano import Op, Apply, shared, config, Variable
from theano import gradient, function from theano import gradient, function
from theano.gradient import undefined_grad
from theano import tensor from theano import tensor
from theano.tensor import (TensorType, as_tensor_variable, get_vector_length, from theano.tensor import (TensorType, as_tensor_variable, get_vector_length,
cast, opt, scal) cast, opt, scal)
...@@ -773,7 +774,9 @@ class MRG_RandomStreams(object): ...@@ -773,7 +774,9 @@ class MRG_RandomStreams(object):
""" """
low = as_tensor_variable(low) low = as_tensor_variable(low)
low = undefined_grad(low)
high = as_tensor_variable(high) high = as_tensor_variable(high)
high = undefined_grad(high)
if dtype is None: if dtype is None:
dtype = scal.upcast(config.floatX, low.dtype, high.dtype) dtype = scal.upcast(config.floatX, low.dtype, high.dtype)
...@@ -821,6 +824,7 @@ class MRG_RandomStreams(object): ...@@ -821,6 +824,7 @@ class MRG_RandomStreams(object):
nstreams=None): nstreams=None):
# TODO : need description for method, parameter and return # TODO : need description for method, parameter and return
if n == 1: if n == 1:
p = undefined_grad(as_tensor_variable(p))
x = self.uniform(size=size, nstreams=nstreams) x = self.uniform(size=size, nstreams=nstreams)
return cast(x < p, dtype) return cast(x < p, dtype)
else: else:
...@@ -852,6 +856,7 @@ class MRG_RandomStreams(object): ...@@ -852,6 +856,7 @@ class MRG_RandomStreams(object):
if pvals is None: if pvals is None:
raise TypeError("You have to specify pvals") raise TypeError("You have to specify pvals")
pvals = as_tensor_variable(pvals) pvals = as_tensor_variable(pvals)
pvals = undefined_grad(pvals)
if size is not None: if size is not None:
if any([isinstance(i, integer_types) and i <= 0 for i in size]): if any([isinstance(i, integer_types) and i <= 0 for i in size]):
raise ValueError( raise ValueError(
...@@ -932,6 +937,7 @@ class MRG_RandomStreams(object): ...@@ -932,6 +937,7 @@ class MRG_RandomStreams(object):
raise TypeError("For now, p has to be specified in " raise TypeError("For now, p has to be specified in "
"MRG_RandomStreams.choice.") "MRG_RandomStreams.choice.")
p = as_tensor_variable(p) p = as_tensor_variable(p)
p = undefined_grad(p)
if ndim is not None: if ndim is not None:
raise ValueError("ndim argument to " raise ValueError("ndim argument to "
...@@ -978,7 +984,9 @@ class MRG_RandomStreams(object): ...@@ -978,7 +984,9 @@ class MRG_RandomStreams(object):
# second half our U2's. See Wikipedia page: # second half our U2's. See Wikipedia page:
# http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform # http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
avg = as_tensor_variable(avg) avg = as_tensor_variable(avg)
avg = undefined_grad(avg)
std = as_tensor_variable(std) std = as_tensor_variable(std)
std = undefined_grad(std)
if dtype is None: if dtype is None:
dtype = scal.upcast(config.floatX, avg.dtype, std.dtype) dtype = scal.upcast(config.floatX, avg.dtype, std.dtype)
......
...@@ -687,6 +687,70 @@ def test_overflow_cpu(): ...@@ -687,6 +687,70 @@ def test_overflow_cpu():
rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=False) rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=False)
def test_undefined_grad():
    """Gradients w.r.t. MRG_RandomStreams distribution parameters must fail."""
    srng = MRG_RandomStreams(seed=1234)

    def check_null_grad(cost, wrt):
        # Differentiating a sampled expression w.r.t. a distribution
        # parameter should raise NullTypeGradError.
        assert_raises(theano.gradient.NullTypeGradError,
                      theano.grad, cost, wrt)

    # uniform distribution
    low = tensor.scalar()
    high = tensor.scalar()
    check_null_grad(srng.uniform((), low=low), low)
    check_null_grad(srng.uniform((), low=0, high=high), high)
    check_null_grad(srng.uniform((), low=low, high=high), (low, high))

    # binomial distribution
    prob = tensor.scalar()
    check_null_grad(srng.binomial((), p=prob), prob)

    # multinomial distribution
    prob1 = tensor.scalar()
    prob2 = tensor.scalar()
    pvals = [theano.tensor.as_tensor_variable([prob1, 0.5, 0.25])]
    sample = srng.multinomial(size=None, pvals=pvals, n=4)[0]
    check_null_grad(theano.tensor.sum(sample), prob1)
    pvals = [theano.tensor.as_tensor_variable([prob1, prob2])]
    sample = srng.multinomial(size=None, pvals=pvals, n=4)[0]
    check_null_grad(theano.tensor.sum(sample), (prob1, prob2))

    # choice
    pvals = [theano.tensor.as_tensor_variable([prob1, prob2, 0.1, 0.2])]
    sample = srng.choice(a=None, size=1, p=pvals, replace=False)[0]
    check_null_grad(sample[0], (prob1, prob2))
    pvals = [theano.tensor.as_tensor_variable([prob1, prob2])]
    sample = srng.choice(a=None, size=1, p=pvals, replace=False)[0]
    check_null_grad(sample[0], (prob1, prob2))
    pvals = [theano.tensor.as_tensor_variable([prob1, 0.2, 0.3])]
    sample = srng.choice(a=None, size=1, p=pvals, replace=False)[0]
    check_null_grad(sample[0], prob1)

    # normal distribution
    avg = tensor.scalar()
    std = tensor.scalar()
    check_null_grad(srng.normal((), avg=avg), avg)
    check_null_grad(srng.normal((), avg=0, std=std), std)
    check_null_grad(srng.normal((), avg=avg, std=std), (avg, std))
if __name__ == "__main__": if __name__ == "__main__":
rng = MRG_RandomStreams(np.random.randint(2147462579)) rng = MRG_RandomStreams(np.random.randint(2147462579))
print(theano.__file__) print(theano.__file__)
......
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment