提交 c8216133 authored 作者: goodfeli's avatar goodfeli

Merge pull request #992 from nouiz/grad_switch

Grad switch
...@@ -25,7 +25,7 @@ This is a release candidate for a major version, with lots of new ...@@ -25,7 +25,7 @@ This is a release candidate for a major version, with lots of new
features, bug fixes, and some interface changes (deprecated or features, bug fixes, and some interface changes (deprecated or
potentially misleading features were removed). potentially misleading features were removed).
The upgrade is recommended for developpers who want to help test and The upgrade is recommended for developers who want to help test and
report bugs, or want to use new features now. If you have updated report bugs, or want to use new features now. If you have updated
to 0.5rc1, you are highly encouraged to update to 0.5rc2. to 0.5rc1, you are highly encouraged to update to 0.5rc2.
...@@ -106,18 +106,14 @@ http://deeplearning.net/tutorial/ ...@@ -106,18 +106,14 @@ http://deeplearning.net/tutorial/
Acknowledgments Acknowledgments
--------------- ---------------
I would like to thank all contributors of Theano. For this particular I would like to thank all contributors of Theano. For this particular
release, many people have helped, notably (in alphabetical order): release, many people have helped, notably (in alphabetical order):
Hani Almousli, Frédéric Bastien, Justin Bayer, Arnaud Bergeron, James [Generate the list of committers: git shortlog -s <previous_tag>...| cut -c8-]
Bergstra, Valentin Bisson, Josh Bleecher Snyder, Yann Dauphin, Olivier
Delalleau, Guillaume Desjardins, Sander Dieleman, Xavier Glorot, Ian
Goodfellow, Philippe Hamel, Pascal Lamblin, Eric Laufer, Grégoire
Mesnil, Razvan Pascanu, Matthew Rocklin, Graham Taylor, Sebastian Urban,
David Warde-Farley, and Yao Li.
I would also like to thank users who submitted bug reports, notably: I would also like to thank users who submitted bug reports, notably:
Nicolas Boulanger-Lewandowski, Olivier Chapelle, Michael Forbes, Timothy [TODO]
Lillicrap, and John Salvatier.
Also, thank you to all NumPy and Scipy developers as Theano builds on Also, thank you to all NumPy and Scipy developers as Theano builds on
their strengths. their strengths.
......
...@@ -986,6 +986,11 @@ Condition ...@@ -986,6 +986,11 @@ Condition
x,y = T.dmatrices('x','y') x,y = T.dmatrices('x','y')
z = T.switch(T.lt(a,b), x, y) z = T.switch(T.lt(a,b), x, y)
.. function:: where(cond, ift, iff)
Alias for `switch`. `where` is the NumPy name.
.. function:: clip(x, min, max) .. function:: clip(x, min, max)
Return a variable representing x, but with all elements greater than Return a variable representing x, but with all elements greater than
......
...@@ -1051,7 +1051,12 @@ class Switch(ScalarOp): ...@@ -1051,7 +1051,12 @@ class Switch(ScalarOp):
else: else:
second_part = None second_part = None
return (None, first_part, second_part) # cond does affect the elements of the output so it is connected.
# For the sake of making the gradient convenient we assume that
# condition + epsilon always triggers the same branch as condition
condition_grad = cond.zeros_like().astype(theano.config.floatX)
return (condition_grad, first_part, second_part)
def output_types(self, (cond_t, ift_t, iff_t)): def output_types(self, (cond_t, ift_t, iff_t)):
return upcast_out(ift_t, iff_t) return upcast_out(ift_t, iff_t)
......
...@@ -1527,7 +1527,7 @@ class SpSumTester(utt.InferShapeTester): ...@@ -1527,7 +1527,7 @@ class SpSumTester(utt.InferShapeTester):
for format in sparse.sparse_formats: for format in sparse.sparse_formats:
for axis in self.possible_axis: for axis in self.possible_axis:
variable, data = sparse_random_inputs(format, variable, data = sparse_random_inputs(format,
shape=(10, 10)) shape=(9, 10))
self._compile_and_check(variable, self._compile_and_check(variable,
[self.op(variable[0], axis=axis)], [self.op(variable[0], axis=axis)],
data, data,
...@@ -1538,7 +1538,7 @@ class SpSumTester(utt.InferShapeTester): ...@@ -1538,7 +1538,7 @@ class SpSumTester(utt.InferShapeTester):
for axis in self.possible_axis: for axis in self.possible_axis:
for struct in [True, False]: for struct in [True, False]:
variable, data = sparse_random_inputs(format, variable, data = sparse_random_inputs(format,
shape=(10, 10)) shape=(9, 10))
verify_grad_sparse( verify_grad_sparse(
self.op_class(axis=axis, sparse_grad=struct), self.op_class(axis=axis, sparse_grad=struct),
data, data,
...@@ -1744,7 +1744,7 @@ class Remove0Tester(utt.InferShapeTester): ...@@ -1744,7 +1744,7 @@ class Remove0Tester(utt.InferShapeTester):
assert result.size == target.size, msg assert result.size == target.size, msg
def test_infer_shape(self): def test_infer_shape(self):
mat = (numpy.arange(9) + 1).reshape((3, 3)) mat = (numpy.arange(12) + 1).reshape((4, 3))
mat[0, 1] = mat[1, 0] = mat[2, 2] = 0 mat[0, 1] = mat[1, 0] = mat[2, 2] = 0
x_csc = theano.sparse.csc_matrix(dtype=theano.config.floatX) x_csc = theano.sparse.csc_matrix(dtype=theano.config.floatX)
......
...@@ -2605,11 +2605,11 @@ def isinf(a): ...@@ -2605,11 +2605,11 @@ def isinf(a):
# Condition # Condition
########################## ##########################
@_scal_elemwise @_scal_elemwise_with_nfunc('where', 3, 1)
def switch(cond, ift, iff): def switch(cond, ift, iff):
"""if cond then ift else iff""" """if cond then ift else iff"""
where = switch
########################## ##########################
# Bit-wise # Bit-wise
########################## ##########################
......
...@@ -39,7 +39,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as, ...@@ -39,7 +39,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
tile, patternbroadcast, Eye, Shape, Default, Dot, PermuteRowElements, tile, patternbroadcast, Eye, Shape, Default, Dot, PermuteRowElements,
ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc, ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc,
dtensor3, SpecifyShape, Mean, IncSubtensor, AdvancedIncSubtensor1, dtensor3, SpecifyShape, Mean, IncSubtensor, AdvancedIncSubtensor1,
itensor3, Tile, AdvancedIncSubtensor) itensor3, Tile, AdvancedIncSubtensor, switch)
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.printing import debugprint from theano.printing import debugprint
...@@ -618,6 +618,36 @@ SubInplaceTester = makeBroadcastTester(op=inplace.sub_inplace, ...@@ -618,6 +618,36 @@ SubInplaceTester = makeBroadcastTester(op=inplace.sub_inplace,
grad = _grad_broadcast_binary_normal, grad = _grad_broadcast_binary_normal,
inplace = True) inplace = True)
# Broadcast tester for the elementwise `switch` op, checked against the
# NumPy reference implementation `numpy.where`.
#
# NOTE: Switch.grad assumes that cond + epsilon selects the same branch
# as cond itself, so verify_grad cannot be run with conditions sitting
# at 0 — only the all-true condition is gradient-tested below.
SwitchTester = makeBroadcastTester(
    op=switch,
    expected=numpy.where,
    good=dict(
        all_true=(numpy.asarray(1, dtype=config.floatX),
                  rand(4, 5), rand(4, 5)),
        false_true=(numpy.asarray(0, dtype=config.floatX),
                    rand(4, 5), rand(4, 5)),
        mixed=(randint_ranged(0, 1, (4, 5)),
               rand(4, 5), rand(4, 5)),
    ),
    # Building with only two inputs must fail: switch is ternary.
    bad_build=dict(
        all_true=(numpy.asarray(1, dtype=config.floatX), rand(4, 5)),
    ),
    # Shape mismatches between the two branches must fail at runtime.
    bad_runtime=dict(
        all_true=(numpy.asarray(1, dtype=config.floatX),
                  rand(3, 5), rand(4, 5)),
        false_true=(numpy.asarray(0, dtype=config.floatX),
                    rand(4, 6), rand(4, 5)),
    ),
    # false_true / mixed gradient cases are omitted; see the NOTE above.
    grad=dict(
        all_true=(numpy.asarray(1, dtype=config.floatX),
                  rand(4, 5), rand(4, 5)),
    ),
)
MaximumTester = makeBroadcastTester(op=maximum, MaximumTester = makeBroadcastTester(op=maximum,
expected = lambda *inputs: check_floatX(inputs, numpy.maximum(*inputs)), expected = lambda *inputs: check_floatX(inputs, numpy.maximum(*inputs)),
good = _good_broadcast_binary_normal, good = _good_broadcast_binary_normal,
......
...@@ -242,7 +242,8 @@ class TestRepeatOp(utt.InferShapeTester): ...@@ -242,7 +242,8 @@ class TestRepeatOp(utt.InferShapeTester):
def test_infer_shape(self): def test_infer_shape(self):
for ndim in range(4): for ndim in range(4):
x = T.TensorType(config.floatX, [False] * ndim)() x = T.TensorType(config.floatX, [False] * ndim)()
a = np.random.random((10, ) * ndim).astype(config.floatX) shp = (numpy.arange(ndim) + 1) * 5
a = np.random.random(shp).astype(config.floatX)
for axis in self._possible_axis(ndim): for axis in self._possible_axis(ndim):
for dtype in tensor.discrete_dtypes: for dtype in tensor.discrete_dtypes:
...@@ -261,6 +262,9 @@ class TestRepeatOp(utt.InferShapeTester): ...@@ -261,6 +262,9 @@ class TestRepeatOp(utt.InferShapeTester):
if axis is None: if axis is None:
r = np.random.random_integers( r = np.random.random_integers(
5, size=a.size).astype(dtype) 5, size=a.size).astype(dtype)
elif a.size > 0:
r = np.random.random_integers(
5, size=a.shape[axis]).astype(dtype)
else: else:
r = np.random.random_integers( r = np.random.random_integers(
5, size=(10,)).astype(dtype) 5, size=(10,)).astype(dtype)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论