提交 7885e618 authored 作者: Ian Goodfellow's avatar Ian Goodfellow

a lot of pep8

上级 c7d06ac9
...@@ -42,7 +42,7 @@ class ScalarSigmoid(scalar.UnaryScalarOp): ...@@ -42,7 +42,7 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
assert rval.type.dtype.find('float') != -1 assert rval.type.dtype.find('float') != -1
return [ rval ] return [rval]
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
x, = inp x, = inp
...@@ -69,7 +69,7 @@ sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid') ...@@ -69,7 +69,7 @@ sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
sigmoid_inplace = elemwise.Elemwise( sigmoid_inplace = elemwise.Elemwise(
ScalarSigmoid(scalar.transfer_type(0)), ScalarSigmoid(scalar.transfer_type(0)),
inplace_pattern={0:0}, inplace_pattern={0: 0},
name='sigmoid_inplace', name='sigmoid_inplace',
) )
...@@ -84,12 +84,15 @@ class ScalarSoftplus(scalar.UnaryScalarOp): ...@@ -84,12 +84,15 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
if x > 30.0: if x > 30.0:
return x return x
return numpy.log1p(numpy.exp(x)) return numpy.log1p(numpy.exp(x))
def impl(self, x): def impl(self, x):
return ScalarSoftplus.static_impl(x) return ScalarSoftplus.static_impl(x)
def grad(self, inp, grads): def grad(self, inp, grads):
x, = inp x, = inp
gz, = grads gz, = grads
return [gz * scalar_sigmoid(x)] return [gz * scalar_sigmoid(x)]
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
x, = inp x, = inp
z, = out z, = out
...@@ -103,27 +106,29 @@ class ScalarSoftplus(scalar.UnaryScalarOp): ...@@ -103,27 +106,29 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals() return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals()
else: else:
raise NotImplementedError('only floatingpoint is implemented') raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self): def c_code_cache_version(self):
v = super(ScalarSoftplus, self).c_code_cache_version() v = super(ScalarSoftplus, self).c_code_cache_version()
if v: if v:
return (2,) + v return (2,) + v
else: else:
return v return v
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus') scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name= 'scalar_softplus')
softplus = elemwise.Elemwise(scalar_softplus, name='softplus') softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
pprint.assign(softplus, printing.FunctionPrinter('softplus')) pprint.assign(softplus, printing.FunctionPrinter('softplus'))
def _skip_mul_1(r): def _skip_mul_1(r):
if r.owner and r.owner.op == tensor.mul: if r.owner and r.owner.op == tensor.mul:
not_is_1 = [i for i in r.owner.inputs if not _is_1(i) ] not_is_1 = [i for i in r.owner.inputs if not _is_1(i)]
if len(not_is_1)==1: if len(not_is_1) == 1:
return not_is_1[0] return not_is_1[0]
logsigm_to_softplus = gof.PatternSub( logsigm_to_softplus = gof.PatternSub(
(tensor.log, (sigmoid, 'x')), (tensor.log, (sigmoid, 'x')),
(tensor.neg, (softplus, (tensor.neg, 'x'))), (tensor.neg, (softplus, (tensor.neg, 'x'))),
allow_multiple_clients = True, allow_multiple_clients=True,
skip_identities_fn=_skip_mul_1) skip_identities_fn=_skip_mul_1)
...@@ -139,21 +144,22 @@ def _is_1(expr): ...@@ -139,21 +144,22 @@ def _is_1(expr):
log1msigm_to_softplus = gof.PatternSub( log1msigm_to_softplus = gof.PatternSub(
(tensor.log, (tensor.log,
(tensor.sub, (tensor.sub,
dict(pattern='y', constraint = _is_1), dict(pattern='y', constraint=_is_1),
(sigmoid, 'x'))), (sigmoid, 'x'))),
(tensor.neg, (softplus, 'x')), (tensor.neg, (softplus, 'x')),
allow_multiple_clients = True, allow_multiple_clients=True,
skip_identities_fn=_skip_mul_1) skip_identities_fn=_skip_mul_1)
log1pexp_to_softplus = gof.PatternSub( log1pexp_to_softplus = gof.PatternSub(
(tensor.log1p, (tensor.log1p,
(tensor.exp, 'x')), (tensor.exp, 'x')),
(softplus, 'x'), (softplus, 'x'),
allow_multiple_clients = True) allow_multiple_clients=True)
opt.register_stabilize(logsigm_to_softplus, name='logsigm_to_softplus')
opt.register_stabilize(log1msigm_to_softplus, name='log1msigm_to_softplus')
opt.register_stabilize(log1pexp_to_softplus, name='log1pexp_to_softplus')
opt.register_stabilize(logsigm_to_softplus, name = 'logsigm_to_softplus')
opt.register_stabilize(log1msigm_to_softplus, name = 'log1msigm_to_softplus')
opt.register_stabilize(log1pexp_to_softplus, name = 'log1pexp_to_softplus')
def is_1pexp(t): def is_1pexp(t):
""" """
...@@ -247,7 +253,7 @@ def partition_num_or_denom(r, f): ...@@ -247,7 +253,7 @@ def partition_num_or_denom(r, f):
else: else:
neg_t, f_t = f_t neg_t, f_t = f_t
f_terms.append(f_t) f_terms.append(f_t)
neg ^= neg_t #bit flip if neg_t is true neg ^= neg_t # bit flip if neg_t is true
return f_terms, rest, neg return f_terms, rest, neg
...@@ -299,7 +305,8 @@ def local_exp_over_1_plus_exp(node): ...@@ -299,7 +305,8 @@ def local_exp_over_1_plus_exp(node):
#find all the exp() terms in the numerator #find all the exp() terms in the numerator
num, denom = node.inputs num, denom = node.inputs
num_exp_x, num_rest, num_neg = partition_num_or_denom(num, is_exp) num_exp_x, num_rest, num_neg = partition_num_or_denom(num, is_exp)
denom_1pexp, denom_rest, denom_neg = partition_num_or_denom(denom, is_1pexp) denom_1pexp, denom_rest,
denom_neg = partition_num_or_denom(denom, is_1pexp)
sigmoids = [] sigmoids = []
for t in denom_1pexp: for t in denom_1pexp:
...@@ -311,7 +318,7 @@ def local_exp_over_1_plus_exp(node): ...@@ -311,7 +318,7 @@ def local_exp_over_1_plus_exp(node):
# case: 1/(1+exp(x)) # case: 1/(1+exp(x))
sigmoids.append(sigmoid(-t)) sigmoids.append(sigmoid(-t))
if not sigmoids: # we didn't find any. abort if not sigmoids: # we didn't find any. abort
return return
# put the new numerator together # put the new numerator together
new_num = sigmoids + [tensor.exp(t) for t in num_exp_x] + num_rest new_num = sigmoids + [tensor.exp(t) for t in num_exp_x] + num_rest
...@@ -330,6 +337,7 @@ def local_exp_over_1_plus_exp(node): ...@@ -330,6 +337,7 @@ def local_exp_over_1_plus_exp(node):
else: else:
return [new_num / tensor.mul(*denom_rest)] return [new_num / tensor.mul(*denom_rest)]
def parse_mul_tree(root): def parse_mul_tree(root):
""" """
Parse a tree of multiplications starting at the given root. Parse a tree of multiplications starting at the given root.
...@@ -512,7 +520,7 @@ def perform_sigm_times_exp(tree, exp_x=None, exp_minus_x=None, sigm_x=None, ...@@ -512,7 +520,7 @@ def perform_sigm_times_exp(tree, exp_x=None, exp_minus_x=None, sigm_x=None,
sigm_minus_x = [] sigm_minus_x = []
if full_tree is None: if full_tree is None:
full_tree = tree full_tree = tree
if False: # Debug code. if False: # Debug code.
print '<perform_sigm_times_exp>' print '<perform_sigm_times_exp>'
print ' full_tree = %s' % full_tree print ' full_tree = %s' % full_tree
print ' tree = %s' % tree print ' tree = %s' % tree
...@@ -621,10 +629,13 @@ def local_inv_1_plus_exp(node): ...@@ -621,10 +629,13 @@ def local_inv_1_plus_exp(node):
if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp: if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp:
if scalars and numpy.allclose(numpy.sum(scalars), 1): if scalars and numpy.allclose(numpy.sum(scalars), 1):
return opt._fill_chain( return opt._fill_chain(
sigmoid(tensor.neg(nonconsts[0].owner.inputs[0])), sigmoid(
tensor.neg(nonconsts[0].owner.inputs[0])),
scalar_inputs) scalar_inputs)
# Registration is below, and conditional. # Registration is below, and conditional.
@gof.local_optimizer([tensor.sub]) @gof.local_optimizer([tensor.sub])
def local_1msigmoid(node): def local_1msigmoid(node):
""" """
...@@ -633,7 +644,7 @@ def local_1msigmoid(node): ...@@ -633,7 +644,7 @@ def local_1msigmoid(node):
if node.op == tensor.sub: if node.op == tensor.sub:
sub_l, sub_r = node.inputs sub_l, sub_r = node.inputs
if len(sub_r.clients) > 1: if len(sub_r.clients) > 1:
return # graph is using both sigm and 1-sigm return # graph is using both sigm and 1-sigm
if sub_r.owner and sub_r.owner.op == sigmoid: if sub_r.owner and sub_r.owner.op == sigmoid:
try: try:
val_l = opt.get_constant_value(sub_l) val_l = opt.get_constant_value(sub_l)
...@@ -686,13 +697,14 @@ if 0: ...@@ -686,13 +697,14 @@ if 0:
assert t0.owner.op == div assert t0.owner.op == div
t0top, t0bot = t0.owner.inputs t0top, t0bot = t0.owner.inputs
t1top, t1bot = t1.owner.inputs t1top, t1bot = t1.owner.inputs
rval.append(div(mul(*(t0top+t1top)), mul(*(t0bot+t1bot)))) rval.append(div(mul(*(
t0top + t1top)), mul(*(t0bot + t1bot))))
if len(rval) > 100: if len(rval) > 100:
# This loop can be exponentially long. # This loop can be exponentially long.
# aborting # aborting
return [] return []
elif len(node.outputs)>1: elif len(node.outputs) > 1:
return [] return []
else: else:
return [node.outputs[0]] return [node.outputs[0]]
...@@ -136,7 +136,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase): ...@@ -136,7 +136,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
""" """
def __init__(self, seed=None, no_warn = False): def __init__(self, seed=None, no_warn=False):
""":type seed: None or int """:type seed: None or int
:param seed: a default seed to initialize the RandomState :param seed: a default seed to initialize the RandomState
...@@ -146,7 +146,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase): ...@@ -146,7 +146,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
""" """
if not no_warn: if not no_warn:
deprecation_warning() deprecation_warning()
super(RandomStreams, self).__init__(no_warn = True) super(RandomStreams, self).__init__(no_warn=True)
self.random_state_variables = [] self.random_state_variables = []
self.default_instance_seed = seed self.default_instance_seed = seed
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -47,7 +47,8 @@ class test_DimShuffle(unittest_tools.InferShapeTester): ...@@ -47,7 +47,8 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
#test that DimShuffle.infer_shape work correctly #test that DimShuffle.infer_shape work correctly
x = TensorType('float64', ib)('x') x = TensorType('float64', ib)('x')
e = DimShuffle(ib, shuffle)(x) e = DimShuffle(ib, shuffle)(x)
f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function() f = copy(linker).accept(FunctionGraph([x], [e.
shape])).make_function()
assert all(f(numpy.ones(xsh))) == all(zsh) assert all(f(numpy.ones(xsh))) == all(zsh)
# Test when we drop a axis that is not broadcastable # Test when we drop a axis that is not broadcastable
...@@ -125,7 +126,8 @@ class test_Broadcast(unittest.TestCase): ...@@ -125,7 +126,8 @@ class test_Broadcast(unittest.TestCase):
x = TensorType('float64', [(entry == 1) for entry in xsh])('x') x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
y = TensorType('float64', [(entry == 1) for entry in ysh])('y') y = TensorType('float64', [(entry == 1) for entry in ysh])('y')
e = Elemwise(scalar.add)(x, y) e = Elemwise(scalar.add)(x, y)
f = copy(linker).accept(FunctionGraph([x, y], [e.shape])).make_function() f = copy(linker).accept(FunctionGraph([x,
y], [e.shape])).make_function()
assert tuple(f(xv, yv)) == tuple(zv.shape) assert tuple(f(xv, yv)) == tuple(zv.shape)
def with_linker_inplace(self, linker): def with_linker_inplace(self, linker):
...@@ -154,7 +156,8 @@ class test_Broadcast(unittest.TestCase): ...@@ -154,7 +156,8 @@ class test_Broadcast(unittest.TestCase):
x = TensorType('float64', [(entry == 1) for entry in xsh])('x') x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
y = TensorType('float64', [(entry == 1) for entry in ysh])('y') y = TensorType('float64', [(entry == 1) for entry in ysh])('y')
e = Elemwise(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y) e = Elemwise(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
f = copy(linker).accept(FunctionGraph([x, y], [e.shape])).make_function() f = copy(linker).accept(FunctionGraph([x,
y], [e.shape])).make_function()
xv = numpy.asarray(numpy.random.rand(*xsh)) xv = numpy.asarray(numpy.random.rand(*xsh))
yv = numpy.asarray(numpy.random.rand(*ysh)) yv = numpy.asarray(numpy.random.rand(*ysh))
zv = xv + yv zv = xv + yv
...@@ -349,7 +352,8 @@ class test_CAReduce(unittest_tools.InferShapeTester): ...@@ -349,7 +352,8 @@ class test_CAReduce(unittest_tools.InferShapeTester):
e = tensor_op(x, axis=tosum) e = tensor_op(x, axis=tosum)
if tosum is None: if tosum is None:
tosum = range(len(xsh)) tosum = range(len(xsh))
f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function() f = copy(linker).accept(FunctionGraph([x],
[e.shape])).make_function()
if not(scalar_op in [scalar.maximum, scalar.minimum] and if not(scalar_op in [scalar.maximum, scalar.minimum] and
((xsh == () or numpy.prod(xsh) == 0))): ((xsh == () or numpy.prod(xsh) == 0))):
assert all(f(xv) == zv.shape) assert all(f(xv) == zv.shape)
...@@ -459,7 +463,8 @@ class test_Prod(unittest.TestCase): ...@@ -459,7 +463,8 @@ class test_Prod(unittest.TestCase):
# including zeros, as the case with zeros is important # including zeros, as the case with zeros is important
# (and special cases: 1 zero in the row, more than 1 zero in the row) # (and special cases: 1 zero in the row, more than 1 zero in the row)
x_val = numpy.asarray([[1,2,3],[4,5,6],[7,8,9]], dtype='float32') x_val = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
dtype='float32')
x = theano.tensor.dmatrix() x = theano.tensor.dmatrix()
# now with verify_grad # now with verify_grad
unittest_tools.verify_grad(Prod(axis=1), [x_val], mode=self.mode) unittest_tools.verify_grad(Prod(axis=1), [x_val], mode=self.mode)
...@@ -471,26 +476,28 @@ class test_Prod(unittest.TestCase): ...@@ -471,26 +476,28 @@ class test_Prod(unittest.TestCase):
unittest_tools.verify_grad(fn, [x_val], mode=self.mode) unittest_tools.verify_grad(fn, [x_val], mode=self.mode)
def test_verify_grad_with_zeros(self): def test_verify_grad_with_zeros(self):
# including zeros, as the case with zeros is important # including zeros, as the case with zeros is important
# (and special cases: 1 zero in the row, more than 1 zero in the row) # (and special cases: 1 zero in the row, more than 1 zero in the row)
x_val = numpy.asarray([[1.,2.,3.],[0.,5.,6.],[0.,0.,9.]], dtype='float32') x_val = numpy.asarray([[1., 2., 3.], [0., 5., 6.], [0., 0., 9.]],
dtype='float32')
x = theano.tensor.dmatrix() x = theano.tensor.dmatrix()
# sanity check # sanity check
x2 = theano.tensor.dmatrix() x2 = theano.tensor.dmatrix()
p = Prod(axis=1)(x) p = Prod(axis=1)(x)
p2 = Prod(axis=1)(x2) p2 = Prod(axis=1)(x2)
fn = theano.function([x,x2],[p-p2], mode=self.mode) fn = theano.function([x, x2], [p - p2], mode=self.mode)
#print "hand computed diff for each row" #print "hand computed diff for each row"
x2_val = numpy.asarray([[1., 2., 3.003], [0.003,5.,6], [0.,0.,9.01]]) x2_val = numpy.asarray([[1., 2., 3.003], [0.003, 5., 6], [
0., 0., 9.01]])
#print fn(x_val, x2_val) #print fn(x_val, x2_val)
fn2 = theano.function([x],[theano.tensor.grad(p.sum(),x)], mode=self.mode) fn2 = theano.function([x], [theano.tensor.grad(p.sum(), x)],
mode=self.mode)
#print "real grad" #print "real grad"
#print fn2(x_val) #print fn2(x_val)
fn3 = theano.function([x],[p], mode=self.mode) fn3 = theano.function([x], [p], mode=self.mode)
assert numpy.allclose(fn3(x_val), [6.,0.,0.]) assert numpy.allclose(fn3(x_val), [6., 0., 0.])
# now with verify_grad # now with verify_grad
unittest_tools.verify_grad(Prod(axis=1), [x_val], mode=self.mode) unittest_tools.verify_grad(Prod(axis=1), [x_val], mode=self.mode)
...@@ -511,10 +518,10 @@ class test_Prod(unittest.TestCase): ...@@ -511,10 +518,10 @@ class test_Prod(unittest.TestCase):
def test_prod_without_zeros(self): def test_prod_without_zeros(self):
x = theano.tensor.dmatrix() x = theano.tensor.dmatrix()
x_val = numpy.array([[1,2,3],[0,5,6],[0,0,9]], dtype='float32') x_val = numpy.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]], dtype='float32')
pwz = ProdWithoutZeros(axis=1)(x) pwz = ProdWithoutZeros(axis=1)(x)
fn = theano.function([x], pwz, mode=self.mode) fn = theano.function([x], pwz, mode=self.mode)
assert numpy.allclose(fn(x_val), [6,30,9]) assert numpy.allclose(fn(x_val), [6, 30, 9])
pwz_a0 = ProdWithoutZeros(axis=0)(x) pwz_a0 = ProdWithoutZeros(axis=0)(x)
fn_a0 = theano.function([x], pwz_a0, mode=self.mode) fn_a0 = theano.function([x], pwz_a0, mode=self.mode)
...@@ -522,25 +529,30 @@ class test_Prod(unittest.TestCase): ...@@ -522,25 +529,30 @@ class test_Prod(unittest.TestCase):
def test_other_grad_tests(self): def test_other_grad_tests(self):
x = theano.tensor.dmatrix() x = theano.tensor.dmatrix()
x_val1 = numpy.array([[1,2,3],[0,5,6],[0,0,9]], dtype='float32') x_val1 = numpy.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]],
x_val2 = numpy.array([[1,2,0],[0,5,6],[7,8,9],[9,10,0]], dtype='float32') dtype='float32')
x_val2 = numpy.array([[1, 2, 0], [0, 5, 6], [7, 8, 9], [9, 10, 0]],
dtype='float32')
rng = rng = numpy.random.RandomState(43) rng = rng = numpy.random.RandomState(43)
p = Prod(axis=1) p = Prod(axis=1)
grad_p = theano.tensor.grad(p(x).sum(), x) grad_p = theano.tensor.grad(p(x).sum(), x)
grad_fn = theano.function([x], grad_p, mode=self.mode) grad_fn = theano.function([x], grad_p, mode=self.mode)
assert numpy.allclose(grad_fn(x_val1), [[6.,3.,2.],[30.,0.,0.],[0.,0.,0.]]) assert numpy.allclose(grad_fn(x_val1), [[6., 3., 2.], [30., 0.,
assert numpy.allclose(grad_fn(x_val2), [[0., 0., 2.], [30., 0., 0.], [72., 63., 56.], [0., 0., 90.]]) 0.], [0., 0., 0.]])
assert numpy.allclose(grad_fn(x_val2), [[0., 0., 2.], [30.,
0., 0.], [72., 63., 56.], [0., 0., 90.]])
p_axis0 = Prod(axis=0) p_axis0 = Prod(axis=0)
grad_p_axis0 = theano.tensor.grad(p_axis0(x).sum(), x) grad_p_axis0 = theano.tensor.grad(p_axis0(x).sum(), x)
grad_fn_axis0 = theano.function([x], grad_p_axis0, mode=self.mode) grad_fn_axis0 = theano.function([x], grad_p_axis0, mode=self.mode)
assert numpy.allclose(grad_fn_axis0(x_val2), [[0., 400., 0.],[63., 160., 0.], [0., 100., 0.], [0., 80., 0.]]) assert numpy.allclose(grad_fn_axis0(x_val2), [[0., 400.,
0.], [63., 160., 0.], [0., 100., 0.], [0., 80., 0.]])
tensor.verify_grad(p, [x_val1], rng=rng, mode=self.mode) tensor.verify_grad(p, [x_val1], rng=rng, mode=self.mode)
def test_mul_without_zeros_zeros(self): def test_mul_without_zeros_zeros(self):
a = numpy.zeros((3,3)) a = numpy.zeros((3, 3))
x = theano.tensor.dmatrix() x = theano.tensor.dmatrix()
...@@ -655,6 +667,7 @@ class T_sum_dtype(unittest.TestCase): ...@@ -655,6 +667,7 @@ class T_sum_dtype(unittest.TestCase):
idx += 1 idx += 1
class T_mean_dtype(unittest.TestCase): class T_mean_dtype(unittest.TestCase):
def test_mean_default_dtype(self): def test_mean_default_dtype(self):
""" """
...@@ -710,6 +723,7 @@ class T_mean_dtype(unittest.TestCase): ...@@ -710,6 +723,7 @@ class T_mean_dtype(unittest.TestCase):
idx += 1 idx += 1
class T_prod_dtype(unittest.TestCase): class T_prod_dtype(unittest.TestCase):
def test_prod_default_dtype(self): def test_prod_default_dtype(self):
""" """
...@@ -761,6 +775,7 @@ class T_prod_dtype(unittest.TestCase): ...@@ -761,6 +775,7 @@ class T_prod_dtype(unittest.TestCase):
idx += 1 idx += 1
class T_prod_without_zeros_dtype(unittest.TestCase): class T_prod_without_zeros_dtype(unittest.TestCase):
def test_prod_without_zeros_default_dtype(self): def test_prod_without_zeros_default_dtype(self):
""" """
...@@ -844,11 +859,8 @@ if __name__ == '__main__': ...@@ -844,11 +859,8 @@ if __name__ == '__main__':
""" """
if __name__ == '__main__': if __name__ == '__main__':
t = TestElemwise('setUp') t = TestElemwise('setUp')
t.setUp() t.setUp()
t.test_infer_shape() t.test_infer_shape()
...@@ -12,15 +12,19 @@ import sys ...@@ -12,15 +12,19 @@ import sys
from theano.tests import unittest_tools from theano.tests import unittest_tools
from numpy.testing.noseclasses import KnownFailureTest from numpy.testing.noseclasses import KnownFailureTest
def cross_entropy(target, output, axis=1): def cross_entropy(target, output, axis=1):
""" """
@todo: This is essentially duplicated as tensor.nnet.binary_crossentropy @todo: This is essentially duplicated as tensor.nnet.binary_crossentropy
@warning: OUTPUT and TARGET are reversed in tensor.nnet.binary_crossentropy @warning: OUTPUT and TARGET are reversed in tensor.nnet.binary_crossentropy
""" """
return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis) return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis)
def quadratic(target, output, axis=1): def quadratic(target, output, axis=1):
return T.mean(T.sqr(target - output), axis=axis) return T.mean(T.sqr(target - output), axis=axis)
class QuadraticDenoisingAA(module.Module): class QuadraticDenoisingAA(module.Module):
"""Quadratic de-noising Auto-encoder """Quadratic de-noising Auto-encoder
...@@ -35,15 +39,15 @@ class QuadraticDenoisingAA(module.Module): ...@@ -35,15 +39,15 @@ class QuadraticDenoisingAA(module.Module):
""" """
def __init__(self, def __init__(self,
input = None, input=None,
# regularize = False, # regularize = False,
tie_weights = False, tie_weights=False,
n_quadratic_filters = 1, n_quadratic_filters=1,
_w1 = None, _w1=None,
_w2 = None, _w2=None,
_b1 = None, _b1=None,
_b2 = None, _b2=None,
_qfilters = None, _qfilters=None,
activation_function=NN.sigmoid, activation_function=NN.sigmoid,
reconstruction_cost_function=cross_entropy): reconstruction_cost_function=cross_entropy):
""" """
...@@ -83,7 +87,8 @@ class QuadraticDenoisingAA(module.Module): ...@@ -83,7 +87,8 @@ class QuadraticDenoisingAA(module.Module):
# PARAMETERS # PARAMETERS
if _qfilters is None: if _qfilters is None:
#self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)] #self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
self.qfilters = [(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)] self.qfilters = [(T.dmatrix('q%i' % i))
for i in xrange(n_quadratic_filters)]
else: else:
#self.qfilters = [theano.Member(q) for q in _qfilters] #self.qfilters = [theano.Member(q) for q in _qfilters]
self.qfilters = [(q) for q in _qfilters] self.qfilters = [(q) for q in _qfilters]
...@@ -91,7 +96,8 @@ class QuadraticDenoisingAA(module.Module): ...@@ -91,7 +96,8 @@ class QuadraticDenoisingAA(module.Module):
#self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1) #self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1)
if _w1 is None: if _w1 is None:
self.w1 = (T.matrix('w1')) self.w1 = (T.matrix('w1'))
else: self.w1 = (_w1) else:
self.w1 = (_w1)
if _w2 is None: if _w2 is None:
if not tie_weights: if not tie_weights:
#self.w2 = theano.Member(T.matrix()) #self.w2 = theano.Member(T.matrix())
...@@ -104,30 +110,30 @@ class QuadraticDenoisingAA(module.Module): ...@@ -104,30 +110,30 @@ class QuadraticDenoisingAA(module.Module):
#self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1) #self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1)
if _b1 is None: if _b1 is None:
self.b1 = (T.vector('b1')) self.b1 = (T.vector('b1'))
else: self.b1 = (_b1) else:
self.b1 = (_b1)
#self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2) #self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2)
if _b2 is None: if _b2 is None:
self.b2 = (T.vector('b2')) self.b2 = (T.vector('b2'))
else: self.b2 = (_b2) else:
self.b2 = (_b2)
# # REGULARIZATION COST # # REGULARIZATION COST
# self.regularization = self.build_regularization() # self.regularization = self.build_regularization()
### NOISELESS ### ### NOISELESS ###
# HIDDEN LAYER # HIDDEN LAYER
def _act(x): def _act(x):
if len(self.qfilters) > 0: if len(self.qfilters) > 0:
qsum = 10e-10 # helps to control the gradient in the square-root below qsum = 10e-10 # helps to control the gradient in the square-root below
for qf in self.qfilters: for qf in self.qfilters:
qsum = qsum + T.dot(x, qf)**2 qsum = qsum + T.dot(x, qf) ** 2
return T.dot(x, self.w1) + self.b1 + T.sqrt(qsum) return T.dot(x, self.w1) + self.b1 + T.sqrt(qsum)
else: else:
return T.dot(x, self.w1) + self.b1 return T.dot(x, self.w1) + self.b1
self.hidden_activation = _act(self.input) #noise-free hidden self.hidden_activation = _act(self.input) # noise-free hidden
self.hidden = self.hid_activation_function(self.hidden_activation) self.hidden = self.hid_activation_function(self.hidden_activation)
...@@ -144,7 +150,6 @@ class QuadraticDenoisingAA(module.Module): ...@@ -144,7 +150,6 @@ class QuadraticDenoisingAA(module.Module):
# if self.regularize: # if self.regularize:
# self.cost = self.cost + self.regularization # self.cost = self.cost + self.regularization
### WITH NOISE ### ### WITH NOISE ###
self.corrupted_input = self.build_corrupted_input() self.corrupted_input = self.build_corrupted_input()
...@@ -165,7 +170,6 @@ class QuadraticDenoisingAA(module.Module): ...@@ -165,7 +170,6 @@ class QuadraticDenoisingAA(module.Module):
# if self.regularize: # if self.regularize:
# self.ncost = self.ncost + self.regularization # self.ncost = self.ncost + self.regularization
# GRADIENTS AND UPDATES # GRADIENTS AND UPDATES
if self.tie_weights: if self.tie_weights:
self.params = [self.w1, self.b1, self.b2] + self.qfilters self.params = [self.w1, self.b1, self.b2] + self.qfilters
...@@ -173,7 +177,8 @@ class QuadraticDenoisingAA(module.Module): ...@@ -173,7 +177,8 @@ class QuadraticDenoisingAA(module.Module):
self.params = [self.w1, self.w2, self.b1, self.b2] + self.qfilters self.params = [self.w1, self.w2, self.b1, self.b2] + self.qfilters
gradients = T.grad(self.ncost, self.params) gradients = T.grad(self.ncost, self.params)
updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients)) updates = dict((p, p - self.lr * g) for p, g in zip(self.
params, gradients))
# INTERFACE METHODS # INTERFACE METHODS
#self.update = theano.Method(self.input, self.ncost, updates) #self.update = theano.Method(self.input, self.ncost, updates)
...@@ -192,16 +197,17 @@ class QuadraticDenoisingAA(module.Module): ...@@ -192,16 +197,17 @@ class QuadraticDenoisingAA(module.Module):
filter's initial range) filter's initial range)
""" """
if (input_size is None) ^ (hidden_size is None): if (input_size is None) ^ (hidden_size is None):
raise ValueError("Must specify input_size and hidden_size or neither.") raise ValueError(
"Must specify input_size and hidden_size or neither.")
super(QuadraticDenoisingAA, self)._instance_initialize(obj, {}) super(QuadraticDenoisingAA, self)._instance_initialize(obj, {})
obj.random.initialize() obj.random.initialize()
R = N.random.RandomState(unittest_tools.fetch_seed(seed)) R = N.random.RandomState(unittest_tools.fetch_seed(seed))
if input_size is not None: if input_size is not None:
sz = (input_size, hidden_size) sz = (input_size, hidden_size)
inf = 1/N.sqrt(input_size) inf = 1 / N.sqrt(input_size)
hif = 1/N.sqrt(hidden_size) hif = 1 / N.sqrt(hidden_size)
obj.w1 = N.asarray(R.uniform(size = sz, low = -inf, high = inf), obj.w1 = N.asarray(R.uniform(size=sz, low=-inf, high=inf),
dtype=config.floatX) dtype=config.floatX)
if not self.tie_weights: if not self.tie_weights:
obj.w2 = N.asarray( obj.w2 = N.asarray(
...@@ -257,14 +263,17 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA): ...@@ -257,14 +263,17 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA):
def _instance_initialize(self, obj, input_size, hidden_size, noise_level, seed, lr, qfilter_relscale): def _instance_initialize(self, obj, input_size, hidden_size, noise_level, seed, lr, qfilter_relscale):
# obj.l2_coef = 0.0 # obj.l2_coef = 0.0
obj.noise_level = N.asarray(noise_level, dtype=config.floatX) obj.noise_level = N.asarray(noise_level, dtype=config.floatX)
super(SigmoidXEQuadraticDenoisingAA, self)._instance_initialize(obj, input_size, hidden_size, seed, lr, qfilter_relscale) super(SigmoidXEQuadraticDenoisingAA, self)
._instance_initialize(obj, input_size, hidden_size, seed, lr, qfilter_relscale)
QDAA = SigmoidXEQuadraticDenoisingAA QDAA = SigmoidXEQuadraticDenoisingAA
class Loss01(object): class Loss01(object):
def loss_01(self, x, targ): def loss_01(self, x, targ):
return N.mean(self.classify(x) != targ) return N.mean(self.classify(x) != targ)
class Module_Nclass(module.FancyModule): class Module_Nclass(module.FancyModule):
def _instance_initialize(mod_self, self, n_in, n_out, lr, seed): def _instance_initialize(mod_self, self, n_in, n_out, lr, seed):
#self.component is the LogisticRegressionTemplate instance that built this guy. #self.component is the LogisticRegressionTemplate instance that built this guy.
...@@ -280,29 +289,34 @@ class Module_Nclass(module.FancyModule): ...@@ -280,29 +289,34 @@ class Module_Nclass(module.FancyModule):
self.output_dimension = n_out self.output_dimension = n_out
def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False): def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False):
super(Module_Nclass, self).__init__() #boilerplate super(Module_Nclass, self).__init__() # boilerplate
#self.x = module.Member(x) if x is not None else T.matrix('input') #self.x = module.Member(x) if x is not None else T.matrix('input')
if x is not None: if x is not None:
self.x = (x) self.x = (x)
else: self.x = T.matrix('input') else:
self.x = T.matrix('input')
#self.targ = module.Member(targ) if targ is not None else T.lvector() #self.targ = module.Member(targ) if targ is not None else T.lvector()
if targ is not None: if targ is not None:
self.targ = (targ) self.targ = (targ)
else: self.targ = T.lvector() else:
self.targ = T.lvector()
#self.w = module.Member(w) if w is not None else module.Member(T.dmatrix()) #self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
if w is not None: if w is not None:
self.w = (w) self.w = (w)
else: self.w = (T.dmatrix()) else:
self.w = (T.dmatrix())
#self.b = module.Member(b) if b is not None else module.Member(T.dvector()) #self.b = module.Member(b) if b is not None else module.Member(T.dvector())
if b is not None: if b is not None:
self.b = (b) self.b = (b)
else: self.b = (T.dvector()) else:
self.b = (T.dvector())
#self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar()) #self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
if lr is not None: if lr is not None:
self.lr = (lr) self.lr = (lr)
else: self.lr = (T.dscalar()) else:
self.lr = (T.dscalar())
self.params = [p for p in [self.w, self.b] if p.owner is None] self.params = [p for p in [self.w, self.b] if p.owner is None]
...@@ -341,13 +355,14 @@ class Module_Nclass(module.FancyModule): ...@@ -341,13 +355,14 @@ class Module_Nclass(module.FancyModule):
#self.update = module.Method([self.input, self.targ], sum_xent, #self.update = module.Method([self.input, self.targ], sum_xent,
#updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams))) #updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
class ConvolutionalMLP(module.FancyModule): class ConvolutionalMLP(module.FancyModule):
def __init__(self, def __init__(self,
window_size, window_size,
n_quadratic_filters, n_quadratic_filters,
activation_function, activation_function,
reconstruction_cost_function, reconstruction_cost_function,
tie_weights = False, tie_weights=False,
# _input, # _input,
# _targ # _targ
): ):
...@@ -362,9 +377,9 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -362,9 +377,9 @@ class ConvolutionalMLP(module.FancyModule):
self.input_representations = [] self.input_representations = []
self.input_representations.append(QDAA( self.input_representations.append(QDAA(
input=self.inputs[0], input=self.inputs[0],
tie_weights = tie_weights, tie_weights=tie_weights,
n_quadratic_filters = n_quadratic_filters, n_quadratic_filters=n_quadratic_filters,
activation_function = activation_function, activation_function=activation_function,
reconstruction_cost_function = reconstruction_cost_function reconstruction_cost_function = reconstruction_cost_function
) )
) )
...@@ -373,9 +388,9 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -373,9 +388,9 @@ class ConvolutionalMLP(module.FancyModule):
self.input_representations.append( self.input_representations.append(
QDAA( QDAA(
input=i, input=i,
tie_weights = tie_weights, tie_weights=tie_weights,
n_quadratic_filters = n_quadratic_filters, n_quadratic_filters=n_quadratic_filters,
activation_function = activation_function, activation_function=activation_function,
reconstruction_cost_function = reconstruction_cost_function, reconstruction_cost_function = reconstruction_cost_function,
_w1 = self.input_representations[0].w1, _w1 = self.input_representations[0].w1,
_w2 = self.input_representations[0].w2, _w2 = self.input_representations[0].w2,
...@@ -384,14 +399,16 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -384,14 +399,16 @@ class ConvolutionalMLP(module.FancyModule):
_qfilters = self.input_representations[0].qfilters _qfilters = self.input_representations[0].qfilters
) )
) )
assert self.input_representations[-1].w1 is self.input_representations[0].w1 assert self.input_representations[-1]
.w1 is self.input_representations[0].w1
self.input_representation = T.concatenate([i.hidden for i in self.input_representations], axis=1) self.input_representation = T.concatenate([i.
hidden for i in self.input_representations], axis=1)
self.hidden = QDAA( self.hidden = QDAA(
input = self.input_representation, input=self.input_representation,
tie_weights = tie_weights, tie_weights=tie_weights,
n_quadratic_filters = n_quadratic_filters, n_quadratic_filters=n_quadratic_filters,
activation_function = activation_function, activation_function=activation_function,
reconstruction_cost_function = reconstruction_cost_function reconstruction_cost_function = reconstruction_cost_function
) )
self.output = Module_Nclass(x=self.hidden.hidden, targ=self.targ) self.output = Module_Nclass(x=self.hidden.hidden, targ=self.targ)
...@@ -408,11 +425,13 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -408,11 +425,13 @@ class ConvolutionalMLP(module.FancyModule):
self.hidden.b1, self.hidden.b1,
self.hidden.b2 self.hidden.b2
] + self.hidden.qfilters ] + self.hidden.qfilters
input_pretraining_cost = sum(i.ncost for i in self.input_representations) input_pretraining_cost = sum(i.ncost for i in self.
input_representations)
hidden_pretraining_cost = self.hidden.ncost hidden_pretraining_cost = self.hidden.ncost
input_pretraining_gradients = T.grad(input_pretraining_cost, input_pretraining_gradients = T.grad(input_pretraining_cost,
input_pretraining_params) input_pretraining_params)
hidden_pretraining_gradients = T.grad(hidden_pretraining_cost, hidden_pretraining_params) hidden_pretraining_gradients = T.grad(
hidden_pretraining_cost, hidden_pretraining_params)
pretraining_updates = \ pretraining_updates = \
dict((p, p - self.lr * g) for p, g in \ dict((p, p - self.lr * g) for p, g in \
zip(input_pretraining_params, input_pretraining_gradients) \ zip(input_pretraining_params, input_pretraining_gradients) \
...@@ -428,8 +447,10 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -428,8 +447,10 @@ class ConvolutionalMLP(module.FancyModule):
[self.output.w, self.output.b] [self.output.w, self.output.b]
finetuning_cost = self.output.cost finetuning_cost = self.output.cost
finetuning_gradients = T.grad(finetuning_cost, finetuning_params) finetuning_gradients = T.grad(finetuning_cost, finetuning_params)
finetuning_updates = dict((p, p - self.lr * g) for p, g in zip(finetuning_params, finetuning_gradients)) finetuning_updates = dict((p, p - self.lr * g) for p,
self.finetuning_update = module.Method(self.inputs + [self.targ], self.output.cost, finetuning_updates) g in zip(finetuning_params, finetuning_gradients))
self.finetuning_update = module.Method(self.inputs + [self.
targ], self.output.cost, finetuning_updates)
#self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr]) #self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr])
#self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised) #self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised)
...@@ -447,8 +468,10 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -447,8 +468,10 @@ class ConvolutionalMLP(module.FancyModule):
# for layer in obj.layers: # for layer in obj.layers:
# if layer.lr is None: # if layer.lr is None:
# layer.lr = lr # layer.lr = lr
assert self.input_representations[-1] is not self.input_representations[0] assert self.input_representations[-1]
assert self.input_representations[-1].w1 is self.input_representations[0].w1 is not self.input_representations[0]
assert self.input_representations[-1]
.w1 is self.input_representations[0].w1
for i in self.input_representations: for i in self.input_representations:
# i.initialize(input_size=self.input_size, hidden_size=self.input_representation_size, seed=R.random_integers(2**30), noise_level=noise_level, qfilter_relscale=qfilter_relscale) # i.initialize(input_size=self.input_size, hidden_size=self.input_representation_size, seed=R.random_integers(2**30), noise_level=noise_level, qfilter_relscale=qfilter_relscale)
...@@ -465,13 +488,16 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -465,13 +488,16 @@ class ConvolutionalMLP(module.FancyModule):
assert (i.w2 == self.input_representations[0].w2).all() assert (i.w2 == self.input_representations[0].w2).all()
assert (i.b1 == self.input_representations[0].b1).all() assert (i.b1 == self.input_representations[0].b1).all()
assert (i.b2 == self.input_representations[0].b2).all() assert (i.b2 == self.input_representations[0].b2).all()
assert N.all((a==b).all() for a, b in zip(i.qfilters, self.input_representations[0].qfilters)) assert N.all((a == b).all() for a, b in zip(i.
qfilters, self.input_representations[0].qfilters))
self.hidden.initialize(input_size=(len(self.inputs) * self.input_representation_size), self.hidden.initialize(input_size=(len(self.inputs) * self.input_representation_size),
hidden_size=self.hidden_representation_size, noise_level=noise_level, hidden_size=self.hidden_representation_size, noise_level=noise_level,
seed=int(R.random_integers(2**30)), lr=lr, qfilter_relscale=qfilter_relscale) seed=int(R.random_integers(2**30)), lr=lr, qfilter_relscale=qfilter_relscale)
self.output.initialize(n_in=self.hidden_representation_size, n_out=self.output_size, lr=lr, seed=R.random_integers(2**30)) self.output.initialize(n_in=self.
hidden_representation_size, n_out=self.output_size, lr=lr, seed=R.random_integers(2**30))
def create(window_size=3, def create(window_size=3,
input_dimension=9, input_dimension=9,
...@@ -488,22 +514,24 @@ def create(window_size=3, ...@@ -488,22 +514,24 @@ def create(window_size=3,
activation_function = T.tanh activation_function = T.tanh
architecture = ConvolutionalMLP( \ architecture = ConvolutionalMLP( \
window_size = window_size, window_size=window_size,
n_quadratic_filters = n_quadratic_filters, n_quadratic_filters=n_quadratic_filters,
activation_function = activation_function, activation_function=activation_function,
reconstruction_cost_function = quadratic, reconstruction_cost_function=quadratic,
tie_weights = False tie_weights=False
) )
backup = config.warn.sum_div_dimshuffle_bug backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False config.warn.sum_div_dimshuffle_bug = False
try: try:
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode) model = architecture.make(input_size=input_dimension,
input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
finally: finally:
config.warn.sum_div_dimshuffle_bug = backup config.warn.sum_div_dimshuffle_bug = backup
return model return model
def create_realistic(window_size=3,#7,
def create_realistic(window_size=3, # 7,
input_dimension=200, input_dimension=200,
output_vocabsize=23, output_vocabsize=23,
n_quadratic_filters=2, n_quadratic_filters=2,
...@@ -518,15 +546,17 @@ def create_realistic(window_size=3,#7, ...@@ -518,15 +546,17 @@ def create_realistic(window_size=3,#7,
activation_function = T.tanh activation_function = T.tanh
architecture = ConvolutionalMLP( \ architecture = ConvolutionalMLP( \
window_size = window_size, window_size=window_size,
n_quadratic_filters = n_quadratic_filters, n_quadratic_filters=n_quadratic_filters,
activation_function = activation_function, activation_function=activation_function,
reconstruction_cost_function = quadratic, reconstruction_cost_function=quadratic,
tie_weights = False tie_weights=False
) )
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode) model = architecture.make(input_size=input_dimension,
input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
return model return model
def test_naacl_model(iters_per_unsup=3, iters_per_sup=3, def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
optimizer=None, realistic=False): optimizer=None, realistic=False):
#print "BUILDING MODEL" #print "BUILDING MODEL"
...@@ -535,11 +565,12 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3, ...@@ -535,11 +565,12 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
if optimizer: if optimizer:
mode = theano.Mode(linker='c|py', optimizer=optimizer) mode = theano.Mode(linker='c|py', optimizer=optimizer)
else: mode = get_default_mode() else:
mode = get_default_mode()
if mode.__class__.__name__ == 'DebugMode': if mode.__class__.__name__ == 'DebugMode':
iters_per_unsup=1 iters_per_unsup = 1
iters_per_sup =1 iters_per_sup = 1
if realistic: if realistic:
m = create_realistic(compile_mode=mode) m = create_realistic(compile_mode=mode)
...@@ -552,7 +583,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3, ...@@ -552,7 +583,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
for i, node in enumerate(m.pretraining_update.maker.fgraph.toposort()): for i, node in enumerate(m.pretraining_update.maker.fgraph.toposort()):
idx_of_node[node] = i idx_of_node[node] = i
if False and i > -1: if False and i > -1:
print ' ', i, node, [(ii, idx_of_node.get(ii.owner, 'IN')) for ii in node.inputs] print ' ', i, node, [(ii, idx_of_node.get(ii.
owner, 'IN')) for ii in node.inputs]
prog_str.append(str(node)) prog_str.append(str(node))
#print input_pretraining_gradients[4].owner.inputs #print input_pretraining_gradients[4].owner.inputs
#print input_pretraining_gradients[4].owner.inputs[1].owner.inputs #print input_pretraining_gradients[4].owner.inputs[1].owner.inputs
...@@ -562,8 +594,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3, ...@@ -562,8 +594,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
rng = N.random.RandomState(unittest_tools.fetch_seed(23904)) rng = N.random.RandomState(unittest_tools.fetch_seed(23904))
inputs = [rng.rand(10,m.input_size) for i in 1,2,3] inputs = [rng.rand(10, m.input_size) for i in 1, 2, 3]
targets = N.asarray([0,3,4,2,3,4,4,2,1,0]) targets = N.asarray([0, 3, 4, 2, 3, 4, 4, 2, 1, 0])
#print inputs #print inputs
#print 'UNSUPERVISED PHASE' #print 'UNSUPERVISED PHASE'
...@@ -584,8 +616,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3, ...@@ -584,8 +616,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
s0, s1 = [str(j) for j in m.pretraining_update(*inputs)] s0, s1 = [str(j) for j in m.pretraining_update(*inputs)]
#print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1 #print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
if iters_per_unsup == 3: if iters_per_unsup == 3:
assert s0.startswith('0.927793')#'0.403044') assert s0.startswith('0.927793') # '0.403044')
assert s1.startswith('0.068035')#'0.074898') assert s1.startswith('0.068035') # '0.074898')
#print 'UNSUPERVISED took %.3fs'%(time.time() - t) #print 'UNSUPERVISED took %.3fs'%(time.time() - t)
#print 'FINETUNING GRAPH' #print 'FINETUNING GRAPH'
...@@ -601,6 +633,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3, ...@@ -601,6 +633,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
assert 19.7042 < s0f and s0f < 19.7043 assert 19.7042 < s0f and s0f < 19.7043
#print 'SUPERVISED took %.3fs'%( time.time() - t) #print 'SUPERVISED took %.3fs'%( time.time() - t)
def jtest_main(): def jtest_main():
from theano import gof from theano import gof
JTEST = theano.compile.mode.optdb.query(*sys.argv[2:]) JTEST = theano.compile.mode.optdb.query(*sys.argv[2:])
...@@ -609,13 +642,17 @@ def jtest_main(): ...@@ -609,13 +642,17 @@ def jtest_main():
optimizer = eval(sys.argv[1]) optimizer = eval(sys.argv[1])
test_naacl_model(optimizer, 10, 10, realistic=False) test_naacl_model(optimizer, 10, 10, realistic=False)
def real_main(): def real_main():
test_naacl_model() test_naacl_model()
def profile_main(): def profile_main():
# This is the main function for profiling # This is the main function for profiling
# We've renamed our original main() above to real_main() # We've renamed our original main() above to real_main()
import cProfile, pstats, StringIO import cProfile
import pstats
import StringIO
prof = cProfile.Profile() prof = cProfile.Profile()
prof = prof.runctx("real_main()", globals(), locals()) prof = prof.runctx("real_main()", globals(), locals())
stream = StringIO.StringIO() stream = StringIO.StringIO()
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -26,6 +26,7 @@ class testgrad_sources_inputs(unittest.TestCase): ...@@ -26,6 +26,7 @@ class testgrad_sources_inputs(unittest.TestCase):
inputs = [theano.tensor.vector()] inputs = [theano.tensor.vector()]
outputs = [theano.tensor.vector()] outputs = [theano.tensor.vector()]
return gof.Apply(self, inputs, outputs) return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads): def grad(self, inp, grads):
x, = inp x, = inp
gz, = grads gz, = grads
...@@ -43,14 +44,15 @@ class testgrad_sources_inputs(unittest.TestCase): ...@@ -43,14 +44,15 @@ class testgrad_sources_inputs(unittest.TestCase):
def make_node(self, *inputs): def make_node(self, *inputs):
outputs = [theano.tensor.vector()] outputs = [theano.tensor.vector()]
return gof.Apply(self, inputs, outputs) return gof.Apply(self, inputs, outputs)
def grad(self, inputs, grads): def grad(self, inputs, grads):
return [ inputs[0].zeros_like() ] return [inputs[0].zeros_like()]
i = theano.tensor.vector() i = theano.tensor.vector()
j = theano.tensor.vector() j = theano.tensor.vector()
a1 = retOne().make_node(i) a1 = retOne().make_node(i)
g = grad_sources_inputs([(a1.out, one)], None) g = grad_sources_inputs([(a1.out, one)], None)
a2 = retOne().make_node(i,j) a2 = retOne().make_node(i, j)
try: try:
g = grad_sources_inputs([(a2.out, one)], None) g = grad_sources_inputs([(a2.out, one)], None)
except ValueError, e: except ValueError, e:
...@@ -60,11 +62,13 @@ class testgrad_sources_inputs(unittest.TestCase): ...@@ -60,11 +62,13 @@ class testgrad_sources_inputs(unittest.TestCase):
def test_1in_1out(self): def test_1in_1out(self):
"""Test grad is called correctly for a 1-to-1 op""" """Test grad is called correctly for a 1-to-1 op"""
gval = theano.tensor.matrix() gval = theano.tensor.matrix()
class O(gof.op.Op): class O(gof.op.Op):
def make_node(self): def make_node(self):
inputs = [theano.tensor.matrix()] inputs = [theano.tensor.matrix()]
outputs = [theano.tensor.matrix()] outputs = [theano.tensor.matrix()]
return gof.Apply(self, inputs, outputs) return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads): def grad(self, inp, grads):
return gval, return gval,
a1 = O().make_node() a1 = O().make_node()
...@@ -74,11 +78,13 @@ class testgrad_sources_inputs(unittest.TestCase): ...@@ -74,11 +78,13 @@ class testgrad_sources_inputs(unittest.TestCase):
def test_1in_Nout(self): def test_1in_Nout(self):
"""Test grad is called correctly for a 1-to-many op""" """Test grad is called correctly for a 1-to-many op"""
gval = theano.tensor.matrix() gval = theano.tensor.matrix()
class O(gof.op.Op): class O(gof.op.Op):
def make_node(self): def make_node(self):
inputs = [theano.tensor.matrix()] inputs = [theano.tensor.matrix()]
outputs = [theano.tensor.scalar(),theano.tensor.scalar()] outputs = [theano.tensor.scalar(), theano.tensor.scalar()]
return gof.Apply(self, inputs, outputs) return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads): def grad(self, inp, grads):
x, = inp x, = inp
gz1, gz2 = grads gz1, gz2 = grads
...@@ -91,11 +97,13 @@ class testgrad_sources_inputs(unittest.TestCase): ...@@ -91,11 +97,13 @@ class testgrad_sources_inputs(unittest.TestCase):
"""Test grad is called correctly for a many-to-1 op""" """Test grad is called correctly for a many-to-1 op"""
gval0 = theano.tensor.scalar() gval0 = theano.tensor.scalar()
gval1 = theano.tensor.scalar() gval1 = theano.tensor.scalar()
class O(gof.op.Op): class O(gof.op.Op):
def make_node(self): def make_node(self):
inputs = [theano.tensor.scalar(), theano.tensor.scalar()] inputs = [theano.tensor.scalar(), theano.tensor.scalar()]
outputs = [theano.tensor.matrix()] outputs = [theano.tensor.matrix()]
return gof.Apply(self, inputs, outputs) return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads): def grad(self, inp, grads):
x0, x1 = inp x0, x1 = inp
gz, = grads gz, = grads
...@@ -109,11 +117,13 @@ class testgrad_sources_inputs(unittest.TestCase): ...@@ -109,11 +117,13 @@ class testgrad_sources_inputs(unittest.TestCase):
"""Test grad is called correctly for a many-to-many op""" """Test grad is called correctly for a many-to-many op"""
gval0 = theano.tensor.matrix() gval0 = theano.tensor.matrix()
gval1 = theano.tensor.matrix() gval1 = theano.tensor.matrix()
class O(gof.op.Op): class O(gof.op.Op):
def make_node(self): def make_node(self):
inputs = [theano.tensor.matrix(),theano.tensor.matrix()] inputs = [theano.tensor.matrix(), theano.tensor.matrix()]
outputs = [theano.tensor.matrix(),theano.tensor.matrix()] outputs = [theano.tensor.matrix(), theano.tensor.matrix()]
return gof.Apply(self, inputs, outputs) return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads): def grad(self, inp, grads):
return gval0, gval1 return gval0, gval1
a1 = O().make_node() a1 = O().make_node()
...@@ -126,9 +136,11 @@ class testgrad_sources_inputs(unittest.TestCase): ...@@ -126,9 +136,11 @@ class testgrad_sources_inputs(unittest.TestCase):
class O(gof.op.Op): class O(gof.op.Op):
def __init__(self, tst): def __init__(self, tst):
self.tst = tst self.tst = tst
def make_node(self, *inputs): def make_node(self, *inputs):
outputs = [theano.tensor.matrix(),theano.tensor.matrix()] outputs = [theano.tensor.matrix(), theano.tensor.matrix()]
return gof.Apply(self, inputs, outputs) return gof.Apply(self, inputs, outputs)
def grad(self, inputs, g_out): def grad(self, inputs, g_out):
return [one] return [one]
i = theano.tensor.matrix() i = theano.tensor.matrix()
...@@ -136,26 +148,29 @@ class testgrad_sources_inputs(unittest.TestCase): ...@@ -136,26 +148,29 @@ class testgrad_sources_inputs(unittest.TestCase):
g = grad_sources_inputs([(a1.outputs[0], one)], None) g = grad_sources_inputs([(a1.outputs[0], one)], None)
self.assertTrue(g[i] is one) self.assertTrue(g[i] is one)
def test_unimplemented_grad_func(): def test_unimplemented_grad_func():
# tests that function compilation catches unimplemented grads in the graph # tests that function compilation catches unimplemented grads in the graph
a = theano.tensor.vector() a = theano.tensor.vector()
b = theano.gradient.grad_not_implemented(theano.tensor.add, 0, a) b = theano.gradient.grad_not_implemented(theano.tensor.add, 0, a)
try: try:
f = theano.function([a], b, on_unused_input = 'ignore') f = theano.function([a], b, on_unused_input='ignore')
assert 0 assert 0
except TypeError: except TypeError:
pass pass
def test_undefined_grad_func(): def test_undefined_grad_func():
#tests that function compilation catches undefined grads in the graph #tests that function compilation catches undefined grads in the graph
a = theano.tensor.vector() a = theano.tensor.vector()
b = theano.gradient.grad_undefined(theano.tensor.add, 0, a) b = theano.gradient.grad_undefined(theano.tensor.add, 0, a)
try: try:
f = theano.function([a],b, on_unused_input = 'ignore') f = theano.function([a], b, on_unused_input='ignore')
assert 0 assert 0
except TypeError: except TypeError:
pass pass
def test_unimplemented_grad_grad(): def test_unimplemented_grad_grad():
#tests that unimplemented grads are caught in the grad method #tests that unimplemented grads are caught in the grad method
...@@ -164,132 +179,137 @@ def test_unimplemented_grad_grad(): ...@@ -164,132 +179,137 @@ def test_unimplemented_grad_grad():
return gof.Apply(self, [x], [x.type()]) return gof.Apply(self, [x], [x.type()])
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
return [ theano.gradient.grad_not_implemented(self, 0, inputs[0]) ] return [theano.gradient.grad_not_implemented(self, 0, inputs[0])]
a = theano.tensor.scalar() a = theano.tensor.scalar()
b = DummyOp()(a) b = DummyOp()(a)
try: try:
g = theano.gradient.grad(b,a) g = theano.gradient.grad(b, a)
assert False assert False
except TypeError: except TypeError:
pass pass
def test_undefined_grad_grad(): def test_undefined_grad_grad():
#tests that undefined grads are caught in the grad method #tests that undefined grads are caught in the grad method
V = theano.tensor.TensorType(dtype=config.floatX, V = theano.tensor.TensorType(dtype=config.floatX,
broadcastable = (False,False,False,False,False))() broadcastable=(False, False, False, False, False))()
W = theano.tensor.TensorType(dtype=config.floatX, W = theano.tensor.TensorType(dtype=config.floatX,
broadcastable = (False, False, False, False, False))() broadcastable=(False, False, False, False, False))()
b = theano.tensor.vector() b = theano.tensor.vector()
d = theano.tensor.ivector() d = theano.tensor.ivector()
Z = conv3D(V,W,b,d) Z = conv3D(V, W, b, d)
try: try:
g = theano.gradient.grad(Z.sum(),d) g = theano.gradient.grad(Z.sum(), d)
assert False assert False
except TypeError: except TypeError:
pass pass
def test_grad_name(): def test_grad_name():
A = theano.tensor.matrix('A') A = theano.tensor.matrix('A')
x = theano.tensor.vector('x') x = theano.tensor.vector('x')
f = theano.tensor.dot(x,theano.tensor.dot(A,x)) f = theano.tensor.dot(x, theano.tensor.dot(A, x))
f.name = 'f' f.name = 'f'
g = theano.tensor.grad(f,x) g = theano.tensor.grad(f, x)
assert g.name == '(df/dx)' assert g.name == '(df/dx)'
def test_grad_duplicate_input(): def test_grad_duplicate_input():
#test that the grad works when a variable #test that the grad works when a variable
#appears in more than one place in a node's input list #appears in more than one place in a node's input list
def output(x): def output(x):
return (x*x) return (x * x)
rng = np.random.RandomState([2012,8,28]) rng = np.random.RandomState([2012, 8, 28])
vx = rng.randn(2) vx = rng.randn(2)
theano.tests.unittest_tools.verify_grad(output,[vx]) theano.tests.unittest_tools.verify_grad(output, [vx])
def test_grad_quadratic(): def test_grad_quadratic():
#test the gradient on a tiny graph #test the gradient on a tiny graph
def cost(x,A): def cost(x, A):
return theano.tensor.dot(x,theano.tensor.dot(A,x)) return theano.tensor.dot(x, theano.tensor.dot(A, x))
rng = np.random.RandomState([2012,8,28]) rng = np.random.RandomState([2012, 8, 28])
vx = rng.randn(2) vx = rng.randn(2)
vA = rng.randn(2,2) vA = rng.randn(2, 2)
theano.tests.unittest_tools.verify_grad(cost,[vx,vA]) theano.tests.unittest_tools.verify_grad(cost, [vx, vA])
def test_grad_quadratic_vector(): def test_grad_quadratic_vector():
#test the gradient on a small graph #test the gradient on a small graph
def output(x,A): def output(x, A):
return theano.tensor.dot(x*x,A) return theano.tensor.dot(x * x, A)
rng = np.random.RandomState([2012,8,28]) rng = np.random.RandomState([2012, 8, 28])
vx = rng.randn(2) vx = rng.randn(2)
vA = rng.randn(2,2) vA = rng.randn(2, 2)
theano.tests.unittest_tools.verify_grad(output,[vx,vA]) theano.tests.unittest_tools.verify_grad(output, [vx, vA])
def test_grad_cubic(): def test_grad_cubic():
#test the gradient on a bigger graph #test the gradient on a bigger graph
def cost(x,A): def cost(x, A):
return theano.tensor.dot(x*x,theano.tensor.dot(A,x)) return theano.tensor.dot(x * x, theano.tensor.dot(A, x))
rng = np.random.RandomState([2012,8,28]) rng = np.random.RandomState([2012, 8, 28])
vx = rng.randn(2) vx = rng.randn(2)
vA = rng.randn(2,2) vA = rng.randn(2, 2)
theano.tests.unittest_tools.verify_grad(cost, [vx, vA])
theano.tests.unittest_tools.verify_grad(cost,[vx,vA])
def test_grad_grad_quadratic(): def test_grad_grad_quadratic():
#test the gradient on a graph constructed using the gradient #test the gradient on a graph constructed using the gradient
def output(x,A): def output(x, A):
orig_cost = theano.tensor.dot(x,theano.tensor.dot(A,x)) orig_cost = theano.tensor.dot(x, theano.tensor.dot(A, x))
return theano.gradient.grad(orig_cost, x) return theano.gradient.grad(orig_cost, x)
rng = np.random.RandomState([2012,8,28]) rng = np.random.RandomState([2012, 8, 28])
vx = rng.randn(2) vx = rng.randn(2)
vA = rng.randn(2,2) vA = rng.randn(2, 2)
theano.tests.unittest_tools.verify_grad(output, [vx, vA])
theano.tests.unittest_tools.verify_grad(output,[vx,vA])
def test_grad_grad_cubic(): def test_grad_grad_cubic():
#test the gradient on a bigger graph constructed using the gradient #test the gradient on a bigger graph constructed using the gradient
def output(x,A): def output(x, A):
orig_cost = theano.tensor.dot(x*x,theano.tensor.dot(A,x)) orig_cost = theano.tensor.dot(x * x, theano.tensor.dot(A, x))
return theano.gradient.grad(orig_cost, x) return theano.gradient.grad(orig_cost, x)
rng = np.random.RandomState([2012,8,28]) rng = np.random.RandomState([2012, 8, 28])
vx = rng.randn(2) vx = rng.randn(2)
vA = rng.randn(2,2) vA = rng.randn(2, 2)
theano.tests.unittest_tools.verify_grad(output, [vx, vA])
theano.tests.unittest_tools.verify_grad(output,[vx,vA])
def test_grad_int(): def test_grad_int():
...@@ -300,11 +320,11 @@ def test_grad_int(): ...@@ -300,11 +320,11 @@ def test_grad_int():
b = theano.tensor.vector() b = theano.tensor.vector()
def make_grad_func(X): def make_grad_func(X):
Z = theano.tensor.dot(X,W) + b Z = theano.tensor.dot(X, W) + b
H = theano.tensor.nnet.sigmoid(Z) H = theano.tensor.nnet.sigmoid(Z)
cost = H.sum() cost = H.sum()
g = gradient.grad(cost,X) g = gradient.grad(cost, X)
return theano.function([X,W,b],g, on_unused_input = 'ignore') return theano.function([X, W, b], g, on_unused_input='ignore')
int_func = make_grad_func(theano.tensor.imatrix()) int_func = make_grad_func(theano.tensor.imatrix())
#we have to use float64 as the float type to get the results to match #we have to use float64 as the float type to get the results to match
...@@ -314,17 +334,17 @@ def test_grad_int(): ...@@ -314,17 +334,17 @@ def test_grad_int():
m = 5 m = 5
d = 3 d = 3
n = 4 n = 4
rng = np.random.RandomState([2012,9,5]) rng = np.random.RandomState([2012, 9, 5])
int_type = theano.tensor.imatrix().dtype int_type = theano.tensor.imatrix().dtype
float_type = 'float64' float_type = 'float64'
X = np.cast[int_type](rng.randn(m,d) * 127.) X = np.cast[int_type](rng.randn(m, d) * 127.)
W = np.cast[W.dtype](rng.randn(d,n)) W = np.cast[W.dtype](rng.randn(d, n))
b = np.cast[b.dtype](rng.randn(n)) b = np.cast[b.dtype](rng.randn(n))
int_result = int_func(X,W,b) int_result = int_func(X, W, b)
float_result = float_func(np.cast[float_type](X),W,b) float_result = float_func(np.cast[float_type](X), W, b)
assert np.allclose(int_result, float_result) assert np.allclose(int_result, float_result)
...@@ -333,23 +353,23 @@ def test_grad_disconnected(): ...@@ -333,23 +353,23 @@ def test_grad_disconnected():
#tests corner cases of gradient for shape and alloc #tests corner cases of gradient for shape and alloc
x = theano.tensor.vector(name = 'x') x = theano.tensor.vector(name='x')
total = x.sum() total = x.sum()
total.name = 'total' total.name = 'total'
num_elements = x.shape[0] num_elements = x.shape[0]
num_elements.name = 'num_elements' num_elements.name = 'num_elements'
silly_vector = theano.tensor.alloc( total / num_elements, num_elements) silly_vector = theano.tensor.alloc(total / num_elements, num_elements)
silly_vector.name = 'silly_vector' silly_vector.name = 'silly_vector'
cost = silly_vector.sum() cost = silly_vector.sum()
cost.name = 'cost' cost.name = 'cost'
#note that cost simplifies to be the same as "total" #note that cost simplifies to be the same as "total"
g = gradient.grad(cost, x, add_names = False) g = gradient.grad(cost, x, add_names=False)
#we still need to pass in x because it determines the shape of the output #we still need to pass in x because it determines the shape of the output
f = theano.function([x],g) f = theano.function([x], g)
rng = np.random.RandomState([2012,9,5]) rng = np.random.RandomState([2012, 9, 5])
x = np.cast[x.dtype](rng.randn(3)) x = np.cast[x.dtype](rng.randn(3))
g = f(x) g = f(x)
assert np.allclose(g,np.ones(x.shape,dtype=x.dtype)) assert np.allclose(g, np.ones(x.shape, dtype=x.dtype))
def test_disconnected_nan(): def test_disconnected_nan():
...@@ -361,27 +381,27 @@ def test_disconnected_nan(): ...@@ -361,27 +381,27 @@ def test_disconnected_nan():
class Op1(theano.gof.Op): class Op1(theano.gof.Op):
def make_node(self, x): def make_node(self, x):
return theano.Apply(self, inputs=[x], return theano.Apply(self, inputs=[x],
outputs = [ x.type(), theano.tensor.scalar() ]) outputs=[x.type(), theano.tensor.scalar()])
def connection_pattern(self, node): def connection_pattern(self, node):
return [[True, False]] return [[True, False]]
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
return [ inputs[0].zeros_like() ] return [inputs[0].zeros_like()]
# Op2 has two inputs, f and g # Op2 has two inputs, f and g
# Its gradient with respect to g is not defined # Its gradient with respect to g is not defined
class Op2(theano.gof.Op): class Op2(theano.gof.Op):
def make_node(self, f, g): def make_node(self, f, g):
return theano.Apply(self, inputs=[f,g], return theano.Apply(self, inputs=[f, g],
outputs = [ theano.tensor.scalar() ]) outputs=[theano.tensor.scalar()])
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
return [ inputs[0].zeros_like(), NullType()() ] return [inputs[0].zeros_like(), NullType()()]
x = theano.tensor.vector() x = theano.tensor.vector()
f, g = Op1()(x) f, g = Op1()(x)
cost = Op2()(f,g) cost = Op2()(f, g)
# cost is differentiable wrt x # cost is differentiable wrt x
# but we can't tell that without using Op1's connection pattern # but we can't tell that without using Op1's connection pattern
...@@ -394,7 +414,6 @@ def test_disconnected_nan(): ...@@ -394,7 +414,6 @@ def test_disconnected_nan():
# connection_pattern functionality worked correctly # connection_pattern functionality worked correctly
def test_sum_disconnected(): def test_sum_disconnected():
# Tests that we can add DisconnectedType to other terms correctly # Tests that we can add DisconnectedType to other terms correctly
...@@ -402,7 +421,7 @@ def test_sum_disconnected(): ...@@ -402,7 +421,7 @@ def test_sum_disconnected():
y = x * 2. y = x * 2.
z = x + 1. z = x + 1.
cost = y + z cost = y + z
theano.tensor.grad(cost, x, consider_constant=[y,z]) theano.tensor.grad(cost, x, consider_constant=[y, z])
# In an earlier version of theano, the above line would have failed # In an earlier version of theano, the above line would have failed
# while trying to add two DisconnectedTypes # while trying to add two DisconnectedTypes
......
...@@ -47,7 +47,7 @@ class BreakRop(Op): ...@@ -47,7 +47,7 @@ class BreakRop(Op):
out[0] = x out[0] = x
def grad(self, inp, grads): def grad(self, inp, grads):
return [ grad_undefined(self, 0, inp[0]) ] return [grad_undefined(self, 0, inp[0])]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
return [None] return [None]
...@@ -330,21 +330,21 @@ class test_RopLop(RopLop_checker): ...@@ -330,21 +330,21 @@ class test_RopLop(RopLop_checker):
m_ = tensor.matrix('m_') m_ = tensor.matrix('m_')
v_ = tensor.vector('v_') v_ = tensor.vector('v_')
mval = self.rng.uniform(size=(3,7)).astype(theano.config.floatX) mval = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
vval = self.rng.uniform(size=(7,)).astype(theano.config.floatX) vval = self.rng.uniform(size=(7,)).astype(theano.config.floatX)
m_val = self.rng.uniform(size=(3,7)).astype(theano.config.floatX) m_val = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
v_val = self.rng.uniform(size=(7,)).astype(theano.config.floatX) v_val = self.rng.uniform(size=(7,)).astype(theano.config.floatX)
rop_out1 = tensor.Rop([m, v, m+v], [m, v], [m_, v_]) rop_out1 = tensor.Rop([m, v, m + v], [m, v], [m_, v_])
assert isinstance(rop_out1, list) assert isinstance(rop_out1, list)
assert len(rop_out1) == 3 assert len(rop_out1) == 3
rop_out2 = tensor.Rop((m, v, m+v), [m, v], [m_, v_]) rop_out2 = tensor.Rop((m, v, m + v), [m, v], [m_, v_])
assert isinstance(rop_out2, tuple) assert isinstance(rop_out2, tuple)
assert len(rop_out2) == 3 assert len(rop_out2) == 3
lop_out1 = tensor.Lop([m, v, m+v], (m, v), [m_, v_]) lop_out1 = tensor.Lop([m, v, m + v], (m, v), [m_, v_])
assert isinstance(lop_out1, tuple) assert isinstance(lop_out1, tuple)
assert len(lop_out1) == 2 assert len(lop_out1) == 2
lop_out2 = tensor.Lop((m, v, m+v), [m, v], [m_, v_]) lop_out2 = tensor.Lop((m, v, m + v), [m, v], [m_, v_])
assert isinstance(lop_out2, list) assert isinstance(lop_out2, list)
assert len(lop_out2) == 2 assert len(lop_out2) == 2
...@@ -353,4 +353,3 @@ class test_RopLop(RopLop_checker): ...@@ -353,4 +353,3 @@ class test_RopLop(RopLop_checker):
all_outs.extend(o) all_outs.extend(o)
f = theano.function([m, v, m_, v_], all_outs) f = theano.function([m, v, m_, v_], all_outs)
f(mval, vval, m_val, v_val) f(mval, vval, m_val, v_val)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论