提交 7885e618 authored 作者: Ian Goodfellow's avatar Ian Goodfellow

a lot of pep8

上级 c7d06ac9
......@@ -42,7 +42,7 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
assert rval.type.dtype.find('float') != -1
return [ rval ]
return [rval]
def c_code(self, node, name, inp, out, sub):
x, = inp
......@@ -69,7 +69,7 @@ sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
sigmoid_inplace = elemwise.Elemwise(
ScalarSigmoid(scalar.transfer_type(0)),
inplace_pattern={0:0},
inplace_pattern={0: 0},
name='sigmoid_inplace',
)
......@@ -84,12 +84,15 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
if x > 30.0:
return x
return numpy.log1p(numpy.exp(x))
def impl(self, x):
return ScalarSoftplus.static_impl(x)
def grad(self, inp, grads):
x, = inp
gz, = grads
return [gz * scalar_sigmoid(x)]
def c_code(self, node, name, inp, out, sub):
x, = inp
z, = out
......@@ -103,27 +106,29 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals()
else:
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
v = super(ScalarSoftplus, self).c_code_cache_version()
if v:
return (2,) + v
else:
return v
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name= 'scalar_softplus')
softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
pprint.assign(softplus, printing.FunctionPrinter('softplus'))
def _skip_mul_1(r):
if r.owner and r.owner.op == tensor.mul:
not_is_1 = [i for i in r.owner.inputs if not _is_1(i) ]
if len(not_is_1)==1:
not_is_1 = [i for i in r.owner.inputs if not _is_1(i)]
if len(not_is_1) == 1:
return not_is_1[0]
logsigm_to_softplus = gof.PatternSub(
(tensor.log, (sigmoid, 'x')),
(tensor.neg, (softplus, (tensor.neg, 'x'))),
allow_multiple_clients = True,
allow_multiple_clients=True,
skip_identities_fn=_skip_mul_1)
......@@ -139,21 +144,22 @@ def _is_1(expr):
log1msigm_to_softplus = gof.PatternSub(
(tensor.log,
(tensor.sub,
dict(pattern='y', constraint = _is_1),
dict(pattern='y', constraint=_is_1),
(sigmoid, 'x'))),
(tensor.neg, (softplus, 'x')),
allow_multiple_clients = True,
allow_multiple_clients=True,
skip_identities_fn=_skip_mul_1)
log1pexp_to_softplus = gof.PatternSub(
(tensor.log1p,
(tensor.exp, 'x')),
(softplus, 'x'),
allow_multiple_clients = True)
allow_multiple_clients=True)
opt.register_stabilize(logsigm_to_softplus, name='logsigm_to_softplus')
opt.register_stabilize(log1msigm_to_softplus, name='log1msigm_to_softplus')
opt.register_stabilize(log1pexp_to_softplus, name='log1pexp_to_softplus')
opt.register_stabilize(logsigm_to_softplus, name = 'logsigm_to_softplus')
opt.register_stabilize(log1msigm_to_softplus, name = 'log1msigm_to_softplus')
opt.register_stabilize(log1pexp_to_softplus, name = 'log1pexp_to_softplus')
def is_1pexp(t):
"""
......@@ -247,7 +253,7 @@ def partition_num_or_denom(r, f):
else:
neg_t, f_t = f_t
f_terms.append(f_t)
neg ^= neg_t #bit flip if neg_t is true
neg ^= neg_t # bit flip if neg_t is true
return f_terms, rest, neg
......@@ -299,7 +305,8 @@ def local_exp_over_1_plus_exp(node):
#find all the exp() terms in the numerator
num, denom = node.inputs
num_exp_x, num_rest, num_neg = partition_num_or_denom(num, is_exp)
denom_1pexp, denom_rest, denom_neg = partition_num_or_denom(denom, is_1pexp)
denom_1pexp, denom_rest,
denom_neg = partition_num_or_denom(denom, is_1pexp)
sigmoids = []
for t in denom_1pexp:
......@@ -311,7 +318,7 @@ def local_exp_over_1_plus_exp(node):
# case: 1/(1+exp(x))
sigmoids.append(sigmoid(-t))
if not sigmoids: # we didn't find any. abort
if not sigmoids: # we didn't find any. abort
return
# put the new numerator together
new_num = sigmoids + [tensor.exp(t) for t in num_exp_x] + num_rest
......@@ -330,6 +337,7 @@ def local_exp_over_1_plus_exp(node):
else:
return [new_num / tensor.mul(*denom_rest)]
def parse_mul_tree(root):
"""
Parse a tree of multiplications starting at the given root.
......@@ -512,7 +520,7 @@ def perform_sigm_times_exp(tree, exp_x=None, exp_minus_x=None, sigm_x=None,
sigm_minus_x = []
if full_tree is None:
full_tree = tree
if False: # Debug code.
if False: # Debug code.
print '<perform_sigm_times_exp>'
print ' full_tree = %s' % full_tree
print ' tree = %s' % tree
......@@ -621,10 +629,13 @@ def local_inv_1_plus_exp(node):
if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp:
if scalars and numpy.allclose(numpy.sum(scalars), 1):
return opt._fill_chain(
sigmoid(tensor.neg(nonconsts[0].owner.inputs[0])),
sigmoid(
tensor.neg(nonconsts[0].owner.inputs[0])),
scalar_inputs)
# Registration is below, and conditional.
@gof.local_optimizer([tensor.sub])
def local_1msigmoid(node):
"""
......@@ -633,7 +644,7 @@ def local_1msigmoid(node):
if node.op == tensor.sub:
sub_l, sub_r = node.inputs
if len(sub_r.clients) > 1:
return # graph is using both sigm and 1-sigm
return # graph is using both sigm and 1-sigm
if sub_r.owner and sub_r.owner.op == sigmoid:
try:
val_l = opt.get_constant_value(sub_l)
......@@ -686,13 +697,14 @@ if 0:
assert t0.owner.op == div
t0top, t0bot = t0.owner.inputs
t1top, t1bot = t1.owner.inputs
rval.append(div(mul(*(t0top+t1top)), mul(*(t0bot+t1bot))))
rval.append(div(mul(*(
t0top + t1top)), mul(*(t0bot + t1bot))))
if len(rval) > 100:
# This loop can be exponentially long.
# aborting
return []
elif len(node.outputs)>1:
elif len(node.outputs) > 1:
return []
else:
return [node.outputs[0]]
......@@ -136,7 +136,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
"""
def __init__(self, seed=None, no_warn = False):
def __init__(self, seed=None, no_warn=False):
""":type seed: None or int
:param seed: a default seed to initialize the RandomState
......@@ -146,7 +146,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
"""
if not no_warn:
deprecation_warning()
super(RandomStreams, self).__init__(no_warn = True)
super(RandomStreams, self).__init__(no_warn=True)
self.random_state_variables = []
self.default_instance_seed = seed
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -47,7 +47,8 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
#test that DimShuffle.infer_shape work correctly
x = TensorType('float64', ib)('x')
e = DimShuffle(ib, shuffle)(x)
f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function()
f = copy(linker).accept(FunctionGraph([x], [e.
shape])).make_function()
assert all(f(numpy.ones(xsh))) == all(zsh)
# Test when we drop a axis that is not broadcastable
......@@ -125,7 +126,8 @@ class test_Broadcast(unittest.TestCase):
x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
y = TensorType('float64', [(entry == 1) for entry in ysh])('y')
e = Elemwise(scalar.add)(x, y)
f = copy(linker).accept(FunctionGraph([x, y], [e.shape])).make_function()
f = copy(linker).accept(FunctionGraph([x,
y], [e.shape])).make_function()
assert tuple(f(xv, yv)) == tuple(zv.shape)
def with_linker_inplace(self, linker):
......@@ -154,7 +156,8 @@ class test_Broadcast(unittest.TestCase):
x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
y = TensorType('float64', [(entry == 1) for entry in ysh])('y')
e = Elemwise(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
f = copy(linker).accept(FunctionGraph([x, y], [e.shape])).make_function()
f = copy(linker).accept(FunctionGraph([x,
y], [e.shape])).make_function()
xv = numpy.asarray(numpy.random.rand(*xsh))
yv = numpy.asarray(numpy.random.rand(*ysh))
zv = xv + yv
......@@ -349,7 +352,8 @@ class test_CAReduce(unittest_tools.InferShapeTester):
e = tensor_op(x, axis=tosum)
if tosum is None:
tosum = range(len(xsh))
f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function()
f = copy(linker).accept(FunctionGraph([x],
[e.shape])).make_function()
if not(scalar_op in [scalar.maximum, scalar.minimum] and
((xsh == () or numpy.prod(xsh) == 0))):
assert all(f(xv) == zv.shape)
......@@ -459,7 +463,8 @@ class test_Prod(unittest.TestCase):
# including zeros, as the case with zeros is important
# (and special cases: 1 zero in the row, more than 1 zero in the row)
x_val = numpy.asarray([[1,2,3],[4,5,6],[7,8,9]], dtype='float32')
x_val = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
dtype='float32')
x = theano.tensor.dmatrix()
# now with verify_grad
unittest_tools.verify_grad(Prod(axis=1), [x_val], mode=self.mode)
......@@ -471,26 +476,28 @@ class test_Prod(unittest.TestCase):
unittest_tools.verify_grad(fn, [x_val], mode=self.mode)
def test_verify_grad_with_zeros(self):
# including zeros, as the case with zeros is important
# (and special cases: 1 zero in the row, more than 1 zero in the row)
x_val = numpy.asarray([[1.,2.,3.],[0.,5.,6.],[0.,0.,9.]], dtype='float32')
x_val = numpy.asarray([[1., 2., 3.], [0., 5., 6.], [0., 0., 9.]],
dtype='float32')
x = theano.tensor.dmatrix()
# sanity check
x2 = theano.tensor.dmatrix()
p = Prod(axis=1)(x)
p2 = Prod(axis=1)(x2)
fn = theano.function([x,x2],[p-p2], mode=self.mode)
fn = theano.function([x, x2], [p - p2], mode=self.mode)
#print "hand computed diff for each row"
x2_val = numpy.asarray([[1., 2., 3.003], [0.003,5.,6], [0.,0.,9.01]])
x2_val = numpy.asarray([[1., 2., 3.003], [0.003, 5., 6], [
0., 0., 9.01]])
#print fn(x_val, x2_val)
fn2 = theano.function([x],[theano.tensor.grad(p.sum(),x)], mode=self.mode)
fn2 = theano.function([x], [theano.tensor.grad(p.sum(), x)],
mode=self.mode)
#print "real grad"
#print fn2(x_val)
fn3 = theano.function([x],[p], mode=self.mode)
assert numpy.allclose(fn3(x_val), [6.,0.,0.])
fn3 = theano.function([x], [p], mode=self.mode)
assert numpy.allclose(fn3(x_val), [6., 0., 0.])
# now with verify_grad
unittest_tools.verify_grad(Prod(axis=1), [x_val], mode=self.mode)
......@@ -511,10 +518,10 @@ class test_Prod(unittest.TestCase):
def test_prod_without_zeros(self):
x = theano.tensor.dmatrix()
x_val = numpy.array([[1,2,3],[0,5,6],[0,0,9]], dtype='float32')
x_val = numpy.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]], dtype='float32')
pwz = ProdWithoutZeros(axis=1)(x)
fn = theano.function([x], pwz, mode=self.mode)
assert numpy.allclose(fn(x_val), [6,30,9])
assert numpy.allclose(fn(x_val), [6, 30, 9])
pwz_a0 = ProdWithoutZeros(axis=0)(x)
fn_a0 = theano.function([x], pwz_a0, mode=self.mode)
......@@ -522,25 +529,30 @@ class test_Prod(unittest.TestCase):
def test_other_grad_tests(self):
x = theano.tensor.dmatrix()
x_val1 = numpy.array([[1,2,3],[0,5,6],[0,0,9]], dtype='float32')
x_val2 = numpy.array([[1,2,0],[0,5,6],[7,8,9],[9,10,0]], dtype='float32')
x_val1 = numpy.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]],
dtype='float32')
x_val2 = numpy.array([[1, 2, 0], [0, 5, 6], [7, 8, 9], [9, 10, 0]],
dtype='float32')
rng = rng = numpy.random.RandomState(43)
p = Prod(axis=1)
grad_p = theano.tensor.grad(p(x).sum(), x)
grad_fn = theano.function([x], grad_p, mode=self.mode)
assert numpy.allclose(grad_fn(x_val1), [[6.,3.,2.],[30.,0.,0.],[0.,0.,0.]])
assert numpy.allclose(grad_fn(x_val2), [[0., 0., 2.], [30., 0., 0.], [72., 63., 56.], [0., 0., 90.]])
assert numpy.allclose(grad_fn(x_val1), [[6., 3., 2.], [30., 0.,
0.], [0., 0., 0.]])
assert numpy.allclose(grad_fn(x_val2), [[0., 0., 2.], [30.,
0., 0.], [72., 63., 56.], [0., 0., 90.]])
p_axis0 = Prod(axis=0)
grad_p_axis0 = theano.tensor.grad(p_axis0(x).sum(), x)
grad_fn_axis0 = theano.function([x], grad_p_axis0, mode=self.mode)
assert numpy.allclose(grad_fn_axis0(x_val2), [[0., 400., 0.],[63., 160., 0.], [0., 100., 0.], [0., 80., 0.]])
assert numpy.allclose(grad_fn_axis0(x_val2), [[0., 400.,
0.], [63., 160., 0.], [0., 100., 0.], [0., 80., 0.]])
tensor.verify_grad(p, [x_val1], rng=rng, mode=self.mode)
def test_mul_without_zeros_zeros(self):
a = numpy.zeros((3,3))
a = numpy.zeros((3, 3))
x = theano.tensor.dmatrix()
......@@ -655,6 +667,7 @@ class T_sum_dtype(unittest.TestCase):
idx += 1
class T_mean_dtype(unittest.TestCase):
def test_mean_default_dtype(self):
"""
......@@ -710,6 +723,7 @@ class T_mean_dtype(unittest.TestCase):
idx += 1
class T_prod_dtype(unittest.TestCase):
def test_prod_default_dtype(self):
"""
......@@ -761,6 +775,7 @@ class T_prod_dtype(unittest.TestCase):
idx += 1
class T_prod_without_zeros_dtype(unittest.TestCase):
def test_prod_without_zeros_default_dtype(self):
"""
......@@ -844,11 +859,8 @@ if __name__ == '__main__':
"""
if __name__ == '__main__':
t = TestElemwise('setUp')
t.setUp()
t.test_infer_shape()
......@@ -12,15 +12,19 @@ import sys
from theano.tests import unittest_tools
from numpy.testing.noseclasses import KnownFailureTest
def cross_entropy(target, output, axis=1):
"""
@todo: This is essentially duplicated as tensor.nnet.binary_crossentropy
@warning: OUTPUT and TARGET are reversed in tensor.nnet.binary_crossentropy
"""
return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis)
def quadratic(target, output, axis=1):
return T.mean(T.sqr(target - output), axis=axis)
class QuadraticDenoisingAA(module.Module):
"""Quadratic de-noising Auto-encoder
......@@ -35,15 +39,15 @@ class QuadraticDenoisingAA(module.Module):
"""
def __init__(self,
input = None,
input=None,
# regularize = False,
tie_weights = False,
n_quadratic_filters = 1,
_w1 = None,
_w2 = None,
_b1 = None,
_b2 = None,
_qfilters = None,
tie_weights=False,
n_quadratic_filters=1,
_w1=None,
_w2=None,
_b1=None,
_b2=None,
_qfilters=None,
activation_function=NN.sigmoid,
reconstruction_cost_function=cross_entropy):
"""
......@@ -83,7 +87,8 @@ class QuadraticDenoisingAA(module.Module):
# PARAMETERS
if _qfilters is None:
#self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
self.qfilters = [(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
self.qfilters = [(T.dmatrix('q%i' % i))
for i in xrange(n_quadratic_filters)]
else:
#self.qfilters = [theano.Member(q) for q in _qfilters]
self.qfilters = [(q) for q in _qfilters]
......@@ -91,7 +96,8 @@ class QuadraticDenoisingAA(module.Module):
#self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1)
if _w1 is None:
self.w1 = (T.matrix('w1'))
else: self.w1 = (_w1)
else:
self.w1 = (_w1)
if _w2 is None:
if not tie_weights:
#self.w2 = theano.Member(T.matrix())
......@@ -104,30 +110,30 @@ class QuadraticDenoisingAA(module.Module):
#self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1)
if _b1 is None:
self.b1 = (T.vector('b1'))
else: self.b1 = (_b1)
else:
self.b1 = (_b1)
#self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2)
if _b2 is None:
self.b2 = (T.vector('b2'))
else: self.b2 = (_b2)
else:
self.b2 = (_b2)
# # REGULARIZATION COST
# self.regularization = self.build_regularization()
### NOISELESS ###
# HIDDEN LAYER
def _act(x):
if len(self.qfilters) > 0:
qsum = 10e-10 # helps to control the gradient in the square-root below
for qf in self.qfilters:
qsum = qsum + T.dot(x, qf)**2
qsum = qsum + T.dot(x, qf) ** 2
return T.dot(x, self.w1) + self.b1 + T.sqrt(qsum)
else:
return T.dot(x, self.w1) + self.b1
self.hidden_activation = _act(self.input) #noise-free hidden
self.hidden_activation = _act(self.input) # noise-free hidden
self.hidden = self.hid_activation_function(self.hidden_activation)
......@@ -144,7 +150,6 @@ class QuadraticDenoisingAA(module.Module):
# if self.regularize:
# self.cost = self.cost + self.regularization
### WITH NOISE ###
self.corrupted_input = self.build_corrupted_input()
......@@ -165,7 +170,6 @@ class QuadraticDenoisingAA(module.Module):
# if self.regularize:
# self.ncost = self.ncost + self.regularization
# GRADIENTS AND UPDATES
if self.tie_weights:
self.params = [self.w1, self.b1, self.b2] + self.qfilters
......@@ -173,7 +177,8 @@ class QuadraticDenoisingAA(module.Module):
self.params = [self.w1, self.w2, self.b1, self.b2] + self.qfilters
gradients = T.grad(self.ncost, self.params)
updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))
updates = dict((p, p - self.lr * g) for p, g in zip(self.
params, gradients))
# INTERFACE METHODS
#self.update = theano.Method(self.input, self.ncost, updates)
......@@ -192,16 +197,17 @@ class QuadraticDenoisingAA(module.Module):
filter's initial range)
"""
if (input_size is None) ^ (hidden_size is None):
raise ValueError("Must specify input_size and hidden_size or neither.")
raise ValueError(
"Must specify input_size and hidden_size or neither.")
super(QuadraticDenoisingAA, self)._instance_initialize(obj, {})
obj.random.initialize()
R = N.random.RandomState(unittest_tools.fetch_seed(seed))
if input_size is not None:
sz = (input_size, hidden_size)
inf = 1/N.sqrt(input_size)
hif = 1/N.sqrt(hidden_size)
obj.w1 = N.asarray(R.uniform(size = sz, low = -inf, high = inf),
inf = 1 / N.sqrt(input_size)
hif = 1 / N.sqrt(hidden_size)
obj.w1 = N.asarray(R.uniform(size=sz, low=-inf, high=inf),
dtype=config.floatX)
if not self.tie_weights:
obj.w2 = N.asarray(
......@@ -257,14 +263,17 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA):
def _instance_initialize(self, obj, input_size, hidden_size, noise_level, seed, lr, qfilter_relscale):
# obj.l2_coef = 0.0
obj.noise_level = N.asarray(noise_level, dtype=config.floatX)
super(SigmoidXEQuadraticDenoisingAA, self)._instance_initialize(obj, input_size, hidden_size, seed, lr, qfilter_relscale)
super(SigmoidXEQuadraticDenoisingAA, self)
._instance_initialize(obj, input_size, hidden_size, seed, lr, qfilter_relscale)
QDAA = SigmoidXEQuadraticDenoisingAA
class Loss01(object):
def loss_01(self, x, targ):
return N.mean(self.classify(x) != targ)
class Module_Nclass(module.FancyModule):
def _instance_initialize(mod_self, self, n_in, n_out, lr, seed):
#self.component is the LogisticRegressionTemplate instance that built this guy.
......@@ -280,29 +289,34 @@ class Module_Nclass(module.FancyModule):
self.output_dimension = n_out
def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False):
super(Module_Nclass, self).__init__() #boilerplate
super(Module_Nclass, self).__init__() # boilerplate
#self.x = module.Member(x) if x is not None else T.matrix('input')
if x is not None:
self.x = (x)
else: self.x = T.matrix('input')
else:
self.x = T.matrix('input')
#self.targ = module.Member(targ) if targ is not None else T.lvector()
if targ is not None:
self.targ = (targ)
else: self.targ = T.lvector()
else:
self.targ = T.lvector()
#self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
if w is not None:
self.w = (w)
else: self.w = (T.dmatrix())
else:
self.w = (T.dmatrix())
#self.b = module.Member(b) if b is not None else module.Member(T.dvector())
if b is not None:
self.b = (b)
else: self.b = (T.dvector())
else:
self.b = (T.dvector())
#self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
if lr is not None:
self.lr = (lr)
else: self.lr = (T.dscalar())
else:
self.lr = (T.dscalar())
self.params = [p for p in [self.w, self.b] if p.owner is None]
......@@ -341,13 +355,14 @@ class Module_Nclass(module.FancyModule):
#self.update = module.Method([self.input, self.targ], sum_xent,
#updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
class ConvolutionalMLP(module.FancyModule):
def __init__(self,
window_size,
n_quadratic_filters,
activation_function,
reconstruction_cost_function,
tie_weights = False,
tie_weights=False,
# _input,
# _targ
):
......@@ -362,9 +377,9 @@ class ConvolutionalMLP(module.FancyModule):
self.input_representations = []
self.input_representations.append(QDAA(
input=self.inputs[0],
tie_weights = tie_weights,
n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function,
tie_weights=tie_weights,
n_quadratic_filters=n_quadratic_filters,
activation_function=activation_function,
reconstruction_cost_function = reconstruction_cost_function
)
)
......@@ -373,9 +388,9 @@ class ConvolutionalMLP(module.FancyModule):
self.input_representations.append(
QDAA(
input=i,
tie_weights = tie_weights,
n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function,
tie_weights=tie_weights,
n_quadratic_filters=n_quadratic_filters,
activation_function=activation_function,
reconstruction_cost_function = reconstruction_cost_function,
_w1 = self.input_representations[0].w1,
_w2 = self.input_representations[0].w2,
......@@ -384,14 +399,16 @@ class ConvolutionalMLP(module.FancyModule):
_qfilters = self.input_representations[0].qfilters
)
)
assert self.input_representations[-1].w1 is self.input_representations[0].w1
assert self.input_representations[-1]
.w1 is self.input_representations[0].w1
self.input_representation = T.concatenate([i.hidden for i in self.input_representations], axis=1)
self.input_representation = T.concatenate([i.
hidden for i in self.input_representations], axis=1)
self.hidden = QDAA(
input = self.input_representation,
tie_weights = tie_weights,
n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function,
input=self.input_representation,
tie_weights=tie_weights,
n_quadratic_filters=n_quadratic_filters,
activation_function=activation_function,
reconstruction_cost_function = reconstruction_cost_function
)
self.output = Module_Nclass(x=self.hidden.hidden, targ=self.targ)
......@@ -408,11 +425,13 @@ class ConvolutionalMLP(module.FancyModule):
self.hidden.b1,
self.hidden.b2
] + self.hidden.qfilters
input_pretraining_cost = sum(i.ncost for i in self.input_representations)
input_pretraining_cost = sum(i.ncost for i in self.
input_representations)
hidden_pretraining_cost = self.hidden.ncost
input_pretraining_gradients = T.grad(input_pretraining_cost,
input_pretraining_params)
hidden_pretraining_gradients = T.grad(hidden_pretraining_cost, hidden_pretraining_params)
hidden_pretraining_gradients = T.grad(
hidden_pretraining_cost, hidden_pretraining_params)
pretraining_updates = \
dict((p, p - self.lr * g) for p, g in \
zip(input_pretraining_params, input_pretraining_gradients) \
......@@ -428,8 +447,10 @@ class ConvolutionalMLP(module.FancyModule):
[self.output.w, self.output.b]
finetuning_cost = self.output.cost
finetuning_gradients = T.grad(finetuning_cost, finetuning_params)
finetuning_updates = dict((p, p - self.lr * g) for p, g in zip(finetuning_params, finetuning_gradients))
self.finetuning_update = module.Method(self.inputs + [self.targ], self.output.cost, finetuning_updates)
finetuning_updates = dict((p, p - self.lr * g) for p,
g in zip(finetuning_params, finetuning_gradients))
self.finetuning_update = module.Method(self.inputs + [self.
targ], self.output.cost, finetuning_updates)
#self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr])
#self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised)
......@@ -447,8 +468,10 @@ class ConvolutionalMLP(module.FancyModule):
# for layer in obj.layers:
# if layer.lr is None:
# layer.lr = lr
assert self.input_representations[-1] is not self.input_representations[0]
assert self.input_representations[-1].w1 is self.input_representations[0].w1
assert self.input_representations[-1]
is not self.input_representations[0]
assert self.input_representations[-1]
.w1 is self.input_representations[0].w1
for i in self.input_representations:
# i.initialize(input_size=self.input_size, hidden_size=self.input_representation_size, seed=R.random_integers(2**30), noise_level=noise_level, qfilter_relscale=qfilter_relscale)
......@@ -465,13 +488,16 @@ class ConvolutionalMLP(module.FancyModule):
assert (i.w2 == self.input_representations[0].w2).all()
assert (i.b1 == self.input_representations[0].b1).all()
assert (i.b2 == self.input_representations[0].b2).all()
assert N.all((a==b).all() for a, b in zip(i.qfilters, self.input_representations[0].qfilters))
assert N.all((a == b).all() for a, b in zip(i.
qfilters, self.input_representations[0].qfilters))
self.hidden.initialize(input_size=(len(self.inputs) * self.input_representation_size),
hidden_size=self.hidden_representation_size, noise_level=noise_level,
seed=int(R.random_integers(2**30)), lr=lr, qfilter_relscale=qfilter_relscale)
self.output.initialize(n_in=self.hidden_representation_size, n_out=self.output_size, lr=lr, seed=R.random_integers(2**30))
self.output.initialize(n_in=self.
hidden_representation_size, n_out=self.output_size, lr=lr, seed=R.random_integers(2**30))
def create(window_size=3,
input_dimension=9,
......@@ -488,22 +514,24 @@ def create(window_size=3,
activation_function = T.tanh
architecture = ConvolutionalMLP( \
window_size = window_size,
n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function,
reconstruction_cost_function = quadratic,
tie_weights = False
window_size=window_size,
n_quadratic_filters=n_quadratic_filters,
activation_function=activation_function,
reconstruction_cost_function=quadratic,
tie_weights=False
)
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
model = architecture.make(input_size=input_dimension,
input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
finally:
config.warn.sum_div_dimshuffle_bug = backup
return model
def create_realistic(window_size=3,#7,
def create_realistic(window_size=3, # 7,
input_dimension=200,
output_vocabsize=23,
n_quadratic_filters=2,
......@@ -518,15 +546,17 @@ def create_realistic(window_size=3,#7,
activation_function = T.tanh
architecture = ConvolutionalMLP( \
window_size = window_size,
n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function,
reconstruction_cost_function = quadratic,
tie_weights = False
window_size=window_size,
n_quadratic_filters=n_quadratic_filters,
activation_function=activation_function,
reconstruction_cost_function=quadratic,
tie_weights=False
)
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
model = architecture.make(input_size=input_dimension,
input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
return model
def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
optimizer=None, realistic=False):
#print "BUILDING MODEL"
......@@ -535,11 +565,12 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
if optimizer:
mode = theano.Mode(linker='c|py', optimizer=optimizer)
else: mode = get_default_mode()
else:
mode = get_default_mode()
if mode.__class__.__name__ == 'DebugMode':
iters_per_unsup=1
iters_per_sup =1
iters_per_unsup = 1
iters_per_sup = 1
if realistic:
m = create_realistic(compile_mode=mode)
......@@ -552,7 +583,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
for i, node in enumerate(m.pretraining_update.maker.fgraph.toposort()):
idx_of_node[node] = i
if False and i > -1:
print ' ', i, node, [(ii, idx_of_node.get(ii.owner, 'IN')) for ii in node.inputs]
print ' ', i, node, [(ii, idx_of_node.get(ii.
owner, 'IN')) for ii in node.inputs]
prog_str.append(str(node))
#print input_pretraining_gradients[4].owner.inputs
#print input_pretraining_gradients[4].owner.inputs[1].owner.inputs
......@@ -562,8 +594,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
rng = N.random.RandomState(unittest_tools.fetch_seed(23904))
inputs = [rng.rand(10,m.input_size) for i in 1,2,3]
targets = N.asarray([0,3,4,2,3,4,4,2,1,0])
inputs = [rng.rand(10, m.input_size) for i in 1, 2, 3]
targets = N.asarray([0, 3, 4, 2, 3, 4, 4, 2, 1, 0])
#print inputs
#print 'UNSUPERVISED PHASE'
......@@ -584,8 +616,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
s0, s1 = [str(j) for j in m.pretraining_update(*inputs)]
#print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
if iters_per_unsup == 3:
assert s0.startswith('0.927793')#'0.403044')
assert s1.startswith('0.068035')#'0.074898')
assert s0.startswith('0.927793') # '0.403044')
assert s1.startswith('0.068035') # '0.074898')
#print 'UNSUPERVISED took %.3fs'%(time.time() - t)
#print 'FINETUNING GRAPH'
......@@ -601,6 +633,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
assert 19.7042 < s0f and s0f < 19.7043
#print 'SUPERVISED took %.3fs'%( time.time() - t)
def jtest_main():
from theano import gof
JTEST = theano.compile.mode.optdb.query(*sys.argv[2:])
......@@ -609,13 +642,17 @@ def jtest_main():
optimizer = eval(sys.argv[1])
test_naacl_model(optimizer, 10, 10, realistic=False)
def real_main():
test_naacl_model()
def profile_main():
# This is the main function for profiling
# We've renamed our original main() above to real_main()
import cProfile, pstats, StringIO
import cProfile
import pstats
import StringIO
prof = cProfile.Profile()
prof = prof.runctx("real_main()", globals(), locals())
stream = StringIO.StringIO()
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -26,6 +26,7 @@ class testgrad_sources_inputs(unittest.TestCase):
inputs = [theano.tensor.vector()]
outputs = [theano.tensor.vector()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
x, = inp
gz, = grads
......@@ -43,14 +44,15 @@ class testgrad_sources_inputs(unittest.TestCase):
def make_node(self, *inputs):
outputs = [theano.tensor.vector()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, grads):
return [ inputs[0].zeros_like() ]
return [inputs[0].zeros_like()]
i = theano.tensor.vector()
j = theano.tensor.vector()
a1 = retOne().make_node(i)
g = grad_sources_inputs([(a1.out, one)], None)
a2 = retOne().make_node(i,j)
a2 = retOne().make_node(i, j)
try:
g = grad_sources_inputs([(a2.out, one)], None)
except ValueError, e:
......@@ -60,11 +62,13 @@ class testgrad_sources_inputs(unittest.TestCase):
def test_1in_1out(self):
"""Test grad is called correctly for a 1-to-1 op"""
gval = theano.tensor.matrix()
class O(gof.op.Op):
def make_node(self):
inputs = [theano.tensor.matrix()]
outputs = [theano.tensor.matrix()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
return gval,
a1 = O().make_node()
......@@ -74,11 +78,13 @@ class testgrad_sources_inputs(unittest.TestCase):
def test_1in_Nout(self):
"""Test grad is called correctly for a 1-to-many op"""
gval = theano.tensor.matrix()
class O(gof.op.Op):
def make_node(self):
inputs = [theano.tensor.matrix()]
outputs = [theano.tensor.scalar(),theano.tensor.scalar()]
outputs = [theano.tensor.scalar(), theano.tensor.scalar()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
x, = inp
gz1, gz2 = grads
......@@ -91,11 +97,13 @@ class testgrad_sources_inputs(unittest.TestCase):
"""Test grad is called correctly for a many-to-1 op"""
gval0 = theano.tensor.scalar()
gval1 = theano.tensor.scalar()
class O(gof.op.Op):
def make_node(self):
inputs = [theano.tensor.scalar(), theano.tensor.scalar()]
outputs = [theano.tensor.matrix()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
x0, x1 = inp
gz, = grads
......@@ -109,11 +117,13 @@ class testgrad_sources_inputs(unittest.TestCase):
"""Test grad is called correctly for a many-to-many op"""
gval0 = theano.tensor.matrix()
gval1 = theano.tensor.matrix()
class O(gof.op.Op):
def make_node(self):
inputs = [theano.tensor.matrix(),theano.tensor.matrix()]
outputs = [theano.tensor.matrix(),theano.tensor.matrix()]
inputs = [theano.tensor.matrix(), theano.tensor.matrix()]
outputs = [theano.tensor.matrix(), theano.tensor.matrix()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
return gval0, gval1
a1 = O().make_node()
......@@ -126,9 +136,11 @@ class testgrad_sources_inputs(unittest.TestCase):
class O(gof.op.Op):
def __init__(self, tst):
self.tst = tst
def make_node(self, *inputs):
outputs = [theano.tensor.matrix(),theano.tensor.matrix()]
outputs = [theano.tensor.matrix(), theano.tensor.matrix()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, g_out):
return [one]
i = theano.tensor.matrix()
......@@ -136,26 +148,29 @@ class testgrad_sources_inputs(unittest.TestCase):
g = grad_sources_inputs([(a1.outputs[0], one)], None)
self.assertTrue(g[i] is one)
def test_unimplemented_grad_func():
    """Function compilation must reject a graph that contains an
    unimplemented gradient (built via grad_not_implemented).
    """
    a = theano.tensor.vector()
    b = theano.gradient.grad_not_implemented(theano.tensor.add, 0, a)
    try:
        # 'ignore' silences the unused-input check so the failure we
        # observe comes from the unimplemented gradient itself.
        f = theano.function([a], b, on_unused_input='ignore')
        assert 0
    except TypeError:
        pass
def test_undefined_grad_func():
    """Function compilation must reject a graph that contains an
    undefined gradient (built via grad_undefined).
    """
    a = theano.tensor.vector()
    b = theano.gradient.grad_undefined(theano.tensor.add, 0, a)
    try:
        # 'ignore' silences the unused-input check so the failure we
        # observe comes from the undefined gradient itself.
        f = theano.function([a], b, on_unused_input='ignore')
        assert 0
    except TypeError:
        pass
def test_unimplemented_grad_grad():
#tests that unimplemented grads are caught in the grad method
......@@ -164,132 +179,137 @@ def test_unimplemented_grad_grad():
return gof.Apply(self, [x], [x.type()])
def grad(self, inputs, output_grads):
return [ theano.gradient.grad_not_implemented(self, 0, inputs[0]) ]
return [theano.gradient.grad_not_implemented(self, 0, inputs[0])]
a = theano.tensor.scalar()
b = DummyOp()(a)
try:
g = theano.gradient.grad(b,a)
g = theano.gradient.grad(b, a)
assert False
except TypeError:
pass
def test_undefined_grad_grad():
    """grad() must raise TypeError when differentiating through an
    undefined gradient.

    NOTE(review): assumes conv3D declares its gradient with respect to
    the stride argument `d` as undefined -- confirm against conv3D.
    """
    # 5-D inputs as conv3D expects; dtype follows the configured float
    # type so the test runs under either float32 or float64.
    V = theano.tensor.TensorType(
        dtype=config.floatX,
        broadcastable=(False, False, False, False, False))()
    W = theano.tensor.TensorType(
        dtype=config.floatX,
        broadcastable=(False, False, False, False, False))()
    b = theano.tensor.vector()
    d = theano.tensor.ivector()

    Z = conv3D(V, W, b, d)

    try:
        g = theano.gradient.grad(Z.sum(), d)
        assert False
    except TypeError:
        pass
def test_grad_name():
    """The gradient of a named cost wrt a named variable is given an
    automatic '(df/dx)' name.
    """
    A = theano.tensor.matrix('A')
    x = theano.tensor.vector('x')
    f = theano.tensor.dot(x, theano.tensor.dot(A, x))
    f.name = 'f'
    g = theano.tensor.grad(f, x)
    assert g.name == '(df/dx)'
def test_grad_duplicate_input():
    """grad must work when a variable appears in more than one place in
    a node's input list (here x is both operands of the mul).
    """
    def output(x):
        return (x * x)

    # Seeded RNG keeps the finite-difference check deterministic.
    rng = np.random.RandomState([2012, 8, 28])

    vx = rng.randn(2)

    theano.tests.unittest_tools.verify_grad(output, [vx])
def test_grad_quadratic():
    """Check the gradient of x^T A x on a tiny graph."""
    def cost(x, A):
        return theano.tensor.dot(x, theano.tensor.dot(A, x))

    # Seeded RNG keeps the finite-difference check deterministic.
    rng = np.random.RandomState([2012, 8, 28])

    vx = rng.randn(2)
    vA = rng.randn(2, 2)

    theano.tests.unittest_tools.verify_grad(cost, [vx, vA])
def test_grad_quadratic_vector():
    """Check the gradient of the vector-valued output (x*x) A on a
    small graph.
    """
    def output(x, A):
        return theano.tensor.dot(x * x, A)

    # Seeded RNG keeps the finite-difference check deterministic.
    rng = np.random.RandomState([2012, 8, 28])

    vx = rng.randn(2)
    vA = rng.randn(2, 2)

    theano.tests.unittest_tools.verify_grad(output, [vx, vA])
def test_grad_cubic():
    """Check the gradient of (x*x)^T A x on a bigger graph."""
    def cost(x, A):
        return theano.tensor.dot(x * x, theano.tensor.dot(A, x))

    # Seeded RNG keeps the finite-difference check deterministic.
    rng = np.random.RandomState([2012, 8, 28])

    vx = rng.randn(2)
    vA = rng.randn(2, 2)

    theano.tests.unittest_tools.verify_grad(cost, [vx, vA])
def test_grad_grad_quadratic():
    """Check the gradient of a graph that was itself constructed with
    grad (d/dx of x^T A x).
    """
    def output(x, A):
        orig_cost = theano.tensor.dot(x, theano.tensor.dot(A, x))
        return theano.gradient.grad(orig_cost, x)

    # Seeded RNG keeps the finite-difference check deterministic.
    rng = np.random.RandomState([2012, 8, 28])

    vx = rng.randn(2)
    vA = rng.randn(2, 2)

    theano.tests.unittest_tools.verify_grad(output, [vx, vA])
def test_grad_grad_cubic():
    """Check the gradient of a bigger graph that was itself constructed
    with grad (d/dx of (x*x)^T A x).
    """
    def output(x, A):
        orig_cost = theano.tensor.dot(x * x, theano.tensor.dot(A, x))
        return theano.gradient.grad(orig_cost, x)

    # Seeded RNG keeps the finite-difference check deterministic.
    rng = np.random.RandomState([2012, 8, 28])

    vx = rng.randn(2)
    vA = rng.randn(2, 2)

    theano.tests.unittest_tools.verify_grad(output, [vx, vA])
def test_grad_int():
......@@ -300,11 +320,11 @@ def test_grad_int():
b = theano.tensor.vector()
def make_grad_func(X):
Z = theano.tensor.dot(X,W) + b
Z = theano.tensor.dot(X, W) + b
H = theano.tensor.nnet.sigmoid(Z)
cost = H.sum()
g = gradient.grad(cost,X)
return theano.function([X,W,b],g, on_unused_input = 'ignore')
g = gradient.grad(cost, X)
return theano.function([X, W, b], g, on_unused_input='ignore')
int_func = make_grad_func(theano.tensor.imatrix())
#we have to use float64 as the float type to get the results to match
......@@ -314,17 +334,17 @@ def test_grad_int():
m = 5
d = 3
n = 4
rng = np.random.RandomState([2012,9,5])
rng = np.random.RandomState([2012, 9, 5])
int_type = theano.tensor.imatrix().dtype
float_type = 'float64'
X = np.cast[int_type](rng.randn(m,d) * 127.)
W = np.cast[W.dtype](rng.randn(d,n))
X = np.cast[int_type](rng.randn(m, d) * 127.)
W = np.cast[W.dtype](rng.randn(d, n))
b = np.cast[b.dtype](rng.randn(n))
int_result = int_func(X,W,b)
float_result = float_func(np.cast[float_type](X),W,b)
int_result = int_func(X, W, b)
float_result = float_func(np.cast[float_type](X), W, b)
assert np.allclose(int_result, float_result)
......@@ -333,23 +353,23 @@ def test_grad_disconnected():
#tests corner cases of gradient for shape and alloc
x = theano.tensor.vector(name = 'x')
x = theano.tensor.vector(name='x')
total = x.sum()
total.name = 'total'
num_elements = x.shape[0]
num_elements.name = 'num_elements'
silly_vector = theano.tensor.alloc( total / num_elements, num_elements)
silly_vector = theano.tensor.alloc(total / num_elements, num_elements)
silly_vector.name = 'silly_vector'
cost = silly_vector.sum()
cost.name = 'cost'
#note that cost simplifies to be the same as "total"
g = gradient.grad(cost, x, add_names = False)
g = gradient.grad(cost, x, add_names=False)
#we still need to pass in x because it determines the shape of the output
f = theano.function([x],g)
rng = np.random.RandomState([2012,9,5])
f = theano.function([x], g)
rng = np.random.RandomState([2012, 9, 5])
x = np.cast[x.dtype](rng.randn(3))
g = f(x)
assert np.allclose(g,np.ones(x.shape,dtype=x.dtype))
assert np.allclose(g, np.ones(x.shape, dtype=x.dtype))
def test_disconnected_nan():
......@@ -361,27 +381,27 @@ def test_disconnected_nan():
class Op1(theano.gof.Op):
def make_node(self, x):
return theano.Apply(self, inputs=[x],
outputs = [ x.type(), theano.tensor.scalar() ])
outputs=[x.type(), theano.tensor.scalar()])
def connection_pattern(self, node):
return [[True, False]]
def grad(self, inputs, output_grads):
return [ inputs[0].zeros_like() ]
return [inputs[0].zeros_like()]
# Op2 has two inputs, f and g
# Its gradient with respect to g is not defined
class Op2(theano.gof.Op):
def make_node(self, f, g):
return theano.Apply(self, inputs=[f,g],
outputs = [ theano.tensor.scalar() ])
return theano.Apply(self, inputs=[f, g],
outputs=[theano.tensor.scalar()])
def grad(self, inputs, output_grads):
return [ inputs[0].zeros_like(), NullType()() ]
return [inputs[0].zeros_like(), NullType()()]
x = theano.tensor.vector()
f, g = Op1()(x)
cost = Op2()(f,g)
cost = Op2()(f, g)
# cost is differentiable wrt x
# but we can't tell that without using Op1's connection pattern
......@@ -394,7 +414,6 @@ def test_disconnected_nan():
# connection_pattern functionality worked correctly
def test_sum_disconnected():
# Tests that we can add DisconnectedType to other terms correctly
......@@ -402,7 +421,7 @@ def test_sum_disconnected():
y = x * 2.
z = x + 1.
cost = y + z
theano.tensor.grad(cost, x, consider_constant=[y,z])
theano.tensor.grad(cost, x, consider_constant=[y, z])
# In an earlier version of theano, the above line would have failed
# while trying to add two DisconnectedTypes
......
......@@ -47,7 +47,7 @@ class BreakRop(Op):
out[0] = x
def grad(self, inp, grads):
return [ grad_undefined(self, 0, inp[0]) ]
return [grad_undefined(self, 0, inp[0])]
def R_op(self, inputs, eval_points):
return [None]
......@@ -330,21 +330,21 @@ class test_RopLop(RopLop_checker):
m_ = tensor.matrix('m_')
v_ = tensor.vector('v_')
mval = self.rng.uniform(size=(3,7)).astype(theano.config.floatX)
mval = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
vval = self.rng.uniform(size=(7,)).astype(theano.config.floatX)
m_val = self.rng.uniform(size=(3,7)).astype(theano.config.floatX)
m_val = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
v_val = self.rng.uniform(size=(7,)).astype(theano.config.floatX)
rop_out1 = tensor.Rop([m, v, m+v], [m, v], [m_, v_])
rop_out1 = tensor.Rop([m, v, m + v], [m, v], [m_, v_])
assert isinstance(rop_out1, list)
assert len(rop_out1) == 3
rop_out2 = tensor.Rop((m, v, m+v), [m, v], [m_, v_])
rop_out2 = tensor.Rop((m, v, m + v), [m, v], [m_, v_])
assert isinstance(rop_out2, tuple)
assert len(rop_out2) == 3
lop_out1 = tensor.Lop([m, v, m+v], (m, v), [m_, v_])
lop_out1 = tensor.Lop([m, v, m + v], (m, v), [m_, v_])
assert isinstance(lop_out1, tuple)
assert len(lop_out1) == 2
lop_out2 = tensor.Lop((m, v, m+v), [m, v], [m_, v_])
lop_out2 = tensor.Lop((m, v, m + v), [m, v], [m_, v_])
assert isinstance(lop_out2, list)
assert len(lop_out2) == 2
......@@ -353,4 +353,3 @@ class test_RopLop(RopLop_checker):
all_outs.extend(o)
f = theano.function([m, v, m_, v_], all_outs)
f(mval, vval, m_val, v_val)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论