提交 3231b8dd authored 作者: james@X40's avatar james@X40

merge

"""Driver of graph construction, optimization, and linking.
"""
__docformat__ = "restructuredtext en"
import copy_reg
import cPickle
......
差异被折叠。
#!/usr/bin/env python
import numpy as N
from theano import Op, Apply, tensor as T, Module, Member, Method, Mode, compile
from theano import Op, Apply, tensor as T, Module, Method, Mode, compile
from theano.gof import OpSub, TopoOptimizer
from pylearn.algorithms.minimizer import make_minimizer # minimizer
from theano.printing import Print
from theano.tests import unittest_tools
#import sgd #until Olivier's module-import thing works better
####################
# Library-type stuff
......@@ -15,8 +13,6 @@ from theano.tests import unittest_tools
from theano.compile import module
from theano import tensor as T
from pylearn.algorithms.minimizer import minimizer_factory
class StochasticGradientDescent(module.FancyModule):
"""Fixed stepsize gradient descent"""
def __init__(self, args, cost, params, gradients=None, stepsize=None, WEIRD_STUFF=True):
......@@ -29,18 +25,18 @@ class StochasticGradientDescent(module.FancyModule):
self.stepsize_init = None
if stepsize is None:
self.stepsize = module.Member(T.dscalar())
self.stepsize = (T.dscalar())
elif isinstance(stepsize, T.TensorResult):
self.stepsize = stepsize
else:
if self.WEIRD_STUFF:
#TODO: why is this necessary? why does the else clause not work?
# self.stepsize = module.Member(T.dscalar(), init = stepsize)
self.stepsize = module.Member(T.dscalar())
self.stepsize = (T.dscalar())
self.stepsize_init = stepsize
else:
# self.stepsize = module.Member(T.value(stepsize))
self.stepsize = module.Member(T.constant(stepsize))#work!
self.stepsize = (T.constant(stepsize))#work!
if self.stepsize.ndim != 0:
raise ValueError('stepsize must be a scalar', stepsize)
......@@ -63,7 +59,6 @@ class StochasticGradientDescent(module.FancyModule):
pass
@minimizer_factory('sgd')
def sgd_minimizer(stepsize=None, **args):
def m(i,c,p,g=None):
return StochasticGradientDescent(i, c, p, stepsize=stepsize, **args)
......@@ -101,6 +96,9 @@ class TanhRnn(Op):
return Apply(self, [x, z0, A], [z])
def perform(self, node, (x,z0,A), out):
assert x is not None
assert z0 is not None
assert A is not None
T,M = x.shape
z = N.zeros((T+1, M))
z[0] = z0
......@@ -161,10 +159,10 @@ class ExampleRNN(Module):
self.n_vis = n_vis
#recurrent weight matrix in latent space
self.z0 = Member(T.dvector())
self.w = Member(T.dmatrix())
self.z0 = (T.dvector())
self.w = (T.dmatrix())
self.params = [self.w]
self.params = [self.z0, self.w]
#input and target
x, y = T.dmatrix(), T.dmatrix()
......@@ -176,6 +174,7 @@ class ExampleRNN(Module):
self.minimizer = minimizer([x, y], self.cost, self.params)
def _instance_initialize(self, obj):
print 'INITIALIZE EXAMPLE RNN'
n_vis = self.n_vis
rng = N.random.RandomState(unittest_tools.fetch_seed(2342))
......@@ -185,14 +184,14 @@ class ExampleRNN(Module):
obj.minimizer.initialize()
def test_example_rnn():
minimizer_fn = make_minimizer('sgd', stepsize = 0.001)
minimizer_fn = sgd_minimizer(stepsize = 0.001)
n_vis = 5
n_out = 3
n_hid = 4
rnn_module = ExampleRNN(n_vis, minimizer_fn)
rnn = rnn_module.make(mode='FAST_RUN')
rnn = rnn_module.make()
rng = N.random.RandomState(unittest_tools.fetch_seed(7722342))
x = rng.randn(10,n_vis)
......@@ -212,6 +211,7 @@ def test_example_rnn():
print i, rnn.minimizer.step_cost(x, y), rnn.minimizer.stepsize
else:
rnn.minimizer.step_cost(x, y)
assert rnn.minimizer.step_cost(x,y) < -20 #it starts around -.28
def test_WEIRD_STUFF():
n_vis = 3
......@@ -224,8 +224,8 @@ def test_WEIRD_STUFF():
LAG = 4
y[LAG:] = x[:-LAG, 0:n_vis]
minimizer_fn1 = make_minimizer('sgd', stepsize = 0.001, WEIRD_STUFF = False)
minimizer_fn2 = make_minimizer('sgd', stepsize = 0.001, WEIRD_STUFF = True)
minimizer_fn1 = sgd_minimizer(stepsize = 0.001, WEIRD_STUFF = False)
minimizer_fn2 = sgd_minimizer(stepsize = 0.001, WEIRD_STUFF = True)
rnn_module1 = ExampleRNN(n_vis, minimizer_fn1)
rnn_module2 = ExampleRNN(n_vis, minimizer_fn2)
rnn1 = rnn_module1.make(mode='FAST_RUN')
......
......@@ -473,15 +473,6 @@ class GemmLocalOptimizer(LocalOptimizer):
return [T.add(*new_add_inputs)]
return False
@staticmethod
def failure_callback(exc, nav, repl_pairs):
    """Report an exception raised while applying a GEMM replacement.

    An ``InconsistencyError`` is silently ignored; for any other exception
    the active traceback is printed via ``traceback.print_exc()``.

    :param exc: the exception instance that was raised.
    :param nav: unused here; accepted to match the callback signature.
    :param repl_pairs: unused here; accepted to match the callback signature.
    """
    if isinstance(exc, InconsistencyError):
        # Presumably the GEMM substitution introduced a cycle in the graph
        # ("it happens") -- nothing to report.
        return
    traceback.print_exc()
@staticmethod
def _as_scalar(res):
"""Return None or a TensorResult whose type is in T.float_scalar_types"""
......@@ -579,11 +570,11 @@ class GemmLocalOptimizer(LocalOptimizer):
# TODO: This could be an EquilibriumOptimizer, but I don't know how to combine an OpKeyOptimizer and
# an EquilibriumOptimizer.
compile.optdb.register('inplace_gemm_0', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.00, 'fast_run', 'inplace', 'gemm')
failure_callback=OpKeyOptimizer.warn_inplace), 70.00, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_1', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.01, 'fast_run', 'inplace', 'gemm')
failure_callback=OpKeyOptimizer.warn_inplace), 70.01, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_2', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.02, 'fast_run', 'inplace', 'gemm')
failure_callback=OpKeyOptimizer.warn_inplace), 70.02, 'fast_run', 'inplace', 'gemm')
class Dot22(GemmRelated):
"""Compute a matrix-matrix product.
......
......@@ -17,6 +17,8 @@ def cross_entropy(target, output, axis=1):
@warning: OUTPUT and TARGET are reversed in nnet_ops.binary_crossentropy
"""
return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis)
def quadratic(target, output, axis=1):
    """Return the mean of the squared difference ``target - output``.

    The mean is taken along ``axis`` (default 1) using ``T.mean``.
    """
    squared_error = T.sqr(target - output)
    return T.mean(squared_error, axis=axis)
class QuadraticDenoisingAA(module.Module):
"""Quadratic de-noising Auto-encoder
......@@ -70,27 +72,36 @@ class QuadraticDenoisingAA(module.Module):
# ACQUIRE/MAKE INPUT
if not input:
input = T.matrix('input')
self.input = theano.External(input)
#self.input = theano.External(input)
self.input = (input)
# HYPER-PARAMETERS
self.lr = theano.Member(T.scalar())
#self.lr = theano.Member(T.scalar())
self.lr = (T.scalar())
# PARAMETERS
if _qfilters is None:
self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
#self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
self.qfilters = [(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
else:
self.qfilters = [theano.Member(q) for q in _qfilters]
#self.qfilters = [theano.Member(q) for q in _qfilters]
self.qfilters = [(q) for q in _qfilters]
self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1)
#self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1)
self.w1 = (T.matrix('w1')) if _w1 is None else (_w1)
if _w2 is None:
if not tie_weights:
self.w2 = theano.Member(T.matrix())
#self.w2 = theano.Member(T.matrix())
self.w2 = (T.matrix())
else:
self.w2 = self.w1.T
else:
self.w2 = theano.Member(_w2)
self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1)
self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2)
#self.w2 = theano.Member(_w2)
self.w2 = (_w2)
#self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1)
self.b1 = (T.vector('b1')) if _b1 is None else (_b1)
#self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2)
self.b2 = (T.vector('b2')) if _b2 is None else (_b2)
# # REGULARIZATION COST
# self.regularization = self.build_regularization()
......@@ -212,7 +223,8 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA):
unittest_tools.seed_rng()
def build_corrupted_input(self):
self.noise_level = theano.Member(T.scalar())
#self.noise_level = theano.Member(T.scalar())
self.noise_level = (T.scalar())
return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
def hid_activation_function(self, activation):
......@@ -262,12 +274,17 @@ class Module_Nclass(module.FancyModule):
def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False):
super(Module_Nclass, self).__init__() #boilerplate
self.x = module.Member(x) if x is not None else T.matrix('input')
self.targ = module.Member(targ) if targ is not None else T.lvector()
#self.x = module.Member(x) if x is not None else T.matrix('input')
self.x = (x) if x is not None else T.matrix('input')
#self.targ = module.Member(targ) if targ is not None else T.lvector()
self.targ = (targ) if targ is not None else T.lvector()
self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
self.b = module.Member(b) if b is not None else module.Member(T.dvector())
self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
#self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
self.w = (w) if w is not None else (T.dmatrix())
#self.b = module.Member(b) if b is not None else module.Member(T.dvector())
self.b = (b) if b is not None else (T.dvector())
#self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
self.lr = (lr) if lr is not None else (T.dscalar())
self.params = [p for p in [self.w, self.b] if p.owner is None]
......@@ -309,6 +326,7 @@ class Module_Nclass(module.FancyModule):
class ConvolutionalMLPInstance(module.FancyModuleInstance, Loss01):
#initialize is called by Module.make
def initialize(self, input_size, input_representation_size, hidden_representation_size, output_size, lr, seed, noise_level, qfilter_relscale):
print 'INITIALIZING'
# ASK JAMES: Is the following necessary?
# super(ConvolutionalMLPInstance, self)._instance_initialize(obj, **kwargs)
......@@ -352,7 +370,8 @@ class ConvolutionalMLP(module.FancyModule):
):
super(ConvolutionalMLP, self).__init__()
self.lr = module.Member(T.scalar())
#self.lr = module.Member(T.scalar())
self.lr = (T.scalar())
self.inputs = [T.dmatrix() for i in range(window_size)]
self.targ = T.lvector()
......@@ -426,7 +445,7 @@ class ConvolutionalMLP(module.FancyModule):
finetuning_cost = self.output.cost
finetuning_gradients = T.grad(finetuning_cost, finetuning_params)
finetuning_updates = dict((p, p - self.lr * g) for p, g in zip(finetuning_params, finetuning_gradients))
self.finetuning_update = module.Method(self.inputs + [self.targ], self.output.cost, finetuning_updates)
###DEBUG: self.finetuning_update = module.Method(self.inputs + [self.targ], self.output.cost, finetuning_updates)
#self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr])
#self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised)
......@@ -445,13 +464,11 @@ def create(window_size=3,
""" Create a convolutional model. """
activation_function = T.tanh
import pylearn.algorithms.cost
architecture = ConvolutionalMLP( \
window_size = window_size,
n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function,
reconstruction_cost_function = pylearn.algorithms.cost.quadratic,
reconstruction_cost_function = quadratic,
tie_weights = False
)
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
......@@ -471,13 +488,11 @@ def create_realistic(window_size=3,#7,
""" Create a convolutional model. """
activation_function = T.tanh
import pylearn.algorithms.cost
architecture = ConvolutionalMLP( \
window_size = window_size,
n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function,
reconstruction_cost_function = pylearn.algorithms.cost.quadratic,
reconstruction_cost_function = quadratic,
tie_weights = False
)
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论