提交 3231b8dd authored 作者: james@X40's avatar james@X40

merge

"""Driver of graph construction, optimization, and linking. """Driver of graph construction, optimization, and linking.
""" """
__docformat__ = "restructuredtext en"
import copy_reg import copy_reg
import cPickle import cPickle
......
差异被折叠。
#!/usr/bin/env python #!/usr/bin/env python
import numpy as N import numpy as N
from theano import Op, Apply, tensor as T, Module, Member, Method, Mode, compile from theano import Op, Apply, tensor as T, Module, Method, Mode, compile
from theano.gof import OpSub, TopoOptimizer from theano.gof import OpSub, TopoOptimizer
from pylearn.algorithms.minimizer import make_minimizer # minimizer
from theano.printing import Print from theano.printing import Print
from theano.tests import unittest_tools from theano.tests import unittest_tools
#import sgd #until Olivier's module-import thing works better
#################### ####################
# Library-type stuff # Library-type stuff
...@@ -15,8 +13,6 @@ from theano.tests import unittest_tools ...@@ -15,8 +13,6 @@ from theano.tests import unittest_tools
from theano.compile import module from theano.compile import module
from theano import tensor as T from theano import tensor as T
from pylearn.algorithms.minimizer import minimizer_factory
class StochasticGradientDescent(module.FancyModule): class StochasticGradientDescent(module.FancyModule):
"""Fixed stepsize gradient descent""" """Fixed stepsize gradient descent"""
def __init__(self, args, cost, params, gradients=None, stepsize=None, WEIRD_STUFF=True): def __init__(self, args, cost, params, gradients=None, stepsize=None, WEIRD_STUFF=True):
...@@ -29,18 +25,18 @@ class StochasticGradientDescent(module.FancyModule): ...@@ -29,18 +25,18 @@ class StochasticGradientDescent(module.FancyModule):
self.stepsize_init = None self.stepsize_init = None
if stepsize is None: if stepsize is None:
self.stepsize = module.Member(T.dscalar()) self.stepsize = (T.dscalar())
elif isinstance(stepsize, T.TensorResult): elif isinstance(stepsize, T.TensorResult):
self.stepsize = stepsize self.stepsize = stepsize
else: else:
if self.WEIRD_STUFF: if self.WEIRD_STUFF:
#TODO: why is this necessary? why does the else clause not work? #TODO: why is this necessary? why does the else clause not work?
# self.stepsize = module.Member(T.dscalar(), init = stepsize) # self.stepsize = module.Member(T.dscalar(), init = stepsize)
self.stepsize = module.Member(T.dscalar()) self.stepsize = (T.dscalar())
self.stepsize_init = stepsize self.stepsize_init = stepsize
else: else:
# self.stepsize = module.Member(T.value(stepsize)) # self.stepsize = module.Member(T.value(stepsize))
self.stepsize = module.Member(T.constant(stepsize))#work! self.stepsize = (T.constant(stepsize))#work!
if self.stepsize.ndim != 0: if self.stepsize.ndim != 0:
raise ValueError('stepsize must be a scalar', stepsize) raise ValueError('stepsize must be a scalar', stepsize)
...@@ -63,7 +59,6 @@ class StochasticGradientDescent(module.FancyModule): ...@@ -63,7 +59,6 @@ class StochasticGradientDescent(module.FancyModule):
pass pass
@minimizer_factory('sgd')
def sgd_minimizer(stepsize=None, **args): def sgd_minimizer(stepsize=None, **args):
def m(i,c,p,g=None): def m(i,c,p,g=None):
return StochasticGradientDescent(i, c, p, stepsize=stepsize, **args) return StochasticGradientDescent(i, c, p, stepsize=stepsize, **args)
...@@ -101,6 +96,9 @@ class TanhRnn(Op): ...@@ -101,6 +96,9 @@ class TanhRnn(Op):
return Apply(self, [x, z0, A], [z]) return Apply(self, [x, z0, A], [z])
def perform(self, node, (x,z0,A), out): def perform(self, node, (x,z0,A), out):
assert x is not None
assert z0 is not None
assert A is not None
T,M = x.shape T,M = x.shape
z = N.zeros((T+1, M)) z = N.zeros((T+1, M))
z[0] = z0 z[0] = z0
...@@ -161,10 +159,10 @@ class ExampleRNN(Module): ...@@ -161,10 +159,10 @@ class ExampleRNN(Module):
self.n_vis = n_vis self.n_vis = n_vis
#recurrent weight matrix in latent space #recurrent weight matrix in latent space
self.z0 = Member(T.dvector()) self.z0 = (T.dvector())
self.w = Member(T.dmatrix()) self.w = (T.dmatrix())
self.params = [self.w] self.params = [self.z0, self.w]
#input and target #input and target
x, y = T.dmatrix(), T.dmatrix() x, y = T.dmatrix(), T.dmatrix()
...@@ -176,6 +174,7 @@ class ExampleRNN(Module): ...@@ -176,6 +174,7 @@ class ExampleRNN(Module):
self.minimizer = minimizer([x, y], self.cost, self.params) self.minimizer = minimizer([x, y], self.cost, self.params)
def _instance_initialize(self, obj): def _instance_initialize(self, obj):
print 'INITIALIZE EXAMPLE RNN'
n_vis = self.n_vis n_vis = self.n_vis
rng = N.random.RandomState(unittest_tools.fetch_seed(2342)) rng = N.random.RandomState(unittest_tools.fetch_seed(2342))
...@@ -185,14 +184,14 @@ class ExampleRNN(Module): ...@@ -185,14 +184,14 @@ class ExampleRNN(Module):
obj.minimizer.initialize() obj.minimizer.initialize()
def test_example_rnn(): def test_example_rnn():
minimizer_fn = make_minimizer('sgd', stepsize = 0.001) minimizer_fn = sgd_minimizer(stepsize = 0.001)
n_vis = 5 n_vis = 5
n_out = 3 n_out = 3
n_hid = 4 n_hid = 4
rnn_module = ExampleRNN(n_vis, minimizer_fn) rnn_module = ExampleRNN(n_vis, minimizer_fn)
rnn = rnn_module.make(mode='FAST_RUN') rnn = rnn_module.make()
rng = N.random.RandomState(unittest_tools.fetch_seed(7722342)) rng = N.random.RandomState(unittest_tools.fetch_seed(7722342))
x = rng.randn(10,n_vis) x = rng.randn(10,n_vis)
...@@ -212,6 +211,7 @@ def test_example_rnn(): ...@@ -212,6 +211,7 @@ def test_example_rnn():
print i, rnn.minimizer.step_cost(x, y), rnn.minimizer.stepsize print i, rnn.minimizer.step_cost(x, y), rnn.minimizer.stepsize
else: else:
rnn.minimizer.step_cost(x, y) rnn.minimizer.step_cost(x, y)
assert rnn.minimizer.step_cost(x,y) < -20 #it starts around -.28
def test_WEIRD_STUFF(): def test_WEIRD_STUFF():
n_vis = 3 n_vis = 3
...@@ -224,8 +224,8 @@ def test_WEIRD_STUFF(): ...@@ -224,8 +224,8 @@ def test_WEIRD_STUFF():
LAG = 4 LAG = 4
y[LAG:] = x[:-LAG, 0:n_vis] y[LAG:] = x[:-LAG, 0:n_vis]
minimizer_fn1 = make_minimizer('sgd', stepsize = 0.001, WEIRD_STUFF = False) minimizer_fn1 = sgd_minimizer(stepsize = 0.001, WEIRD_STUFF = False)
minimizer_fn2 = make_minimizer('sgd', stepsize = 0.001, WEIRD_STUFF = True) minimizer_fn2 = sgd_minimizer(stepsize = 0.001, WEIRD_STUFF = True)
rnn_module1 = ExampleRNN(n_vis, minimizer_fn1) rnn_module1 = ExampleRNN(n_vis, minimizer_fn1)
rnn_module2 = ExampleRNN(n_vis, minimizer_fn2) rnn_module2 = ExampleRNN(n_vis, minimizer_fn2)
rnn1 = rnn_module1.make(mode='FAST_RUN') rnn1 = rnn_module1.make(mode='FAST_RUN')
......
...@@ -473,15 +473,6 @@ class GemmLocalOptimizer(LocalOptimizer): ...@@ -473,15 +473,6 @@ class GemmLocalOptimizer(LocalOptimizer):
return [T.add(*new_add_inputs)] return [T.add(*new_add_inputs)]
return False return False
@staticmethod
def failure_callback(exc, nav, repl_pairs):
"""WRITEME"""
if not isinstance(exc, InconsistencyError):
traceback.print_exc()
else:
#print 'GEMM caused cycle, it happens.'
pass
@staticmethod @staticmethod
def _as_scalar(res): def _as_scalar(res):
"""Return None or a TensorResult whose type is in T.float_scalar_types""" """Return None or a TensorResult whose type is in T.float_scalar_types"""
...@@ -579,11 +570,11 @@ class GemmLocalOptimizer(LocalOptimizer): ...@@ -579,11 +570,11 @@ class GemmLocalOptimizer(LocalOptimizer):
# TODO: This could be an equilibriumOptmizer, but I don't know how to combine an OpKeyOptimizer and # TODO: This could be an equilibriumOptmizer, but I don't know how to combine an OpKeyOptimizer and
# an EquilibriumOptimizer. # an EquilibriumOptimizer.
compile.optdb.register('inplace_gemm_0', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_0', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.00, 'fast_run', 'inplace', 'gemm') failure_callback=OpKeyOptimizer.warn_inplace), 70.00, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_1', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_1', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.01, 'fast_run', 'inplace', 'gemm') failure_callback=OpKeyOptimizer.warn_inplace), 70.01, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_2', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_2', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.02, 'fast_run', 'inplace', 'gemm') failure_callback=OpKeyOptimizer.warn_inplace), 70.02, 'fast_run', 'inplace', 'gemm')
class Dot22(GemmRelated): class Dot22(GemmRelated):
"""Compute a matrix-matrix product. """Compute a matrix-matrix product.
......
...@@ -17,6 +17,8 @@ def cross_entropy(target, output, axis=1): ...@@ -17,6 +17,8 @@ def cross_entropy(target, output, axis=1):
@warning: OUTPUT and TARGET are reversed in nnet_ops.binary_crossentropy @warning: OUTPUT and TARGET are reversed in nnet_ops.binary_crossentropy
""" """
return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis) return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis)
def quadratic(target, output, axis=1):
return T.mean(T.sqr(target - output), axis=axis)
class QuadraticDenoisingAA(module.Module): class QuadraticDenoisingAA(module.Module):
"""Quadratic de-noising Auto-encoder """Quadratic de-noising Auto-encoder
...@@ -70,27 +72,36 @@ class QuadraticDenoisingAA(module.Module): ...@@ -70,27 +72,36 @@ class QuadraticDenoisingAA(module.Module):
# ACQUIRE/MAKE INPUT # ACQUIRE/MAKE INPUT
if not input: if not input:
input = T.matrix('input') input = T.matrix('input')
self.input = theano.External(input) #self.input = theano.External(input)
self.input = (input)
# HYPER-PARAMETERS # HYPER-PARAMETERS
self.lr = theano.Member(T.scalar()) #self.lr = theano.Member(T.scalar())
self.lr = (T.scalar())
# PARAMETERS # PARAMETERS
if _qfilters is None: if _qfilters is None:
self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)] #self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
self.qfilters = [(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
else: else:
self.qfilters = [theano.Member(q) for q in _qfilters] #self.qfilters = [theano.Member(q) for q in _qfilters]
self.qfilters = [(q) for q in _qfilters]
self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1) #self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1)
self.w1 = (T.matrix('w1')) if _w1 is None else (_w1)
if _w2 is None: if _w2 is None:
if not tie_weights: if not tie_weights:
self.w2 = theano.Member(T.matrix()) #self.w2 = theano.Member(T.matrix())
self.w2 = (T.matrix())
else: else:
self.w2 = self.w1.T self.w2 = self.w1.T
else: else:
self.w2 = theano.Member(_w2) #self.w2 = theano.Member(_w2)
self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1) self.w2 = (_w2)
self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2) #self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1)
self.b1 = (T.vector('b1')) if _b1 is None else (_b1)
#self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2)
self.b2 = (T.vector('b2')) if _b2 is None else (_b2)
# # REGULARIZATION COST # # REGULARIZATION COST
# self.regularization = self.build_regularization() # self.regularization = self.build_regularization()
...@@ -212,7 +223,8 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA): ...@@ -212,7 +223,8 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA):
unittest_tools.seed_rng() unittest_tools.seed_rng()
def build_corrupted_input(self): def build_corrupted_input(self):
self.noise_level = theano.Member(T.scalar()) #self.noise_level = theano.Member(T.scalar())
self.noise_level = (T.scalar())
return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
def hid_activation_function(self, activation): def hid_activation_function(self, activation):
...@@ -262,12 +274,17 @@ class Module_Nclass(module.FancyModule): ...@@ -262,12 +274,17 @@ class Module_Nclass(module.FancyModule):
def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False): def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False):
super(Module_Nclass, self).__init__() #boilerplate super(Module_Nclass, self).__init__() #boilerplate
self.x = module.Member(x) if x is not None else T.matrix('input') #self.x = module.Member(x) if x is not None else T.matrix('input')
self.targ = module.Member(targ) if targ is not None else T.lvector() self.x = (x) if x is not None else T.matrix('input')
#self.targ = module.Member(targ) if targ is not None else T.lvector()
self.targ = (targ) if targ is not None else T.lvector()
self.w = module.Member(w) if w is not None else module.Member(T.dmatrix()) #self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
self.b = module.Member(b) if b is not None else module.Member(T.dvector()) self.w = (w) if w is not None else (T.dmatrix())
self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar()) #self.b = module.Member(b) if b is not None else module.Member(T.dvector())
self.b = (b) if b is not None else (T.dvector())
#self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
self.lr = (lr) if lr is not None else (T.dscalar())
self.params = [p for p in [self.w, self.b] if p.owner is None] self.params = [p for p in [self.w, self.b] if p.owner is None]
...@@ -309,6 +326,7 @@ class Module_Nclass(module.FancyModule): ...@@ -309,6 +326,7 @@ class Module_Nclass(module.FancyModule):
class ConvolutionalMLPInstance(module.FancyModuleInstance, Loss01): class ConvolutionalMLPInstance(module.FancyModuleInstance, Loss01):
#initialize is called by Module.make #initialize is called by Module.make
def initialize(self, input_size, input_representation_size, hidden_representation_size, output_size, lr, seed, noise_level, qfilter_relscale): def initialize(self, input_size, input_representation_size, hidden_representation_size, output_size, lr, seed, noise_level, qfilter_relscale):
print 'INITIALIZING'
# ASK JAMES: Is the following necessary? # ASK JAMES: Is the following necessary?
# super(ConvolutionalMLPInstance, self)._instance_initialize(obj, **kwargs) # super(ConvolutionalMLPInstance, self)._instance_initialize(obj, **kwargs)
...@@ -352,7 +370,8 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -352,7 +370,8 @@ class ConvolutionalMLP(module.FancyModule):
): ):
super(ConvolutionalMLP, self).__init__() super(ConvolutionalMLP, self).__init__()
self.lr = module.Member(T.scalar()) #self.lr = module.Member(T.scalar())
self.lr = (T.scalar())
self.inputs = [T.dmatrix() for i in range(window_size)] self.inputs = [T.dmatrix() for i in range(window_size)]
self.targ = T.lvector() self.targ = T.lvector()
...@@ -426,7 +445,7 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -426,7 +445,7 @@ class ConvolutionalMLP(module.FancyModule):
finetuning_cost = self.output.cost finetuning_cost = self.output.cost
finetuning_gradients = T.grad(finetuning_cost, finetuning_params) finetuning_gradients = T.grad(finetuning_cost, finetuning_params)
finetuning_updates = dict((p, p - self.lr * g) for p, g in zip(finetuning_params, finetuning_gradients)) finetuning_updates = dict((p, p - self.lr * g) for p, g in zip(finetuning_params, finetuning_gradients))
self.finetuning_update = module.Method(self.inputs + [self.targ], self.output.cost, finetuning_updates) ###DEBUG: self.finetuning_update = module.Method(self.inputs + [self.targ], self.output.cost, finetuning_updates)
#self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr]) #self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr])
#self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised) #self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised)
...@@ -445,13 +464,11 @@ def create(window_size=3, ...@@ -445,13 +464,11 @@ def create(window_size=3,
""" Create a convolutional model. """ """ Create a convolutional model. """
activation_function = T.tanh activation_function = T.tanh
import pylearn.algorithms.cost
architecture = ConvolutionalMLP( \ architecture = ConvolutionalMLP( \
window_size = window_size, window_size = window_size,
n_quadratic_filters = n_quadratic_filters, n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function, activation_function = activation_function,
reconstruction_cost_function = pylearn.algorithms.cost.quadratic, reconstruction_cost_function = quadratic,
tie_weights = False tie_weights = False
) )
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode) model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
...@@ -471,13 +488,11 @@ def create_realistic(window_size=3,#7, ...@@ -471,13 +488,11 @@ def create_realistic(window_size=3,#7,
""" Create a convolutional model. """ """ Create a convolutional model. """
activation_function = T.tanh activation_function = T.tanh
import pylearn.algorithms.cost
architecture = ConvolutionalMLP( \ architecture = ConvolutionalMLP( \
window_size = window_size, window_size = window_size,
n_quadratic_filters = n_quadratic_filters, n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function, activation_function = activation_function,
reconstruction_cost_function = pylearn.algorithms.cost.quadratic, reconstruction_cost_function = quadratic,
tie_weights = False tie_weights = False
) )
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode) model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论