提交 4e6fd916 authored 作者: james@X40's avatar james@X40

merge

"""Driver of graph construction, optimization, and linking. """Driver of graph construction, optimization, and linking.
""" """
__docformat__ = "restructuredtext en"
import copy_reg import copy_reg
import cPickle import cPickle
......
...@@ -19,7 +19,7 @@ This structure contains numbers and functions, and is ready for computation. ...@@ -19,7 +19,7 @@ This structure contains numbers and functions, and is ready for computation.
""" """
__doc__='restructuredtext en' __docformat__ = "restructuredtext en"
from theano import gof from theano import gof
from theano.printing import pprint from theano.printing import pprint
...@@ -32,14 +32,14 @@ import function_module as F ...@@ -32,14 +32,14 @@ import function_module as F
from mode import default_mode from mode import default_mode
def join(*args): def name_join(*args):
""" """
Creates a string representation for the given names: Creates a string representation for the given names:
join('a', 'b', 'c') => 'a.b.c' join('a', 'b', 'c') => 'a.b.c'
""" """
return ".".join(arg for arg in args if arg) return ".".join(arg for arg in args if arg)
def split(sym, n=-1): def name_split(sym, n=-1):
""" """
Gets the names from their joined representation Gets the names from their joined representation
split('a.b.c') => ['a', 'b', 'c'] split('a.b.c') => ['a', 'b', 'c']
...@@ -55,7 +55,7 @@ def canonicalize(name): ...@@ -55,7 +55,7 @@ def canonicalize(name):
[Fred: why we return the right type? Why int only?] [Fred: why we return the right type? Why int only?]
""" """
if isinstance(name, str): if isinstance(name, str):
name = split(name) name = name_split(name)
def convert(x): def convert(x):
try: try:
return int(x) return int(x)
...@@ -63,7 +63,6 @@ def canonicalize(name): ...@@ -63,7 +63,6 @@ def canonicalize(name):
return x return x
return map(convert, name) return map(convert, name)
class AllocationError(Exception): class AllocationError(Exception):
""" """
Exception raised when a Result has no associated storage. Exception raised when a Result has no associated storage.
...@@ -116,7 +115,7 @@ class Component(object): ...@@ -116,7 +115,7 @@ class Component(object):
else: else:
raise BindError("%s is already bound to %s as %s" % (self, self.parent, self.name)) raise BindError("%s is already bound to %s as %s" % (self, self.parent, self.name))
self.parent = parent self.parent = parent
self.name = join(parent.name, name) self.name = name_join(parent.name, name)
return self return self
def bound(self): def bound(self):
...@@ -302,41 +301,95 @@ class Member(_RComponent): ...@@ -302,41 +301,95 @@ class Member(_RComponent):
""" """
return memo[self.r].value return memo[self.r].value
class Method(Component): class Method(Component):
def __init__(self, inputs, outputs, updates = {}, kits = [], **kwupdates):
""" """
Method is a declaration of a function. It contains inputs, Method is a declaration of a function. It contains inputs,
outputs and updates. If the Method is part of a Composite outputs and updates. If the Method is part of a Composite
which holds references to Members, the Method may use them which holds references to Members, the Method may use them
without declaring them in the inputs, outputs or updates list. without declaring them in the inputs, outputs or updates list.
[TODO: remove references to kits, for they are not really
needed anymore]
inputs, outputs or updates may be strings. In that case, they inputs, outputs or updates may be strings. In that case, they
will be resolved in the Composite which is the parent of this will be resolved in the Composite which is the parent of this
Method. Method.
Method builds a Function (same structure as a call to Method builds a Function (same structure as a call to
theano.function) theano.function)
"""
inputs = []
"""function inputs (see `compile.function`)
If Module members are named explicitly in this list, then they will not use shared storage.
Storage must be provided either via an `io.In` value argument, or at the point of the
function call.
"""
outputs=None
"""function outputs (see `compile.function`)"""
updates = {}
"""update expressions for module members
If this method should update the shared storage value for a Module member, then the
update expression must be given in this dictionary.
Keys in this dictionary must be members of the module graph--results for which this Method
will use the shared storage.
The value associated with each key should be a Result (or a string that can be resolved to
a Result) representing the computation of a new value for this shared storage after
each function call.
"""
mode=None
"""This will override the Module compilation mode for this Method"""
def __init__(self, inputs, outputs, updates = {}, mode=None, **kwupdates):
"""Initialize attributes
:param inputs: value for `Method.inputs`
:param outputs: value for `Method.outputs`
:param updates: value for `Method.updates`
:param kwupdates: additions to `updates`
:param mode: value for `Method.mode`
:type inputs: list of (str or `Result` or `io.In`)
:type outputs: None or str or `Result` or `io.Out` or list of (str or `Result` or
`io.Out`)
:type updates: dict of `Result` or str -> `Result` or str
:type kwupdates: extra updates
:type mode: None or any mode accepted by `compile.function`
""" """
super(Method, self).__init__() super(Method, self).__init__()
self.inputs = inputs self.inputs = inputs
self.outputs = outputs self.outputs = outputs
self.updates = dict(updates, **kwupdates) self.updates = dict(updates, **kwupdates)
self.kits = list(kits) self.mode = mode
def bind(self, parent, name, dup_ok=True): def bind(self, parent, name, dup_ok=True):
"""Implement `Component.bind`"""
rval = super(Method, self).bind(parent, name, dup_ok=dup_ok) rval = super(Method, self).bind(parent, name, dup_ok=dup_ok)
rval.resolve_all() rval.resolve_all()
return rval return rval
def resolve(self, name): def resolve(self, name):
""" """Return the Result corresponding to a given name
Resolves the name of an input or output in the parent.
:param name: the name of a Result in the Module to which this Method is bound
:type name: str
:rtype: `Result`
""" """
if not self.bound(): if not self.bound():
raise ValueError('Trying to resolve a name on an unbound Method.') raise ValueError('Trying to resolve a name on an unbound Method.')
...@@ -345,35 +398,48 @@ class Method(Component): ...@@ -345,35 +398,48 @@ class Method(Component):
raise TypeError('Expected a Component with subtype Member or External.') raise TypeError('Expected a Component with subtype Member or External.')
return result return result
def resolve_result(self, x): def resolve_all(self):
if isinstance(x, gof.Result): """Convert all inputs, outputs, and updates specified as strings to Results.
This works by searching the attribute list of the Module to which this Method is bound.
"""
def resolve_result(x, passthrough=(gof.Result)):
if isinstance(x, passthrough):
return x return x
elif isinstance(x, _RComponent): elif isinstance(x, _RComponent):
return x.r return x.r
else: else:
return self.resolve(x).r return self.resolve(x).r
def resolve_all(self): def resolve_inputs():
""" if isinstance(self.inputs, (io.In, gof.Result, str)):
Resolves all inputs, outputs and updates that were given as
strings so that the fields contain the corresponding Result
instances instead.
"""
if isinstance(self.inputs, (gof.Result, str)):
inputs = [self.inputs] inputs = [self.inputs]
else: else:
inputs = list(self.inputs) inputs = list(self.inputs)
self.inputs = [self.resolve_result(input) for input in inputs] self.inputs = [resolve_result(input,
if isinstance(self.outputs, (list, tuple, ComponentList)): passthrough=(gof.Result, io.In)) for input in inputs]
self.outputs = [self.resolve_result(output) for output in self.outputs]
def resolve_outputs():
if isinstance(self.outputs, (io.Out, gof.Result, str, type(None))):
output = self.outputs
self.outputs = resolve_result(output,
passthrough=(gof.Result, io.Out, type(None)))
else: else:
self.outputs = self.resolve_result(self.outputs) outputs = list(self.outputs)
self.outputs = [resolve_result(output,
passthrough=(gof.Result, io.Out)) for output in outputs]
def resolve_updates():
updates = self.updates updates = self.updates
self.updates = {} self.updates = {}
for k, v in updates.iteritems(): for k, v in updates.iteritems():
k, v = self.resolve_result(k), self.resolve_result(v) k, v = resolve_result(k), resolve_result(v)
self.updates[k] = v self.updates[k] = v
resolve_inputs()
resolve_outputs()
resolve_updates()
def allocate(self, memo): def allocate(self, memo):
""" """
Method allocates nothing. Method allocates nothing.
...@@ -381,13 +447,21 @@ class Method(Component): ...@@ -381,13 +447,21 @@ class Method(Component):
return None return None
def build(self, mode, memo, allocate_all = False): def build(self, mode, memo, allocate_all = False):
""" """Compile a function for this Method.
Produces a function. If allocate_all is True, storage will be
allocated for all needed Results, even if there is no :param allocate_all: if True, storage will be
allocated for all needed Results even if there is no
associated storage for them in the memo. If allocate_all is associated storage for them in the memo. If allocate_all is
False, storage will only be allocated for Results that are False, storage will only be allocated for Results that are
reachable from the inputs list. reachable from the inputs list.
:returns: a function that implements this method
:rtype: `Function` instance
""" """
if self in memo:
return memo[self]
self.resolve_all() # resolve all so we don't have to mess with strings self.resolve_all() # resolve all so we don't have to mess with strings
def get_storage(r, require = False): def get_storage(r, require = False):
# If require is True, we can only get storage from the memo. # If require is True, we can only get storage from the memo.
...@@ -399,24 +473,56 @@ class Method(Component): ...@@ -399,24 +473,56 @@ class Method(Component):
' Verify that it is indeed a Member of the' ' Verify that it is indeed a Member of the'
' enclosing module or of one of its submodules.' % (r, self.name, self)) ' enclosing module or of one of its submodules.' % (r, self.name, self))
else: else:
return io.In(result = r, value = gof.Container(r, storage = [None]), mutable = False) return io.In(result=r,
# Wrap the inputs in In instances. TODO: allow the inputs to _be_ In instances value=gof.Container(r, storage=[None]),
mutable=False)
inputs = self.inputs inputs = self.inputs
inputs = [io.In(result = input,
value = get_storage(input).value, # Deal with explicit inputs
mutable = False) inputs = []
for input in inputs] for input in self.inputs:
# Add the members to update to the inputs. TODO: see above if type(input) is io.In:
inputs += [io.In(result = k, inputs.append(input)
update = v, elif isinstance(input, gof.Result):
value = get_storage(k, not allocate_all).value, input_in = io.In(
mutable = True, result=input,
strict = True) mutable=False)
for k, v in self.updates.iteritems()] inputs.append(input_in)
else:
raise TypeError(input, type(input))
# Deal with updates to shared storage
for k, v in self.updates.iteritems():
assert isinstance(k, gof.Result)
assert isinstance(v, gof.Result)
#identify an input for result k
input_k = None
for input in inputs:
if input.result == k:
input_k = input
#print 'METHOD UPDATE', k, v, input_k
if input_k is None:
# this is an implicit input,
# use shared storage
input_k = io.In(
result=k,
update=v,
value=get_storage(k, not allocate_all).value,
mutable=True)
inputs.append(input_k)
else:
raise ValueError(('Result listed in both inputs and updates.'
' Use inputs to use your own storage, use updates to '
'work on module-shared storage'), k)
outputs = self.outputs outputs = self.outputs
_inputs = [x.result for x in inputs] _inputs = [x.result for x in inputs]
# Grab the results that are not accessible from either the inputs or the updates. # Grab the results that are not accessible from either the inputs or the updates.
for input in gof.graph.inputs((list(outputs) if isinstance(outputs, (list, tuple)) else [outputs]) outputs_list = list(outputs) if isinstance(outputs, (list, tuple)) else [outputs]
outputs_result_list = [o.result if isinstance(o, io.Out) else o for o in outputs_list]
for input in gof.graph.inputs(outputs_result_list
+ [x.update for x in inputs if getattr(x, 'update', False)], + [x.update for x in inputs if getattr(x, 'update', False)],
blockers = _inputs): blockers = _inputs):
if input not in _inputs: if input not in _inputs:
...@@ -424,12 +530,18 @@ class Method(Component): ...@@ -424,12 +530,18 @@ class Method(Component):
# but otherwise they are immutable. # but otherwise they are immutable.
if isinstance(input, gof.Value): # and not isinstance(input, gof.Constant): if isinstance(input, gof.Value): # and not isinstance(input, gof.Constant):
storage = get_storage(input) storage = get_storage(input)
storage.value = input.data assert type(storage) is io.In
container = storage.value
container.value = input.data
else: else:
storage = get_storage(input, not allocate_all) storage = get_storage(input, not allocate_all)
assert type(storage) is io.In
inputs.append(storage) inputs.append(storage)
return F.function(inputs, outputs, mode) effective_mode = mode if self.mode is None else self.mode
rval = F.function(inputs, outputs, effective_mode)
memo[self] = rval
return rval
def pretty(self, **kwargs): def pretty(self, **kwargs):
self.resolve_all() self.resolve_all()
...@@ -458,17 +570,15 @@ class Method(Component): ...@@ -458,17 +570,15 @@ class Method(Component):
def dup(self): def dup(self):
self.resolve_all() self.resolve_all()
return self.__class__(list(self.inputs), return self.__class__(inputs=list(self.inputs),
list(self.outputs) if isinstance(self.outputs, list) else self.outputs, outputs=list(self.outputs) if isinstance(self.outputs, list) else self.outputs,
dict(self.updates), updates=dict(self.updates),
list(self.kits)) mode=self.mode)
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
raise TypeError("'Method' object is not callable" raise TypeError("'Method' object is not callable"
" (Hint: compile your module first. See Component.make())") " (Hint: compile your module first. See Component.make())")
class CompositeInstance(object): class CompositeInstance(object):
""" """
Generic type which various Composite subclasses are intended to Generic type which various Composite subclasses are intended to
...@@ -579,6 +689,7 @@ class Composite(Component): ...@@ -579,6 +689,7 @@ class Composite(Component):
def __getitem__(self, item): def __getitem__(self, item):
# Uses get() internally # Uses get() internally
print 'COMPOSITE GETITEM', item
x = self.get(item) x = self.get(item)
if isinstance(x, (External, Member)): if isinstance(x, (External, Member)):
return x.r return x.r
...@@ -617,6 +728,8 @@ class ComponentList(Composite): ...@@ -617,6 +728,8 @@ class ComponentList(Composite):
_components = _components[0] _components = _components[0]
self._components = [] self._components = []
for c in _components: for c in _components:
if not isinstance(c, Component):
raise TypeError(c, type(c))
self.append(c) self.append(c)
def resolve(self, name): def resolve(self, name):
...@@ -713,18 +826,15 @@ def default_initialize(self, init = {}, **kwinit): ...@@ -713,18 +826,15 @@ def default_initialize(self, init = {}, **kwinit):
for k, initv in dict(init, **kwinit).iteritems(): for k, initv in dict(init, **kwinit).iteritems():
self[k] = initv self[k] = initv
class ComponentDictInstance(CompositeInstance): class ComponentDictInstanceNoInit(CompositeInstance):
""" """Component Instance that allows new items to be added"""
ComponentDictInstance is meant to be instantiated by ComponentDict.
"""
def __setitem__(self, item, value): def __setitem__(self, item, value):
if item not in self.__items__: if item not in self.__items__:
# Set it if it's not there # Set it if it's not there
# TODO: is this needed here? move to ModuleInstance? # TODO: is this needed here? move to ModuleInstance?
self.__items__[item] = value self.__items__[item] = value
return else:
super(ComponentDictInstance, self).__setitem__(item, value) super(ComponentDictInstanceNoInit, self).__setitem__(item, value)
def __str__(self): def __str__(self):
strings = [] strings = []
...@@ -737,14 +847,30 @@ class ComponentDictInstance(CompositeInstance): ...@@ -737,14 +847,30 @@ class ComponentDictInstance(CompositeInstance):
return '{%s}' % '\n'.join(strings).replace('\n', '\n ') return '{%s}' % '\n'.join(strings).replace('\n', '\n ')
class ComponentDictInstance(ComponentDictInstanceNoInit):
"""
ComponentDictInstance is meant to be instantiated by ComponentDict.
"""
def initialize(self, init={}, **kwinit):
for k, initv in dict(init, **kwinit).iteritems():
self[k] = initv
class ComponentDict(Composite): class ComponentDict(Composite):
InstanceType = ComponentDictInstance # Type used by build() to make the instance InstanceType = ComponentDictInstance # Type used by build() to make the instance
def __init__(self, components = {}, **kwcomponents): def __init__(self, components = {}, **kwcomponents):
super(ComponentDict, self).__init__() super(ComponentDict, self).__init__()
components = dict(components, **kwcomponents) components = dict(components, **kwcomponents)
for val in components.itervalues():
if not isinstance(val, Component):
raise TypeError(val, type(val))
self.__dict__['_components'] = components self.__dict__['_components'] = components
def resolve(self, name): def resolve(self, name):
name = canonicalize(name) name = canonicalize(name)
item = self.get(name[0]) item = self.get(name[0])
...@@ -804,22 +930,35 @@ __autowrappers = [] ...@@ -804,22 +930,35 @@ __autowrappers = []
def register_wrapper(condition, wrapper): def register_wrapper(condition, wrapper):
__autowrappers.append((condition, wrapper)) __autowrappers.append((condition, wrapper))
def wrapper(x):
"""Returns a wrapper function appropriate for `x`
Returns None if no appropriate wrapper is found
"""
for condition, wrap_fn in __autowrappers:
if condition(x):
return wrap_fn
return None
def wrap(x): def wrap(x):
""" """
Wraps x in a Component. Wrappers can be registered using Wraps x in a Component. Wrappers can be registered using
register_wrapper to allow wrapping more types. register_wrapper to allow wrapping more types.
""" """
if isinstance(x, Component): w = wrapper(x)
return x if w is not None:
for condition, wrapper in __autowrappers: return w(x)
if condition(x): else:
return wrapper(x)
return x return x
def dict_wrap(d): def dict_wrap(d):
d_copy = {}
for k,v in d.iteritems(): for k,v in d.iteritems():
d[k]=wrap(v) d_copy[k]=wrap(v)
return d return d_copy
# Component -> itself
register_wrapper(lambda x: isinstance(x, Component),
lambda x: x)
# Result -> Member # Result -> Member
register_wrapper(lambda x: isinstance(x, gof.Result) and not x.owner, register_wrapper(lambda x: isinstance(x, gof.Result) and not x.owner,
...@@ -831,13 +970,12 @@ register_wrapper(lambda x: isinstance(x, gof.Result) and x.owner, ...@@ -831,13 +970,12 @@ register_wrapper(lambda x: isinstance(x, gof.Result) and x.owner,
# [[Result1], {Result2}, Result3...] -> ComponentList(Member(Result1), Member(Result2), ...) # [[Result1], {Result2}, Result3...] -> ComponentList(Member(Result1), Member(Result2), ...)
register_wrapper(lambda x: isinstance(x, (list, tuple)) \ register_wrapper(lambda x: isinstance(x, (list, tuple)) \
and all(isinstance(r, (gof.Result,Component,list, and all(wrapper(r) is not None for r in x),
tuple, dict)) for r in x),
lambda x: ComponentList(*map(wrap, x))) lambda x: ComponentList(*map(wrap, x)))
#{ "name1":{Component,Result,list,tuple,dict},...} -> ComponentDict({Component,Result,list,tuple,dict},...) #{ "name1":{Component,Result,list,tuple,dict},...} -> ComponentDict({Component,Result,list,tuple,dict},...)
register_wrapper(lambda x: isinstance(x, dict) \ register_wrapper(lambda x: isinstance(x, dict) \
and all(isinstance(r,(Component,gof.Result,list,tuple,dict)) for r in x.itervalues()), and all(wrapper(r) is not None for r in x.itervalues()),
lambda x: ComponentDict(dict_wrap(x))) lambda x: ComponentDict(dict_wrap(x)))
class Curry: class Curry:
...@@ -855,7 +993,7 @@ class Curry: ...@@ -855,7 +993,7 @@ class Curry:
self.meth = getattr(self.obj, self.name) self.meth = getattr(self.obj, self.name)
class ModuleInstance(ComponentDictInstance): class ModuleInstance(ComponentDictInstanceNoInit):
""" """
WRITEME WRITEME
...@@ -913,28 +1051,31 @@ class Module(ComponentDict): ...@@ -913,28 +1051,31 @@ class Module(ComponentDict):
self.__set_name__(value) self.__set_name__(value)
return return
def remove_member(v): def unpack_member_and_external(v):
if isinstance(v, (Member, External)): if isinstance(v, (Member, External)):
print >> sys.stderr, ("WARNING: assignment of Member or External "
"objects (either directly or indirectly) to Module "
"is deprecated. Just use Result.")
return v.r return v.r
elif isinstance(v, (gof.Result,Method,Module)): elif isinstance(v, (gof.Result,Method,Module)):
return v return v
elif isinstance(v,(int,bool)): elif isinstance(v,(int,bool)):
return v return v
elif isinstance(v, (list)): elif isinstance(v, (list)):
return map(remove_member,v) return map(unpack_member_and_external,v)
elif isinstance(v, (tuple)): elif isinstance(v, (tuple)):
return tuple(map(remove_member,v)) return tuple(map(unpack_member_and_external,v))
elif isinstance(v,dict): elif isinstance(v,dict):
v_copy = dict()
for k,vv in v.iteritems(): for k,vv in v.iteritems():
v[k]=remove_member(vv) v_copy[k]=unpack_member_and_external(vv)
return v return v
else: else:
# raise NotImplementedError # raise NotImplementedError
# print "WARNING: unknow:",v # print "WARNING: unknow:",v
return v return v
value=remove_member(value) value=unpack_member_and_external(value)
if not hasattr(self,"local_attr"): if not hasattr(self,"local_attr"):
self.__dict__["local_attr"]={} self.__dict__["local_attr"]={}
self.__dict__["local_attr_order"]=[] self.__dict__["local_attr_order"]=[]
...@@ -946,11 +1087,21 @@ class Module(ComponentDict): ...@@ -946,11 +1087,21 @@ class Module(ComponentDict):
for k,v in list(self.local_attr_order): #.iteritems(): for k,v in list(self.local_attr_order): #.iteritems():
self.__setattr__(k,v) self.__setattr__(k,v)
inst = super(Module, self).build(mode, memo) inst = super(Module, self).build(mode, memo)
for method in dir(self): if not isinstance(inst, ModuleInstance):
raise TypeError('The InstanceType of a Module should inherit from ModuleInstance',
(self, type(inst)))
for methodname in dir(self):
# Any method with a name like '_instance_XXX' is added to # Any method with a name like '_instance_XXX' is added to
# the object built under the name obj.XXX # the object built under the name obj.XXX
if method.startswith('_instance_'): if methodname.startswith('_instance_'):
setattr(inst, method[10:], Curry(self, method, inst)) new_methodname = methodname[len('_instance_'):]
if not hasattr(inst, new_methodname):
curried = Curry(self, methodname, inst)
# setattr doesn't work here because we overrode __setattr__
# setattr(inst, new_methodname, curried)
inst.__dict__[new_methodname] = curried
assert getattr(inst, new_methodname) == curried
#print 'ADDING METHOD', method, 'to', id(inst), new_methodname, getattr(inst, new_methodname)
return inst return inst
def _instance_initialize(self, inst, init = {}, **kwinit): def _instance_initialize(self, inst, init = {}, **kwinit):
......
#!/usr/bin/env python #!/usr/bin/env python
import numpy as N import numpy as N
from theano import Op, Apply, tensor as T, Module, Member, Method, Mode, compile from theano import Op, Apply, tensor as T, Module, Method, Mode, compile
from theano.gof import OpSub, TopoOptimizer from theano.gof import OpSub, TopoOptimizer
from pylearn.algorithms.minimizer import make_minimizer # minimizer
from theano.printing import Print from theano.printing import Print
from theano.tests import unittest_tools from theano.tests import unittest_tools
#import sgd #until Olivier's module-import thing works better
#################### ####################
# Library-type stuff # Library-type stuff
...@@ -15,8 +13,6 @@ from theano.tests import unittest_tools ...@@ -15,8 +13,6 @@ from theano.tests import unittest_tools
from theano.compile import module from theano.compile import module
from theano import tensor as T from theano import tensor as T
from pylearn.algorithms.minimizer import minimizer_factory
class StochasticGradientDescent(module.FancyModule): class StochasticGradientDescent(module.FancyModule):
"""Fixed stepsize gradient descent""" """Fixed stepsize gradient descent"""
def __init__(self, args, cost, params, gradients=None, stepsize=None, WEIRD_STUFF=True): def __init__(self, args, cost, params, gradients=None, stepsize=None, WEIRD_STUFF=True):
...@@ -29,18 +25,18 @@ class StochasticGradientDescent(module.FancyModule): ...@@ -29,18 +25,18 @@ class StochasticGradientDescent(module.FancyModule):
self.stepsize_init = None self.stepsize_init = None
if stepsize is None: if stepsize is None:
self.stepsize = module.Member(T.dscalar()) self.stepsize = (T.dscalar())
elif isinstance(stepsize, T.TensorResult): elif isinstance(stepsize, T.TensorResult):
self.stepsize = stepsize self.stepsize = stepsize
else: else:
if self.WEIRD_STUFF: if self.WEIRD_STUFF:
#TODO: why is this necessary? why does the else clause not work? #TODO: why is this necessary? why does the else clause not work?
# self.stepsize = module.Member(T.dscalar(), init = stepsize) # self.stepsize = module.Member(T.dscalar(), init = stepsize)
self.stepsize = module.Member(T.dscalar()) self.stepsize = (T.dscalar())
self.stepsize_init = stepsize self.stepsize_init = stepsize
else: else:
# self.stepsize = module.Member(T.value(stepsize)) # self.stepsize = module.Member(T.value(stepsize))
self.stepsize = module.Member(T.constant(stepsize))#work! self.stepsize = (T.constant(stepsize))#work!
if self.stepsize.ndim != 0: if self.stepsize.ndim != 0:
raise ValueError('stepsize must be a scalar', stepsize) raise ValueError('stepsize must be a scalar', stepsize)
...@@ -63,7 +59,6 @@ class StochasticGradientDescent(module.FancyModule): ...@@ -63,7 +59,6 @@ class StochasticGradientDescent(module.FancyModule):
pass pass
@minimizer_factory('sgd')
def sgd_minimizer(stepsize=None, **args): def sgd_minimizer(stepsize=None, **args):
def m(i,c,p,g=None): def m(i,c,p,g=None):
return StochasticGradientDescent(i, c, p, stepsize=stepsize, **args) return StochasticGradientDescent(i, c, p, stepsize=stepsize, **args)
...@@ -101,6 +96,9 @@ class TanhRnn(Op): ...@@ -101,6 +96,9 @@ class TanhRnn(Op):
return Apply(self, [x, z0, A], [z]) return Apply(self, [x, z0, A], [z])
def perform(self, node, (x,z0,A), out): def perform(self, node, (x,z0,A), out):
assert x is not None
assert z0 is not None
assert A is not None
T,M = x.shape T,M = x.shape
z = N.zeros((T+1, M)) z = N.zeros((T+1, M))
z[0] = z0 z[0] = z0
...@@ -161,10 +159,10 @@ class ExampleRNN(Module): ...@@ -161,10 +159,10 @@ class ExampleRNN(Module):
self.n_vis = n_vis self.n_vis = n_vis
#recurrent weight matrix in latent space #recurrent weight matrix in latent space
self.z0 = Member(T.dvector()) self.z0 = (T.dvector())
self.w = Member(T.dmatrix()) self.w = (T.dmatrix())
self.params = [self.w] self.params = [self.z0, self.w]
#input and target #input and target
x, y = T.dmatrix(), T.dmatrix() x, y = T.dmatrix(), T.dmatrix()
...@@ -176,6 +174,7 @@ class ExampleRNN(Module): ...@@ -176,6 +174,7 @@ class ExampleRNN(Module):
self.minimizer = minimizer([x, y], self.cost, self.params) self.minimizer = minimizer([x, y], self.cost, self.params)
def _instance_initialize(self, obj): def _instance_initialize(self, obj):
print 'INITIALIZE EXAMPLE RNN'
n_vis = self.n_vis n_vis = self.n_vis
rng = N.random.RandomState(unittest_tools.fetch_seed(2342)) rng = N.random.RandomState(unittest_tools.fetch_seed(2342))
...@@ -185,14 +184,14 @@ class ExampleRNN(Module): ...@@ -185,14 +184,14 @@ class ExampleRNN(Module):
obj.minimizer.initialize() obj.minimizer.initialize()
def test_example_rnn(): def test_example_rnn():
minimizer_fn = make_minimizer('sgd', stepsize = 0.001) minimizer_fn = sgd_minimizer(stepsize = 0.001)
n_vis = 5 n_vis = 5
n_out = 3 n_out = 3
n_hid = 4 n_hid = 4
rnn_module = ExampleRNN(n_vis, minimizer_fn) rnn_module = ExampleRNN(n_vis, minimizer_fn)
rnn = rnn_module.make(mode='FAST_RUN') rnn = rnn_module.make()
rng = N.random.RandomState(unittest_tools.fetch_seed(7722342)) rng = N.random.RandomState(unittest_tools.fetch_seed(7722342))
x = rng.randn(10,n_vis) x = rng.randn(10,n_vis)
...@@ -212,6 +211,7 @@ def test_example_rnn(): ...@@ -212,6 +211,7 @@ def test_example_rnn():
print i, rnn.minimizer.step_cost(x, y), rnn.minimizer.stepsize print i, rnn.minimizer.step_cost(x, y), rnn.minimizer.stepsize
else: else:
rnn.minimizer.step_cost(x, y) rnn.minimizer.step_cost(x, y)
assert rnn.minimizer.step_cost(x,y) < -20 #it starts around -.28
def test_WEIRD_STUFF(): def test_WEIRD_STUFF():
n_vis = 3 n_vis = 3
...@@ -224,8 +224,8 @@ def test_WEIRD_STUFF(): ...@@ -224,8 +224,8 @@ def test_WEIRD_STUFF():
LAG = 4 LAG = 4
y[LAG:] = x[:-LAG, 0:n_vis] y[LAG:] = x[:-LAG, 0:n_vis]
minimizer_fn1 = make_minimizer('sgd', stepsize = 0.001, WEIRD_STUFF = False) minimizer_fn1 = sgd_minimizer(stepsize = 0.001, WEIRD_STUFF = False)
minimizer_fn2 = make_minimizer('sgd', stepsize = 0.001, WEIRD_STUFF = True) minimizer_fn2 = sgd_minimizer(stepsize = 0.001, WEIRD_STUFF = True)
rnn_module1 = ExampleRNN(n_vis, minimizer_fn1) rnn_module1 = ExampleRNN(n_vis, minimizer_fn1)
rnn_module2 = ExampleRNN(n_vis, minimizer_fn2) rnn_module2 = ExampleRNN(n_vis, minimizer_fn2)
rnn1 = rnn_module1.make(mode='FAST_RUN') rnn1 = rnn_module1.make(mode='FAST_RUN')
......
#!/usr/bin/env python #!/usr/bin/env python
"""Test compile.module"""
__docformat__ = "restructuredtext en"
import cPickle, numpy, unittest import cPickle, numpy, unittest
from theano.compile.module import * from theano.compile.module import *
import theano.tensor as T import theano.tensor as T
import sys import sys
import theano import theano
#TODO: add test for module.make(member=init_value) #TODO: add test for module.make(member=init_value)
class T_test_module(unittest.TestCase): class T_module(unittest.TestCase):
def test_whats_up_with_submembers(self): def test_whats_up_with_submembers(self):
class Blah(FancyModule): class Blah(Module):
def __init__(self, stepsize): def __init__(self, stepsize):
super(Blah, self).__init__() super(Blah, self).__init__()
self.stepsize = Member(T.value(stepsize)) self.stepsize = T.value(stepsize)
x = T.dscalar() x = T.dscalar()
self.step = Method([x], x - self.stepsize) self.step = Method([x], x - self.stepsize)
B = Blah(0.0) B = Blah(0.0)
b = B.make(mode='FAST_RUN') b = B.make(mode='FAST_RUN')
assert b.stepsize == 0.0
b.step(1.0) b.step(1.0)
assert b.stepsize == 0.0 assert b.stepsize == 0.0
...@@ -57,8 +63,23 @@ class T_test_module(unittest.TestCase): ...@@ -57,8 +63,23 @@ class T_test_module(unittest.TestCase):
assert isinstance(m1.x,(gof.Result)) assert isinstance(m1.x,(gof.Result))
assert isinstance(m1.y,(gof.Result)) assert isinstance(m1.y,(gof.Result))
for i in [m1.lx[0], m1.ly[0], m1.llx[0][0], m1.lly[0][0], m1.ltx[0][0], m1.lty[0][0], m1.ldx[0]['x'], m1.ldy[0]['y'], m1.tx[0], m1.ty[0], m1.tlx[0][0], m1.tly[0][0], m1.ttx[0][0], m1.tty[0][0], m1.tdx[0]['x'], m1.tdy[0]['y'], m1.dx['x'], m1.dy['y'], m1.dlx['x'][0], m1.dly['y'][0], m1.dtx['x'][0], m1.dty['y'][0], m1.ddx['x']['x'], m1.ddy['y']['y']]: for i, obj in enumerate([
assert isinstance(i,(gof.Result)) m1.lx[0], #0
m1.llx[0][0],
m1.ltx[0][0],
m1.ldx[0]['x'],
m1.lty[0][0],#5
m1.ldy[0]['y'],
m1.ly[0],
m1.lly[0][0],
m1.tx[0], #8
m1.ty[0], m1.tlx[0][0],
m1.tly[0][0], m1.ttx[0][0], m1.tty[0][0], m1.tdx[0]['x'],
m1.tdy[0]['y'], m1.dx['x'],
m1.dy['y'], m1.dlx['x'][0], m1.dly['y'][0],
m1.dtx['x'][0], m1.dty['y'][0], m1.ddx['x']['x'],
m1.ddy['y']['y']]):
assert isinstance(obj,(gof.Result))
inst=m1.make() inst=m1.make()
...@@ -98,23 +119,72 @@ class T_test_module(unittest.TestCase): ...@@ -98,23 +119,72 @@ class T_test_module(unittest.TestCase):
for i,j in zip(get_l2(),range(len(get_l2()))): for i,j in zip(get_l2(),range(len(get_l2()))):
assert i[0]==j assert i[0]==j
local_test(lambda:T.dscalar(),lambda:Member(T.dscalar())) local_test(lambda:T.dscalar(),lambda:T.dscalar())
local_test(lambda:T.value(1),lambda:Member(T.value(2))) local_test(lambda:T.value(1),lambda:T.value(2))
local_test(lambda:T.constant(1),lambda:Member(T.constant(2))) local_test(lambda:T.constant(1),lambda:T.constant(2))
def test_compound_structure_assignment(self): def test_list_assign(self):
"""Test that list members can be assigned list-wise""" """Test that list members can be assigned list-wise"""
def local_test(x,y): def local_test(x,y):
m1=Module() m1=Module()
m1.l=[x(), y()]#cast Result]
#create a list with some results in it
m1.l=[x(), y()]
# create a Method that makes the second list element a shared Member
m1.f=Method([], m1.l[1]) m1.f=Method([], m1.l[1])
m1.g=Method([], m1.l[0])
m = m1.make() m = m1.make()
#assign 4 and 5 to the two results' containers in m
m.l = [4, 5] m.l = [4, 5]
print 'm.f', m.f()
assert numpy.all(5 == m.f())
assert numpy.all(4 == m.g())
local_test(lambda:T.dscalar(),lambda:T.dscalar())
local_test(lambda:T.value(1),lambda:T.value(2))
def test_tuple_assign(self):
"""Test that tuple members can be assigned tuple-wise"""
def local_test(x,y):
m1=Module()
m1.l=(x(), y())
# create Methods over both tuple elements, making each a shared Member
m1.g=Method([], m1.l[0])
m1.f=Method([], m1.l[1])
m = m1.make()
#assign 4 and 5 to the two results' containers in m
m.l = (4, 5)
assert 5 == m.f()
assert 4 == m.g()
local_test(lambda:T.dscalar(),lambda:T.dscalar())
local_test(lambda:T.value(1),lambda:T.value(2))
def test_dict_assign(self):
"""Test that dict members can be assigned dict-wise"""
def local_test(x,y):
m1=Module()
##DICT
m1.l={'x':x(), 'y':y()}
# create Methods over both dict entries, making each a shared Member
m1.f=Method([], m1.l['y'])
m1.g=Method([], m1.l['x'])
m = m1.make()
#assign 4 and 5 to the two results' containers in m
m.l = dict(x=4, y=5)
assert 5 == m.f() assert 5 == m.f()
assert 4 == m.g()
local_test(lambda:T.dscalar(),lambda:Member(T.dscalar())) print 'dscalar test'
local_test(lambda:T.value(1),lambda:Member(T.value(2))) local_test(lambda:T.dscalar(),lambda:T.dscalar())
local_test(lambda:T.constant(1),lambda:Member(T.constant(2))) print 'value test'
local_test(lambda:T.value(1),lambda:T.value(2))
def test_method_in_list_or_dict(self): def test_method_in_list_or_dict(self):
...@@ -197,11 +267,12 @@ class T_test_module(unittest.TestCase): ...@@ -197,11 +267,12 @@ class T_test_module(unittest.TestCase):
def get_element(i): def get_element(i):
return [i.x,i.lx[0],i.tx[0],i.dx['x'],i.llx[0][0], i.llx[1][0], i.ltx[0][0], i.ldx[0]['x'], i.tlx[0][0], i.tlx[0][0], i.tdx[0]['x'], i.dlx['x'][0], i.dtx['x'][0], i.ddx['x']['x']] return [i.x,i.lx[0],i.tx[0],i.dx['x'],i.llx[0][0], i.llx[1][0], i.ltx[0][0], i.ldx[0]['x'], i.tlx[0][0], i.tlx[0][0], i.tdx[0]['x'], i.dlx['x'][0], i.dtx['x'][0], i.ddx['x']['x']]
m1=Module() m1=Module()
m2=Module() m2=Module()
x=T.dscalar() x=T.dscalar()
populate_module(m1,x) populate_module(m1,x)
populate_module(m2,Member(x)) populate_module(m2,x)
#m1.x and m2.x should not be shared as there is no hierarchical link between them. #m1.x and m2.x should not be shared as there is no hierarchical link between them.
inst1=m1.make() inst1=m1.make()
inst2=m2.make() inst2=m2.make()
...@@ -248,8 +319,8 @@ class T_test_module(unittest.TestCase): ...@@ -248,8 +319,8 @@ class T_test_module(unittest.TestCase):
m4=Module() m4=Module()
x=T.dscalar() x=T.dscalar()
populate_module(m1,x) populate_module(m1,x)
populate_module(m2,Member(x)) populate_module(m2,(x))
populate_module(m4,Member(x)) populate_module(m4,(x))
#m1.x and m2.x should not be shared as there is no hierarchical link between them. #m1.x and m2.x should not be shared as there is no hierarchical link between them.
inst1=m1.make() inst1=m1.make()
inst2=m2.make() inst2=m2.make()
...@@ -323,49 +394,90 @@ class T_test_module(unittest.TestCase): ...@@ -323,49 +394,90 @@ class T_test_module(unittest.TestCase):
assert isinstance(inst.dy['y'],theano.compile.function_module.Function) assert isinstance(inst.dy['y'],theano.compile.function_module.Function)
assert isinstance(inst.tty[0][0],theano.compile.function_module.Function) assert isinstance(inst.tty[0][0],theano.compile.function_module.Function)
print >> sys.stderr, "MODULE TEST IMPLEMENTED BUT WE DON'T KNOW WHAT WE WANT AS A RESULT"
def test_shared_method_N(self):
"""Test that Methods can be shared an arbitrary number of times between many submodules and
internal data structures."""
#put them in subModules, sub-sub-Modules, shared between a list and a dict, shared between assert m1.y is m1.ly[0]
#a list and a submodule with a dictionary, etc... assert inst.y is inst.ly[0]
print >> sys.stderr, "WARNING MODULE TEST NOT IMPLEMENTED" assert inst.y is inst.lly[0][0]
assert inst.y is inst.ty[0]
assert inst.y is inst.tty[0][0]
assert inst.y is inst.dy['y']
def test_member_method_inputs(self): def test_member_method_inputs(self):
"""Test that module Members can be named as Method inputs, in which case the function will """Test that module Members can be named as Method inputs, in which case the function will
*not* use the storage allocated for the Module's version of that Member. *not* use the storage allocated for the Module's version of that Member.
If the module has a member x and a function takes a parameter named x that is not that member, this must be handled correctly. """
The weights do not change.
""" # test that explicit Method inputs don't use shared storage
print >> sys.stderr, "WARNING MODULE TEST NOT IMPLEMENTED" M = Module()
M.x = T.dscalar()
M.y = T.dscalar()
M.f = Method([M.x], M.x + M.y)
M.g = Method([M.y], M.x - M.y)
m = M.make()
m.y = 77
assert m.f(23) == 100
assert m.x == None
m.x = 1000
assert m.g(23) == 977
assert m.y == 77
assert m.x == 1000
def test_member_input_flags(self): def test_member_input_flags(self):
"""Test that we can manipulate the mutable, strict, etc. flags (see SymbolicInput) of """Test that we can manipulate the mutable, strict, etc. flags (see SymbolicInput) of
Method inputs""" Method inputs"""
print >> sys.stderr, "WARNING MODULE TEST NOT IMPLEMENTED"
M = Module()
M.x = T.dvector()
M.y = T.dvector()
xval= numpy.asarray([0, 0.5])
M.f = Method([io.In(M.x,
mutable=True,
update=(M.x - M.y),
value=xval)], M.x + M.y)
m = M.make()
m.y = numpy.asarray([1, 2])
assert numpy.all(m.f(xval) == [1, 2.5])
assert numpy.all(xval == [-1, -1.5])
def test_member_output_flags(self): def test_member_output_flags(self):
"""Test that we can manipulate the output flags (just 'borrow' I think, see SymbolicOutput) """Test that we can manipulate the output flags (just 'borrow' I think, see SymbolicOutput)
of Method outputs""" of Method outputs"""
print >> sys.stderr, "WARNING MODULE TEST NOT IMPLEMENTED" M = Module()
M.x = T.dvector()
M.f = Method([M.x], io.Out(M.x*4, borrow=True), mode='FAST_RUN')
m = M.make()
def test_sanity_check_mode(self): v0 = m.f([5, 8])
"""Test that Module.make(self) can take the same list of Modes that function can, so we can v0_copy = v0 * 1
debug modules""" m.f([3, 2])
print >> sys.stderr, "WARNING MODULE TEST NOT IMPLEMENTED" assert numpy.all(v0 != v0_copy)
def test_member_value(self): def test_member_value(self):
"""Test that module Members of Value work correctly. As Result?""" """Test that module Members of Value work correctly. As Result?"""
print >> sys.stderr, "WARNING MODULE TEST NOT IMPLEMENTED" M = Module()
x = T.dscalar()
M.y = T.value(40)
M.f = Method([x], x + 2 * M.y)
m = M.make()
m.y = 80
assert m.f(20) == 180
def test_member_constant(self): def test_member_constant(self):
"""Test that module Members of Constant work correctly. """Test that module Members of Constant work correctly.
As Result with more optimization?""" As Result with more optimization?"""
print >> sys.stderr, "WARNING MODULE TEST NOT IMPLEMENTED" M = Module()
x = T.dscalar()
M.y = T.constant(40)
M.f = Method([x], x + 2 * M.y)
m = M.make()
try:
m.y = 77 #fail?
assert 0 #assign to constant should not have worked
except:
pass
assert m.f(20) == 100
def test_raise_NotImplemented(self): def test_raise_NotImplemented(self):
c=Component() c=Component()
...@@ -380,7 +492,7 @@ class T_test_module(unittest.TestCase): ...@@ -380,7 +492,7 @@ class T_test_module(unittest.TestCase):
self.assertRaises(NotImplementedError, c.get,"n") self.assertRaises(NotImplementedError, c.get,"n")
self.assertRaises(NotImplementedError, c.set,"n",1) self.assertRaises(NotImplementedError, c.set,"n",1)
def test_tuple_members(self): def test_tuple_members():
M = Module() M = Module()
M.a = (1,1) M.a = (1,1)
...@@ -393,11 +505,65 @@ class T_test_module(unittest.TestCase): ...@@ -393,11 +505,65 @@ class T_test_module(unittest.TestCase):
assert isinstance(M.a, tuple) assert isinstance(M.a, tuple)
def test_method_updates():
# updates work
M = Module()
M.x = T.dvector()
x = T.dvector()
xval= numpy.asarray([0, 0.5])
M.f = Method([x], M.x*4, updates={M.x:M.x * 2}, mode='FAST_COMPILE')
m = M.make(mode='FAST_RUN')
m.x = xval
m.f([9,9])
assert numpy.all(m.x == [0, 1])
assert numpy.all(xval == [0, 0.5])
# In(update) works
M = Module()
M.x = T.dvector()
x = T.dvector()
M.f = Method([x, io.In(M.x, value=xval, update=M.x*2)], M.x*4)
m = M.make()
m.f([9,9])
assert m.x is None
assert numpy.all(xval == [0, 1])
# when a result is listed explicitly and in an update, then there's a problem.
M = Module()
M.x = T.dvector()
x = T.dvector()
M.f = Method([x, io.In(M.x, value=xval, update=M.x*2)], M.x*4,
updates={M.x:M.x * 7})
try:
m = M.make()
assert False
except ValueError, e:
if str(e[0]).startswith('Result listed in both inputs and up'):
pass
else:
raise
def test_method_mode():
"""Test that Methods can override the module build mode"""
M = Module()
M.x = T.dvector()
M.f = Method([M.x], M.x*4, mode='FAST_COMPILE')
M.g = Method([M.x], M.x*4)
M.h = Method([M.x], M.x*4)
m = M.make(mode='FAST_RUN')
assert m.f.maker.mode != m.g.maker.mode
assert m.h.maker.mode == m.g.maker.mode
assert numpy.all(m.f([1,2]) == m.g([1,2]))
def test_pickle(): def test_pickle():
"""Test that a module can be pickled""" """Test that a module can be pickled"""
M = Module() M = Module()
M.x = Member(T.dmatrix()) M.x = (T.dmatrix())
M.y = Member(T.dmatrix()) M.y = (T.dmatrix())
a = T.dmatrix() a = T.dmatrix()
M.f = Method([a], a + M.x + M.y) M.f = Method([a], a + M.x + M.y)
M.g = Method([a], a * M.x * M.y) M.g = Method([a], a * M.x * M.y)
...@@ -418,13 +584,11 @@ def test_pickle(): ...@@ -418,13 +584,11 @@ def test_pickle():
assert m_dup.x is m_dup.g.input_storage[1].data assert m_dup.x is m_dup.g.input_storage[1].data
assert m_dup.y is m_dup.g.input_storage[2].data assert m_dup.y is m_dup.g.input_storage[2].data
from numpy.testing import *
@dec.knownfailureif(True, "These branch cuts are known to fail")
def test_pickle_aliased_memory(): def test_pickle_aliased_memory():
try:
M = Module() M = Module()
M.x = Member(T.dmatrix()) M.x = (T.dmatrix())
M.y = Member(T.dmatrix()) M.y = (T.dmatrix())
a = T.dmatrix() a = T.dmatrix()
M.f = Method([a], a + M.x + M.y) M.f = Method([a], a + M.x + M.y)
M.g = Method([a], a * M.x * M.y) M.g = Method([a], a * M.x * M.y)
...@@ -450,6 +614,9 @@ def test_pickle_aliased_memory(): ...@@ -450,6 +614,9 @@ def test_pickle_aliased_memory():
assert m.y[0,0] == 3.142 assert m.y[0,0] == 3.142
m_dup.x[1,0] = 3.142 m_dup.x[1,0] = 3.142
assert m_dup.y[0,0] == 3.142 assert m_dup.y[0,0] == 3.142
except Exception, e:
raise Exception('Known Failure: These branch cuts are known to fail', str(e))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -473,15 +473,6 @@ class GemmLocalOptimizer(LocalOptimizer): ...@@ -473,15 +473,6 @@ class GemmLocalOptimizer(LocalOptimizer):
return [T.add(*new_add_inputs)] return [T.add(*new_add_inputs)]
return False return False
@staticmethod
def failure_callback(exc, nav, repl_pairs):
"""WRITEME"""
if not isinstance(exc, InconsistencyError):
traceback.print_exc()
else:
#print 'GEMM caused cycle, it happens.'
pass
@staticmethod @staticmethod
def _as_scalar(res): def _as_scalar(res):
"""Return None or a TensorResult whose type is in T.float_scalar_types""" """Return None or a TensorResult whose type is in T.float_scalar_types"""
...@@ -579,11 +570,11 @@ class GemmLocalOptimizer(LocalOptimizer): ...@@ -579,11 +570,11 @@ class GemmLocalOptimizer(LocalOptimizer):
# TODO: This could be an EquilibriumOptimizer, but I don't know how to combine an OpKeyOptimizer and # TODO: This could be an EquilibriumOptimizer, but I don't know how to combine an OpKeyOptimizer and
# an EquilibriumOptimizer. # an EquilibriumOptimizer.
compile.optdb.register('inplace_gemm_0', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_0', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.00, 'fast_run', 'inplace', 'gemm') failure_callback=OpKeyOptimizer.warn_inplace), 70.00, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_1', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_1', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.01, 'fast_run', 'inplace', 'gemm') failure_callback=OpKeyOptimizer.warn_inplace), 70.01, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_2', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_2', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.02, 'fast_run', 'inplace', 'gemm') failure_callback=OpKeyOptimizer.warn_inplace), 70.02, 'fast_run', 'inplace', 'gemm')
class Dot22(GemmRelated): class Dot22(GemmRelated):
"""Compute a matrix-matrix product. """Compute a matrix-matrix product.
......
...@@ -17,6 +17,8 @@ def cross_entropy(target, output, axis=1): ...@@ -17,6 +17,8 @@ def cross_entropy(target, output, axis=1):
@warning: OUTPUT and TARGET are reversed in nnet_ops.binary_crossentropy @warning: OUTPUT and TARGET are reversed in nnet_ops.binary_crossentropy
""" """
return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis) return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis)
def quadratic(target, output, axis=1):
return T.mean(T.sqr(target - output), axis=axis)
class QuadraticDenoisingAA(module.Module): class QuadraticDenoisingAA(module.Module):
"""Quadratic de-noising Auto-encoder """Quadratic de-noising Auto-encoder
...@@ -70,27 +72,36 @@ class QuadraticDenoisingAA(module.Module): ...@@ -70,27 +72,36 @@ class QuadraticDenoisingAA(module.Module):
# ACQUIRE/MAKE INPUT # ACQUIRE/MAKE INPUT
if not input: if not input:
input = T.matrix('input') input = T.matrix('input')
self.input = theano.External(input) #self.input = theano.External(input)
self.input = (input)
# HYPER-PARAMETERS # HYPER-PARAMETERS
self.lr = theano.Member(T.scalar()) #self.lr = theano.Member(T.scalar())
self.lr = (T.scalar())
# PARAMETERS # PARAMETERS
if _qfilters is None: if _qfilters is None:
self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)] #self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
self.qfilters = [(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
else: else:
self.qfilters = [theano.Member(q) for q in _qfilters] #self.qfilters = [theano.Member(q) for q in _qfilters]
self.qfilters = [(q) for q in _qfilters]
self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1) #self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1)
self.w1 = (T.matrix('w1')) if _w1 is None else (_w1)
if _w2 is None: if _w2 is None:
if not tie_weights: if not tie_weights:
self.w2 = theano.Member(T.matrix()) #self.w2 = theano.Member(T.matrix())
self.w2 = (T.matrix())
else: else:
self.w2 = self.w1.T self.w2 = self.w1.T
else: else:
self.w2 = theano.Member(_w2) #self.w2 = theano.Member(_w2)
self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1) self.w2 = (_w2)
self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2) #self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1)
self.b1 = (T.vector('b1')) if _b1 is None else (_b1)
#self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2)
self.b2 = (T.vector('b2')) if _b2 is None else (_b2)
# # REGULARIZATION COST # # REGULARIZATION COST
# self.regularization = self.build_regularization() # self.regularization = self.build_regularization()
...@@ -168,6 +179,7 @@ class QuadraticDenoisingAA(module.Module): ...@@ -168,6 +179,7 @@ class QuadraticDenoisingAA(module.Module):
#self.validate = theano.Method(self.input, [self.cost, self.output]) #self.validate = theano.Method(self.input, [self.cost, self.output])
def _instance_initialize(self, obj, input_size, hidden_size, seed, lr, qfilter_relscale): def _instance_initialize(self, obj, input_size, hidden_size, seed, lr, qfilter_relscale):
print 'QDAA init'
""" """
qfilter_relscale is the initial range for any quadratic filters (relative to the linear qfilter_relscale is the initial range for any quadratic filters (relative to the linear
filter's initial range) filter's initial range)
...@@ -212,7 +224,8 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA): ...@@ -212,7 +224,8 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA):
unittest_tools.seed_rng() unittest_tools.seed_rng()
def build_corrupted_input(self): def build_corrupted_input(self):
self.noise_level = theano.Member(T.scalar()) #self.noise_level = theano.Member(T.scalar())
self.noise_level = (T.scalar())
return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
def hid_activation_function(self, activation): def hid_activation_function(self, activation):
...@@ -262,12 +275,17 @@ class Module_Nclass(module.FancyModule): ...@@ -262,12 +275,17 @@ class Module_Nclass(module.FancyModule):
def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False): def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False):
super(Module_Nclass, self).__init__() #boilerplate super(Module_Nclass, self).__init__() #boilerplate
self.x = module.Member(x) if x is not None else T.matrix('input') #self.x = module.Member(x) if x is not None else T.matrix('input')
self.targ = module.Member(targ) if targ is not None else T.lvector() self.x = (x) if x is not None else T.matrix('input')
#self.targ = module.Member(targ) if targ is not None else T.lvector()
self.targ = (targ) if targ is not None else T.lvector()
self.w = module.Member(w) if w is not None else module.Member(T.dmatrix()) #self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
self.b = module.Member(b) if b is not None else module.Member(T.dvector()) self.w = (w) if w is not None else (T.dmatrix())
self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar()) #self.b = module.Member(b) if b is not None else module.Member(T.dvector())
self.b = (b) if b is not None else (T.dvector())
#self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
self.lr = (lr) if lr is not None else (T.dscalar())
self.params = [p for p in [self.w, self.b] if p.owner is None] self.params = [p for p in [self.w, self.b] if p.owner is None]
...@@ -309,8 +327,6 @@ class Module_Nclass(module.FancyModule): ...@@ -309,8 +327,6 @@ class Module_Nclass(module.FancyModule):
class ConvolutionalMLPInstance(module.FancyModuleInstance, Loss01): class ConvolutionalMLPInstance(module.FancyModuleInstance, Loss01):
#initialize is called by Module.make #initialize is called by Module.make
def initialize(self, input_size, input_representation_size, hidden_representation_size, output_size, lr, seed, noise_level, qfilter_relscale): def initialize(self, input_size, input_representation_size, hidden_representation_size, output_size, lr, seed, noise_level, qfilter_relscale):
# ASK JAMES: Is the following necessary?
# super(ConvolutionalMLPInstance, self)._instance_initialize(obj, **kwargs)
R = N.random.RandomState(unittest_tools.fetch_seed(seed)) R = N.random.RandomState(unittest_tools.fetch_seed(seed))
...@@ -323,19 +339,29 @@ class ConvolutionalMLPInstance(module.FancyModuleInstance, Loss01): ...@@ -323,19 +339,29 @@ class ConvolutionalMLPInstance(module.FancyModuleInstance, Loss01):
# for layer in obj.layers: # for layer in obj.layers:
# if layer.lr is None: # if layer.lr is None:
# layer.lr = lr # layer.lr = lr
assert self.input_representations[-1] is not self.input_representations[0]
assert self.input_representations[-1].w1 is self.input_representations[0].w1
for i in self.input_representations: for i in self.input_representations:
# i.initialize(input_size=self.input_size, hidden_size=self.input_representation_size, seed=R.random_integers(2**30), noise_level=noise_level, qfilter_relscale=qfilter_relscale) # i.initialize(input_size=self.input_size, hidden_size=self.input_representation_size, seed=R.random_integers(2**30), noise_level=noise_level, qfilter_relscale=qfilter_relscale)
i.initialize(input_size=self.input_size, hidden_size=self.input_representation_size, noise_level=noise_level, seed=R.random_integers(2**30), lr=lr, qfilter_relscale=qfilter_relscale) i.initialize(input_size=self.input_size,
hidden_size=self.input_representation_size, noise_level=noise_level,
seed=int(R.random_integers(2**30)), lr=lr, qfilter_relscale=qfilter_relscale)
print type(i.w1)
assert isinstance(i.w1, N.ndarray)
for i in self.input_representations[1:]: for i in self.input_representations[1:]:
print type(i.w1)
assert isinstance(i.w1, N.ndarray)
assert (i.w1 == self.input_representations[0].w1).all() assert (i.w1 == self.input_representations[0].w1).all()
assert (i.w2 == self.input_representations[0].w2).all() assert (i.w2 == self.input_representations[0].w2).all()
assert (i.b1 == self.input_representations[0].b1).all() assert (i.b1 == self.input_representations[0].b1).all()
assert (i.b2 == self.input_representations[0].b2).all() assert (i.b2 == self.input_representations[0].b2).all()
assert all((a==b).all() for a, b in zip(i.qfilters, self.input_representations[0].qfilters)) assert all((a==b).all() for a, b in zip(i.qfilters, self.input_representations[0].qfilters))
self.hidden.initialize(input_size=(len(self.inputs) * self.input_representation_size), hidden_size=self.hidden_representation_size, noise_level=noise_level, seed=R.random_integers(2**30), lr=lr, qfilter_relscale=qfilter_relscale) self.hidden.initialize(input_size=(len(self.inputs) * self.input_representation_size),
hidden_size=self.hidden_representation_size, noise_level=noise_level,
seed=int(R.random_integers(2**30)), lr=lr, qfilter_relscale=qfilter_relscale)
self.output.initialize(n_in=self.hidden_representation_size, n_out=self.output_size, lr=lr, seed=R.random_integers(2**30)) self.output.initialize(n_in=self.hidden_representation_size, n_out=self.output_size, lr=lr, seed=R.random_integers(2**30))
...@@ -352,7 +378,8 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -352,7 +378,8 @@ class ConvolutionalMLP(module.FancyModule):
): ):
super(ConvolutionalMLP, self).__init__() super(ConvolutionalMLP, self).__init__()
self.lr = module.Member(T.scalar()) #self.lr = module.Member(T.scalar())
self.lr = (T.scalar())
self.inputs = [T.dmatrix() for i in range(window_size)] self.inputs = [T.dmatrix() for i in range(window_size)]
self.targ = T.lvector() self.targ = T.lvector()
...@@ -382,6 +409,7 @@ class ConvolutionalMLP(module.FancyModule): ...@@ -382,6 +409,7 @@ class ConvolutionalMLP(module.FancyModule):
_qfilters = self.input_representations[0].qfilters _qfilters = self.input_representations[0].qfilters
) )
) )
assert self.input_representations[-1].w1 is self.input_representations[0].w1
self.input_representation = T.concatenate([i.hidden for i in self.input_representations], axis=1) self.input_representation = T.concatenate([i.hidden for i in self.input_representations], axis=1)
self.hidden = QDAA( self.hidden = QDAA(
...@@ -445,13 +473,11 @@ def create(window_size=3, ...@@ -445,13 +473,11 @@ def create(window_size=3,
""" Create a convolutional model. """ """ Create a convolutional model. """
activation_function = T.tanh activation_function = T.tanh
import pylearn.algorithms.cost
architecture = ConvolutionalMLP( \ architecture = ConvolutionalMLP( \
window_size = window_size, window_size = window_size,
n_quadratic_filters = n_quadratic_filters, n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function, activation_function = activation_function,
reconstruction_cost_function = pylearn.algorithms.cost.quadratic, reconstruction_cost_function = quadratic,
tie_weights = False tie_weights = False
) )
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode) model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
...@@ -471,13 +497,11 @@ def create_realistic(window_size=3,#7, ...@@ -471,13 +497,11 @@ def create_realistic(window_size=3,#7,
""" Create a convolutional model. """ """ Create a convolutional model. """
activation_function = T.tanh activation_function = T.tanh
import pylearn.algorithms.cost
architecture = ConvolutionalMLP( \ architecture = ConvolutionalMLP( \
window_size = window_size, window_size = window_size,
n_quadratic_filters = n_quadratic_filters, n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function, activation_function = activation_function,
reconstruction_cost_function = pylearn.algorithms.cost.quadratic, reconstruction_cost_function = quadratic,
tie_weights = False tie_weights = False
) )
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode) model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
...@@ -522,8 +546,8 @@ def test_naacl_model(iters_per_unsup=10, iters_per_sup=10, ...@@ -522,8 +546,8 @@ def test_naacl_model(iters_per_unsup=10, iters_per_sup=10,
s0, s1 = [str(j) for j in m.pretraining_update(*inputs)] s0, s1 = [str(j) for j in m.pretraining_update(*inputs)]
print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1 print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
if iters_per_unsup == 10: if iters_per_unsup == 10:
assert s0.startswith('0.40218760858') assert s0.startswith('0.403044')
assert s1.startswith('0.074450801777') assert s1.startswith('0.074898')
print 'FINETUNING GRAPH' print 'FINETUNING GRAPH'
print 'SUPERVISED PHASE COSTS (%s)'%optimizer print 'SUPERVISED PHASE COSTS (%s)'%optimizer
...@@ -533,9 +557,9 @@ def test_naacl_model(iters_per_unsup=10, iters_per_sup=10, ...@@ -533,9 +557,9 @@ def test_naacl_model(iters_per_unsup=10, iters_per_sup=10,
s0 = str(m.finetuning_update(*(inputs + [targets]))) s0 = str(m.finetuning_update(*(inputs + [targets])))
print iters_per_sup * (i+1), s0 print iters_per_sup * (i+1), s0
if iters_per_sup == 10: if iters_per_sup == 10:
assert s0.startswith('15.65127763')#should check for the 8 decimal only. assert s0.startswith('15.6511')#should check for the 8 decimal only.
if __name__ == '__main__': def jtest_main():
from theano import gof from theano import gof
JTEST = theano.compile.mode.optdb.query(*sys.argv[2:]) JTEST = theano.compile.mode.optdb.query(*sys.argv[2:])
print 'JTEST', JTEST print 'JTEST', JTEST
...@@ -543,3 +567,23 @@ if __name__ == '__main__': ...@@ -543,3 +567,23 @@ if __name__ == '__main__':
optimizer = eval(sys.argv[1]) optimizer = eval(sys.argv[1])
test_naacl_model(optimizer, 10, 10, realistic=False) test_naacl_model(optimizer, 10, 10, realistic=False)
def real_main():
test_naacl_model()
def profile_main():
# This is the main function for profiling
# We've renamed our original main() above to real_main()
import cProfile, pstats, StringIO
prof = cProfile.Profile()
prof = prof.runctx("real_main()", globals(), locals())
stream = StringIO.StringIO()
stats = pstats.Stats(prof)
stats.sort_stats("time") # Or cumulative
stats.print_stats(80) # 80 = how many to print
# The rest is optional.
# stats.print_callees()
# stats.print_callers()
if __name__ == '__main__':
#real_main()
profile_main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论