提交 848019a9 authored 作者: Frederic Bastien's avatar Frederic Bastien
...@@ -39,6 +39,33 @@ class DestroyHandler(toolbox.Bookkeeper): ...@@ -39,6 +39,33 @@ class DestroyHandler(toolbox.Bookkeeper):
return self.map[env].orderings(env) return self.map[env].orderings(env)
def getroot(r, view_i):
    """Return the non-view result that `r` ultimately views.

    Follows the view chain in `view_i` (a mapping view -> viewed result)
    until a result with no entry is reached; a non-view result is returned
    unchanged.
    """
    # Walk the chain iteratively instead of recursing on KeyError.
    current = r
    while current in view_i:
        current = view_i[current]
    return current
def add_impact(r, view_o, impact):
    """
    Add to `impact` every result that is a direct or transitive view of `r`.

    In opposition to getroot, which finds the result that is viewed *by* r,
    this function collects all the results that are views of r.

    :param r: the result whose views are collected
    :param view_o: a dictionary mapping a result -> iterable of results that
        view it directly
    :param impact: a set, updated in place with all views of r

    (Fixed docstring: it previously documented a nonexistent `droot`
    parameter and omitted `r` and `view_o`.)
    """
    for v in view_o.get(r, []):
        impact.add(v)
        add_impact(v, view_o, impact)
def get_impact(root, view_o):
    """Return the set of all direct and transitive views of `root`.

    `view_o` maps a result -> iterable of results that view it directly.
    `root` itself is not included in the returned set.
    """
    # Traverse with an explicit stack instead of delegating to add_impact.
    impact = set()
    pending = [root]
    while pending:
        r = pending.pop()
        for v in view_o.get(r, []):
            if v not in impact:
                impact.add(v)
                pending.append(v)
    return impact
class DestroyHandlerHelper2(toolbox.Bookkeeper): class DestroyHandlerHelper2(toolbox.Bookkeeper):
"""WRITEME""" """WRITEME"""
...@@ -53,13 +80,14 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper): ...@@ -53,13 +80,14 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper):
if hasattr(env, attr): if hasattr(env, attr):
raise toolbox.AlreadyThere("DestroyHandler feature is already present or in conflict with another plugin.") raise toolbox.AlreadyThere("DestroyHandler feature is already present or in conflict with another plugin.")
def get_destroyers(r): def get_destroyers_of(r):
d_of = self.get_destroyer_of(r) droot, impact, root_destroyer = self.refresh_droot_impact()
if d_of: try:
return [d_of] return [root_destroyer[droot[r]]]
else: except:
return [] return []
env.destroyers = get_destroyers
env.destroyers = get_destroyers_of
env.destroy_handler = self env.destroy_handler = self
self.env = env self.env = env
...@@ -68,11 +96,18 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper): ...@@ -68,11 +96,18 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper):
self.view_o = {} # result -> set of results self.view_o = {} # result -> set of results
#clients: how many times does an apply use a given result #clients: how many times does an apply use a given result
self.clients = {} # result -> apply -> ninputs self.clients = {} # result -> apply -> ninputs
self.stale_droot = True
self.debug_all_apps = set() self.debug_all_apps = set()
toolbox.Bookkeeper.on_attach(self, env) toolbox.Bookkeeper.on_attach(self, env)
def build_droot_impact(self): def refresh_droot_impact(self):
if self.stale_droot:
self.droot, self.impact, self.root_destroyer = self._build_droot_impact()
self.stale_droot = False
return self.droot, self.impact, self.root_destroyer
def _build_droot_impact(self):
droot = {} # destroyed view + nonview results -> foundation droot = {} # destroyed view + nonview results -> foundation
impact = {} # destroyed nonview result -> it + all views of it impact = {} # destroyed nonview result -> it + all views of it
root_destroyer = {} # root -> destroyer apply root_destroyer = {} # root -> destroyer apply
...@@ -83,32 +118,22 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper): ...@@ -83,32 +118,22 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper):
raise NotImplementedError() raise NotImplementedError()
input_idx = input_idx_list[0] input_idx = input_idx_list[0]
input = app.inputs[input_idx] input = app.inputs[input_idx]
def getroot(r): input_root = getroot(input, self.view_i)
try:
return getroot(self.view_i[r])
except KeyError:
return r
input_root = getroot(input)
if input_root in droot: if input_root in droot:
raise InconsistencyError("Multiple destroyers of %s" % input_root) raise InconsistencyError("Multiple destroyers of %s" % input_root)
droot[input_root] = input_root droot[input_root] = input_root
root_destroyer[input_root] = app root_destroyer[input_root] = app
impact[input_root] = set([input_root]) #input_impact = set([input_root])
def build_stuff(r): #add_impact(input_root, self.view_o, input_impact)
for v in self.view_o.get(r,[]): input_impact = get_impact(input_root, self.view_o)
assert v not in droot for v in input_impact:
droot[v] = input_root assert v not in droot
impact[input_root].add(v) droot[v] = input_root
build_stuff(v)
build_stuff(input_root)
return droot, impact, root_destroyer impact[input_root] = input_impact
impact[input_root].add(input_root)
def get_destroyer_of(self, r): return droot, impact, root_destroyer
droot, impact, root_destroyer = self.build_droot_impact()
for root in impact:
if r in impact[root]:
return root_destroyer[root]
def on_detach(self, env): def on_detach(self, env):
if env is not self.env: if env is not self.env:
...@@ -117,6 +142,7 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper): ...@@ -117,6 +142,7 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper):
del self.view_i del self.view_i
del self.view_o del self.view_o
del self.clients del self.clients
del self.stale_droot
assert self.env.destroyer_handler is self assert self.env.destroyer_handler is self
delattr(self.env, 'destroyers') delattr(self.env, 'destroyers')
delattr(self.env, 'destroy_handler') delattr(self.env, 'destroy_handler')
...@@ -148,6 +174,8 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper): ...@@ -148,6 +174,8 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper):
for i, output in enumerate(app.outputs): for i, output in enumerate(app.outputs):
self.clients.setdefault(output, {}) self.clients.setdefault(output, {})
self.stale_droot = True
def on_prune(self, env, app): def on_prune(self, env, app):
"""Remove Apply instance from set which must be computed""" """Remove Apply instance from set which must be computed"""
...@@ -178,6 +206,8 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper): ...@@ -178,6 +206,8 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper):
self.view_o[i].remove(o) self.view_o[i].remove(o)
if not self.view_o[i]: if not self.view_o[i]:
del self.view_o[i] del self.view_o[i]
self.stale_droot = True
def on_change_input(self, env, app, i, old_r, new_r): def on_change_input(self, env, app, i, old_r, new_r):
"""app.inputs[i] changed from old_r to new_r """ """app.inputs[i] changed from old_r to new_r """
...@@ -214,6 +244,8 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper): ...@@ -214,6 +244,8 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper):
del self.view_o[old_r] del self.view_o[old_r]
self.view_o.setdefault(new_r,set()).add(output) self.view_o.setdefault(new_r,set()).add(output)
self.stale_droot = True
def validate(self, env): def validate(self, env):
"""Return None """Return None
...@@ -261,7 +293,7 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper): ...@@ -261,7 +293,7 @@ class DestroyHandlerHelper2(toolbox.Bookkeeper):
# BUILD DATA STRUCTURES # BUILD DATA STRUCTURES
# CHECK for multiple destructions during construction of variables # CHECK for multiple destructions during construction of variables
droot, impact, __ignore = self.build_droot_impact() droot, impact, __ignore = self.refresh_droot_impact()
#print "droot", droot #print "droot", droot
#print "impact", impact #print "impact", impact
#print "view_i", self.view_i #print "view_i", self.view_i
......
...@@ -21,6 +21,9 @@ class MyType(Type): ...@@ -21,6 +21,9 @@ class MyType(Type):
def __eq__(self, other): def __eq__(self, other):
return isinstance(other, MyType) return isinstance(other, MyType)
def __hash__(self):
    # All MyType instances compare equal (__eq__ only checks
    # isinstance(other, MyType)), so every instance must hash alike;
    # hashing the class object itself gives one shared, stable value.
    return hash(MyType)
def MyResult(name): def MyResult(name):
return Result(MyType(), None, None, name = name) return Result(MyType(), None, None, name = name)
......
...@@ -21,11 +21,12 @@ class AbstractFunctionError(Exception): ...@@ -21,11 +21,12 @@ class AbstractFunctionError(Exception):
class object2(object): class object2(object):
__slots__ = [] __slots__ = []
def __hash__(self): if 0:
# this fixes silent-error-prone new-style class behavior def __hash__(self):
if hasattr(self, '__eq__') or hasattr(self, '__cmp__'): # this fixes silent-error-prone new-style class behavior
raise TypeError("unhashable object: %s" % self) if hasattr(self, '__eq__') or hasattr(self, '__cmp__'):
return id(self) raise TypeError("unhashable object: %s" % self)
return id(self)
def __ne__(self, other): def __ne__(self, other):
return not self == other return not self == other
......
...@@ -1806,24 +1806,37 @@ def reshape(x, newshape, ndim=None): ...@@ -1806,24 +1806,37 @@ def reshape(x, newshape, ndim=None):
class Flatten(Op): class Flatten(Op):
"""Flattens the input node""" """Flattens a tensor to `outdim` dimensions by preserving the leading outdim-1 shape
components.
"""
#Could be done as a reshape, but this is more direct. #Could be done as a reshape, but this is more direct.
#TODO: optimize reshape(x, prod(shape(x))) -> flatten(x) #TODO: optimize reshape(x, prod(shape(x))) -> flatten(x)
def __init__(self, ldim=None): def __init__(self, outdim=1):
self.ldim = ldim self.outdim = int(outdim)
def __eq__(self, other):
return type(self) == type(other) and self.outdim == other.outdim
def __hash__(self):
return hash(type(self))^hash(self.outdim)
def make_node(self, x): def make_node(self, x):
x = as_tensor(x) t_x = as_tensor(x)
outdim = 1 if self.ldim is None else x.ndim - self.ldim +1 if self.outdim < 1 or (x.ndim and self.outdim > x.ndim):
return gof.Apply(self, [x], [tensor(x.type.dtype, (False,)*outdim)]) raise ValueError('invalid output ndimensions(%i) for tensor of rank %i' %(self.outdim, t_x.ndim))
return gof.Apply(self, [t_x], [tensor(x.type.dtype, (False,)*self.outdim)])
def perform(self, node, (x,), (out,)): def perform(self, node, (x,), (out,)):
# flatten the entire tensor or just the last ldim dimensions outdim = self.outdim
out[0] = x.flatten() if self.ldim is None else\ if outdim == 1:
x.reshape(numpy.r_[x.shape[:-self.ldim],\ out[0] = x.flatten()
numpy.prod(x.shape[-self.ldim:])]) elif outdim == len(x.shape):
out[0] = x.copy()
else:
newshape = x.shape[:outdim-1] + (numpy.prod(x.shape[outdim-1:]),)
#print 'newshape', newshape, x.shape, x.shape
out[0] = x.reshape(newshape)
def grad(self, (x,), (g_out,)): def grad(self, (x,), (g_out,)):
return [reshape(g_out, shape(x), x.ndim)] return [reshape(g_out, shape(x), x.ndim)]
def flatten(ldim=None): return Flatten(ldim) def flatten(x, outdim=1):
return Flatten(outdim)(x)
class TileGrad(Op): class TileGrad(Op):
"""Calculates the gradient of the Tile Op""" """Calculates the gradient of the Tile Op"""
......
...@@ -1616,7 +1616,7 @@ def test_reshape(): ...@@ -1616,7 +1616,7 @@ def test_reshape():
tensor.verify_grad(None, Reshape(2), [a_val,numpy.asarray([2,3], dtype='float64')]) tensor.verify_grad(None, Reshape(2), [a_val,numpy.asarray([2,3], dtype='float64')])
def test_flatten(): def test_flatten_outdimNone():
""" Flatten always returns a copy of the array. There is no danger with in-place """ Flatten always returns a copy of the array. There is no danger with in-place
operations and thus no need to test it.""" operations and thus no need to test it."""
...@@ -1631,9 +1631,69 @@ def test_flatten(): ...@@ -1631,9 +1631,69 @@ def test_flatten():
tensor.verify_grad(None, Flatten(), [a_val]) tensor.verify_grad(None, Flatten(), [a_val])
def test_flatten_scalar():
    """flatten of a 0-d tensor yields a length-1 vector in both modes."""
    a = dscalar()
    c = flatten(a)
    a_val = numpy.asarray(3.0, dtype='float64')
    c_val = numpy.asarray([3.0], dtype='float64')
    # same checks as before, compiled once per mode
    for mode in ['FAST_COMPILE', 'FAST_RUN']:
        f = function([a], c, mode=mode)
        assert numpy.all(f(a_val) == c_val)
    #tensor.verify_grad(None, Flatten(), [a_val]) #TODO: fix verify_grd to work on scalars
def test_flatten_outdim1():
    """flatten(a, 1) of a matrix gives the row-major 1-d vector."""
    a = dmatrix()
    c = flatten(a, 1)
    a_val = numpy.asarray([[0,1,2],[3,4,5]], dtype='float64')
    c_val = numpy.asarray([0,1,2,3,4,5], dtype='float64')
    for mode in ['FAST_COMPILE', 'FAST_RUN']:
        f = function([a], c, mode=mode)
        assert numpy.all(f(a_val) == c_val)
    tensor.verify_grad(None, Flatten(1), [a_val])
def test_flatten_outdim2():
    """flatten(a, 2) of a matrix is the identity (already 2-d)."""
    a = dmatrix()
    c = flatten(a, 2)
    a_val = numpy.asarray([[0,1,2],[3,4,5]], dtype='float64')
    for mode in ['FAST_COMPILE', 'FAST_RUN']:
        f = function([a], c, mode=mode)
        assert numpy.all(f(a_val) == a_val)
    tensor.verify_grad(None, Flatten(2), [a_val])
def test_flatten_outdim2_of_3():
    """flatten(a, 2) of a rank-3 tensor keeps the leading axis and merges the rest."""
    a = Tensor('float64', (False, False, False))()
    c = flatten(a, 2)
    a_val = numpy.asarray([[[0,1],[2,3]], [[4,5],[6,7]]], dtype='float64')
    c_val = numpy.asarray([[0,1,2,3], [4,5,6,7]], dtype='float64')
    for mode in ['FAST_COMPILE', 'FAST_RUN']:
        f = function([a], c, mode=mode)
        assert numpy.all(f(a_val) == c_val)
    tensor.verify_grad(None, Flatten(2), [a_val])
def test_flatten_outdim_invalid():
    """flatten must raise ValueError for outdim outside 1..ndim."""
    a = dmatrix()
    # too large (3 > ndim) and too small (0 < 1) are both rejected
    for bad_outdim in (3, 0):
        try:
            c = flatten(a, bad_outdim)
            assert False
        except ValueError:
            pass
# TODO: write test case for Tile Op # TODO: write test case for Tile Op
def test_tile(): def test_tile():
print >> sys.stderr, "WARNING: No testcase for Tile"
pass pass
......
import cPickle
import sys
import numpy
import theano
from theano import tensor as T
import time
def test_no_reuse():
    """Calling a compiled function with a missing input must raise TypeError,
    not silently reuse a value from an earlier call."""
    inp_a = T.lvector()
    inp_b = T.lvector()
    add_fn = theano.function([inp_a, inp_b], inp_a + inp_b)

    # the first call supplies every input
    add_fn(numpy.ones(10), numpy.ones(10))

    try:
        add_fn(numpy.ones(10))
    except TypeError:
        return
    assert not 'should not get here'
def test_gc():
    """allow_gc must not change a compiled function's pickled size.

    Compiles the same graph with a gc-enabled linker (f) and the matching
    gc-disabled linker (g), runs both, and checks that pickling before and
    after the run gives the same length -- i.e. calling the function leaves
    no extra temporaries behind to be pickled.
    """
    x = T.dvector()
    #print >> sys.stderr, 'BUILDING GRAPH'
    for i in xrange(2): #TODO: 30 causes like LONG compilation due to MERGE
        if i :
            r = r + r/10
        else:
            r = x

    optimizer=None
    optimizer='fast_run'

    for f_linker, g_linker in [
            (theano.PerformLinker(allow_gc = True), theano.PerformLinker(allow_gc=False)),
            (theano.OpWiseCLinker(allow_gc = True), theano.OpWiseCLinker(allow_gc=False))]:

        #print >> sys.stderr, 'COMPILING'
        f = theano.function([x], r,mode=theano.Mode(optimizer=optimizer, linker=f_linker))
        # BUG FIX: g was previously compiled with f_linker too, so g_linker
        # (the allow_gc=False variant) was never exercised and the test
        # compared two identical functions.
        g = theano.function([x], r,mode=theano.Mode(optimizer=optimizer, linker=g_linker))
        pre_f = cPickle.dumps(f)
        pre_g = cPickle.dumps(g)

        #print >> sys.stderr, 'RUNNING'
        f(numpy.ones(100, dtype='float64'))
        g(numpy.ones(100, dtype='float64'))

        post_f = cPickle.dumps(f)
        post_g = cPickle.dumps(g)

        #because allow_gc should leave the function un-changed by calling
        assert len(pre_f) == len(post_f)

        #because temporaries that weren't collected shouldn't be pickled anyway
        len_post_f = len(post_f)
        len_post_g = len(post_g)
        assert len_post_f == len_post_g
def test_merge_opt_runtime():
    """Compiling a long chain with nothing to merge must stay fast.

    The original merge optimization showed very bad performance (quadratic?
    exponential?) on this graph, even though there is actually no merging
    to do in it.
    """
    x = T.dvector()
    # iteration i=0 of the original loop just set r = x; the remaining 49
    # iterations each stack one more `r + r/10` on top.
    r = x
    for _ in xrange(49):
        r = r + r/10
    start = time.time()
    f = theano.function([x], r, mode='FAST_COMPILE')
    elapsed = time.time() - start
    # it should never take longer than 5 seconds to compile this graph
    assert elapsed < 5.0
...@@ -446,7 +446,6 @@ def create(window_size=3, ...@@ -446,7 +446,6 @@ def create(window_size=3,
import pylearn.cost import pylearn.cost
print "BUILDING MODEL"
architecture = ConvolutionalMLP( \ architecture = ConvolutionalMLP( \
window_size = window_size, window_size = window_size,
n_quadratic_filters = n_quadratic_filters, n_quadratic_filters = n_quadratic_filters,
...@@ -457,8 +456,43 @@ def create(window_size=3, ...@@ -457,8 +456,43 @@ def create(window_size=3,
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode) model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
return model return model
def test_naacl_model(optimizer='fast_run'): def create_realistic(window_size=3,#7,
m = create(compile_mode = theano.Mode(linker='c|py', optimizer=optimizer)) input_dimension=200,
output_vocabsize=23,
n_quadratic_filters=2,
token_representation_size=150,
concatenated_representation_size=400,
lr=0.001,
seed=123,
noise_level=0.2,
qfilter_relscale=0.1,
compile_mode=None):
""" Create a convolutional model. """
activation_function = T.tanh
import pylearn.cost
architecture = ConvolutionalMLP( \
window_size = window_size,
n_quadratic_filters = n_quadratic_filters,
activation_function = activation_function,
reconstruction_cost_function = pylearn.cost.quadratic,
tie_weights = False
)
model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
return model
def test_naacl_model(optimizer='fast_run', iters_per_unsup=10, iters_per_sup=10,
realistic=False):
print "BUILDING MODEL"
import time
t = time.time()
if realistic:
m = create_realistic(compile_mode = theano.Mode(linker='c|py', optimizer=optimizer))
else:
m = create(compile_mode = theano.Mode(linker='c|py', optimizer=optimizer))
print 'BUILD took', time.time() - t
prog_str = [] prog_str = []
idx_of_node = {} idx_of_node = {}
for i, node in enumerate(m.pretraining_update.maker.env.toposort()): for i, node in enumerate(m.pretraining_update.maker.env.toposort()):
...@@ -480,21 +514,23 @@ def test_naacl_model(optimizer='fast_run'): ...@@ -480,21 +514,23 @@ def test_naacl_model(optimizer='fast_run'):
print 'UNSUPERVISED PHASE' print 'UNSUPERVISED PHASE'
for i in xrange(10): for i in xrange(10):
for i in xrange(10): for j in xrange(iters_per_unsup):
m.pretraining_update(*inputs) m.pretraining_update(*inputs)
s0, s1 = [str(i) for i in m.pretraining_update(*inputs)] s0, s1 = [str(j) for j in m.pretraining_update(*inputs)]
print s0, s1 print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
assert s0.startswith('0.40218760858') if iters_per_unsup == 10:
assert s1.startswith('0.074450801777') assert s0.startswith('0.40218760858')
assert s1.startswith('0.074450801777')
print 'FINETUNING GRAPH' print 'FINETUNING GRAPH'
print 'SUPERVISED PHASE COSTS (%s)'%optimizer print 'SUPERVISED PHASE COSTS (%s)'%optimizer
for i in xrange(10): for i in xrange(10):
for i in xrange(10): for j in xrange(iters_per_unsup):
m.finetuning_update(*(inputs + [targets])) m.finetuning_update(*(inputs + [targets]))
s0 = str(m.finetuning_update(*(inputs + [targets]))) s0 = str(m.finetuning_update(*(inputs + [targets])))
print s0 print iters_per_sup * (i+1), s0
assert s0.startswith('15.651277636') if iters_per_sup == 10:
assert s0.startswith('15.651277636')
if __name__ == '__main__': if __name__ == '__main__':
from theano import gof from theano import gof
...@@ -502,4 +538,5 @@ if __name__ == '__main__': ...@@ -502,4 +538,5 @@ if __name__ == '__main__':
print 'JTEST', JTEST print 'JTEST', JTEST
theano.compile.register_optimizer('JTEST', JTEST) theano.compile.register_optimizer('JTEST', JTEST)
optimizer = eval(sys.argv[1]) optimizer = eval(sys.argv[1])
test_naacl_model(optimizer) test_naacl_model(optimizer, 10, 10, realistic=False)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论