Commit 030bb405 authored by James Bergstra

merge

@@ -221,81 +221,7 @@ Advanced example

Complex models can be implemented by subclassing ``Module`` (though that is not mandatory). Here is a complete, extensible (and working) regression model implemented using this system:

-.. code-block:: python
+.. literalinclude:: ../code/regression.py
-
-    class RegressionLayer(M.Module):
-        def __init__(self, input = None, target = None, regularize = True):
-            super(RegressionLayer, self).__init__() #boilerplate
-            # MODEL CONFIGURATION
-            self.regularize = regularize
-            # ACQUIRE/MAKE INPUT AND TARGET
-            if not input:
-                input = T.matrix('input')
-            if not target:
-                target = T.matrix('target')
-            # HYPER-PARAMETERS
-            self.stepsize = T.scalar()  # a stepsize for gradient descent
-            # PARAMETERS
-            self.w = T.matrix()  # the linear transform to apply to our input points
-            self.b = T.vector()  # a vector of biases, which make our transform affine instead of linear
-            # REGRESSION MODEL
-            self.activation = T.dot(input, self.w) + self.b
-            self.prediction = self.build_prediction()
-            # CLASSIFICATION COST
-            self.classification_cost = self.build_classification_cost(target)
-            # REGULARIZATION COST
-            self.regularization = self.build_regularization()
-            # TOTAL COST
-            self.cost = self.classification_cost
-            if self.regularize:
-                self.cost = self.cost + self.regularization
-            # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
-            self.grad_w, self.grad_b = T.grad(self.cost, [self.w, self.b])
-            # INTERFACE METHODS
-            self.update = M.Method([input, target],
-                    self.cost,
-                    updates={self.w: self.w - self.stepsize * self.grad_w,
-                             self.b: self.b - self.stepsize * self.grad_b})
-            self.apply = M.Method(input, self.prediction)
-
-        def params(self):
-            return self.w, self.b
-
-        def _instance_initialize(self, obj, input_size = None, target_size = None,
-                seed = 1827, **init):
-            # obj is an "instance" of this module holding values for each member and
-            # functions for each method
-            if input_size and target_size:
-                # initialize w and b in a special way using input_size and target_size
-                sz = (input_size, target_size)
-                rng = N.random.RandomState(seed)
-                obj.w = rng.uniform(size = sz, low = -0.5, high = 0.5)
-                obj.b = N.zeros(target_size)
-                obj.stepsize = 0.01
-            # Here we call the default_initialize method, which takes all the name: value
-            # pairs in init and sets the property with that name to the provided value.
-            # This covers setting stepsize and l2_coef; w and b can be set that way too.
-            # We call it afterwards because we want the provided parameters to supersede the defaults.
-            M.default_initialize(obj, **init)
-
-        def build_regularization(self):
-            return T.zero()  # no regularization!
-
-    class SoftmaxXERegression(RegressionLayer):
-        """ XE means cross entropy """
-        def build_prediction(self):
-            return NN.softmax(self.activation)
-
-        def build_classification_cost(self, target):
-            #self.classification_cost_matrix = target * T.log(self.prediction) + (1 - target) * T.log(1 - self.prediction)
-            self.classification_cost_matrix = (target - self.prediction)**2
-            self.classification_costs = -T.sum(self.classification_cost_matrix, axis=1)
-            return T.sum(self.classification_costs)
-
-        def build_regularization(self):
-            self.l2_coef = T.scalar()  # we can add a hyper-parameter if we need to
-            return self.l2_coef * T.sum(self.w * self.w)

Here is how we use the model:
......
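The usage example itself is collapsed in this view. As a rough, hypothetical sketch of how such a module is typically driven (it assumes the standard ``make()`` step of Theano's module system, plus made-up sizes and data; it is not the collapsed text):

    # Hypothetical usage sketch (not the collapsed example): compile the
    # module with make(), whose keyword arguments reach _instance_initialize.
    import numpy as N

    model = SoftmaxXERegression(regularize=False).make(input_size=4,
                                                       target_size=2,
                                                       stepsize=0.1)

    rng = N.random.RandomState(0)
    inputs = rng.uniform(size=(8, 4))
    targets = N.zeros((8, 2))

    for i in xrange(100):
        cost = model.update(inputs, targets)   # one step of gradient descent
    print model.apply(inputs)                  # row-wise softmax predictions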
@@ -136,24 +136,27 @@ class BadDestroyMap(DebugModeError):

class BadViewMap(DebugModeError):
    """Exception: Some perform() or c_code() created a memory alias that wasn't in the view_map"""
-    def __init__(self, node, idx, old_val, new_val):
+    def __init__(self, node, output_idx, out_storage, in_alias_idx=None, out_alias_idx=None):
        super(BadViewMap, self).__init__()
        self.node = node
-        self.idx = idx
-        self.old_val = old_val
-        self.new_val = new_val
+        self.output_idx = output_idx
+        self.out_storage = out_storage
+        self.in_alias_idx = in_alias_idx
+        self.out_alias_idx = out_alias_idx

    def __str__(self):
        sio = StringIO()
        print >> sio, "  node:", self.node
        print >> sio, "  node.inputs:", [(str(i), id(i)) for i in self.node.inputs]
+        print >> sio, "  node.outputs:", [(str(i), id(i)) for i in self.node.outputs]
        print >> sio, "  view_map:", getattr(self.node.op, 'view_map', {})
-        print >> sio, "  changed input idx:", self.idx
-        print >> sio, "  changed input type:", self.node.inputs[self.idx].type
-        print >> sio, "  repr (old val):", repr(self.old_val)
-        print >> sio, "  repr (new val):", repr(self.new_val)
-        print >> sio, ""
-        print >> sio, "  Hint: this can also be caused by a deficient values_eq_approx() or __eq__() implementation that compares node input values"
+        print >> sio, "  destroy_map:", getattr(self.node.op, 'destroy_map', {})
+        print >> sio, "  aliased output:", self.output_idx
+        print >> sio, "  aliased output storage:", self.out_storage
+        if self.in_alias_idx:
+            print >> sio, "  aliased to inputs:", self.in_alias_idx
+        if self.out_alias_idx:
+            print >> sio, "  aliased to outputs:", self.out_alias_idx
        return sio.getvalue()

class StochasticOrder(DebugModeError):
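For reference, ``view_map`` is the Op attribute this exception polices: it maps an output index to a list containing the one input index that output may alias. A minimal hypothetical sketch (not from this commit), in the same style as the ops in the test file below:

    # Hypothetical op: output 0 is a slice (hence a view) of input 0, and
    # the view_map declaration makes that aliasing legal under DebugMode.
    class HeadSlice(gof.Op):
        view_map = {0: [0]}   # declares: output 0 may alias input 0

        def make_node(self, a):
            return gof.Apply(self, [a], [a.type()])

        def perform(self, node, (a,), (out,)):
            out[0] = a[:-1]   # slicing shares memory with `a`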
@@ -273,7 +276,85 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes, clobber_dr_v
        else:
            raise BadDestroyMap(node, r_idx, r_vals[r], storage_map[r][0])

+def _check_viewmap(node, storage_map):
+    """
+    This function raises a BadViewMap exception when it detects the following:
+    - an output's storage is aliased to an input's storage, with no declaration in view_map
+    - if an output is not aliased to an input, two outputs are aliased to each other
+      and both are used subsequently in the graph
+    """
+    for oi, onode in enumerate(node.outputs):
+        input_alias = None
+        outstorage = storage_map[onode][0]
+        instorage_id = [id(storage_map[i][0]) for i in node.inputs]
+        # TODO: investigate ways in which other Types may be aliased
+        # TODO: consider adding a function to Type to detect aliasing
+        danger_flag = id(outstorage) in instorage_id or \
+                (type(outstorage) == numpy.ndarray and
+                 outstorage.flags['OWNDATA'] == False)
+        if danger_flag:
+            # first find out which input it aliases.
+            # In theory, theano's view_map only allows 1 output to alias 1 input.
+            # Checking for multiple aliases just in case...
+            alias = {}
+            for ii, inode in enumerate(node.inputs):
+                if _may_share_memory(outstorage, storage_map[inode][0]):
+                    alias[ii] = (ii, inode)
+            # if it's aliased but declared in the view/destroy map, we're OK
+            viewmapped = False
+            view_map = getattr(node.op, 'view_map', {})
+            destroy_map = getattr(node.op, 'destroy_map', {})
+            for key, val in view_map.items() + destroy_map.items():
+                val = val[0]  # view_map stores a list with a single entry
+                if key == oi and val in alias.keys():
+                    # phew, it's view-mapped; we're good
+                    input_alias = alias.pop(val)
+            # if there's anything left in alias, there's a problem
+            if len(alias):
+                raise BadViewMap(node, oi, outstorage, alias.keys())
+        # need to check output->output aliasing as well
+        if not input_alias and _is_used_in_graph(onode):
+            for other_oi, other_onode in enumerate(node.outputs):
+                if other_oi == oi:
+                    continue
+                other_storage = storage_map[other_onode][0]
+                # check to see if we share memory with this other output;
+                # this is not a problem if the other output is not actually used
+                if _is_used_in_graph(other_onode) and \
+                        _may_share_memory(outstorage, other_storage):
+                    raise BadViewMap(node, oi, outstorage, out_alias_idx=other_oi)

+def _may_share_memory(a, b):
+    return (hasattr(a, '__array_interface__') and
+            hasattr(b, '__array_interface__') and
+            numpy.may_share_memory(a, b))

+def _is_function_output(node):
+    """
+    Returns True if the node in question is a final output of the graph
+    """
+    return node.clients == [('output', 1)]

+def _is_used_in_graph(node):
+    return not (_is_function_output(node) or node.clients == [])
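The ``OWNDATA`` heuristic and ``numpy.may_share_memory`` test used above are easy to check in isolation; a quick plain-numpy illustration (independent of this commit):

    import numpy

    base = numpy.arange(6.0)
    view = base[1:4]                            # slicing returns a view
    print view.flags['OWNDATA']                 # False: borrows base's buffer
    print numpy.may_share_memory(base, view)    # True
    print numpy.may_share_memory(base, base.copy())  # False: the copy owns its data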
def _lessbroken_deepcopy(a):
+    """
+    :param a: any object
+
+    Returns a copy of `a` that shares no internal storage with the original. A deep copy.
+    This function handles numpy arrays specially to avoid some bug I had one time... (possibly
+    about copying 1-d arrays?)
+    """
+    # this exists because numpy copies are broken
    if type(a) is numpy.ndarray:
        rval = numpy.array(a, copy=True, dtype=a.dtype)
    else:
@@ -718,7 +799,7 @@ class _Linker(gof.link.LocalLinker):
        for r, s in storage_map.iteritems():
            assert s[0] is None

-        try:
+        #try:
            # compute the value of all variables
            for i, (thunk_py, thunk_c, node) in enumerate(zip(thunks_py, thunks_c, order)):
                this_node_destroyed_variables = set()
@@ -738,6 +819,8 @@ class _Linker(gof.link.LocalLinker):
                    _check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
                            clobber_dr_vals=True)

+                    _check_viewmap(node, storage_map)
+
                    # check output values for type-correctness
                    #retrieve each output from the storage_map
                    for r in node.outputs:
@@ -760,6 +843,8 @@ class _Linker(gof.link.LocalLinker):
                    _check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
                            clobber_dr_vals=False)

+                    _check_viewmap(node, storage_map)
+
                    for r in node.outputs:
                        # check output values for type-correctness
                        if not r.type.is_valid_value(storage_map[r][0]):
@@ -780,8 +865,8 @@ class _Linker(gof.link.LocalLinker):
                for r in node.inputs:
                    storage_map[r][0] = None

-        except:
-            raise_with_op(node)
+        #except:
+        #    raise_with_op(node)

        _find_bad_optimizations(order, env.equivalence_tracker.reasons, r_vals)
@@ -898,7 +983,7 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions
                        pass
                    print >> sys.stderr, "EXITING"
-                    sys.exit(1)
+                    sys.exit(1) #there is a ticket related to not calling sys.exit here.
                    break
            else:
                if self.verbose:
......
@@ -7,6 +7,7 @@ import theano
import theano.tensor
from theano.compile import debugmode
import theano.compile
+import unittest

def test0():
    x = theano.tensor.dvector()
@@ -342,34 +343,65 @@ def test_baddestroymap_c():
    pass

-def test_badviewmap():
-    class BadAdd(gof.Op):
+class Test_ViewMap(unittest.TestCase):
+
+    class BadAddRef(gof.Op):
        def make_node(self, a, b):
            c = b.type()
            return gof.Apply(self, [a,b], [c])
        def perform(self, node, (a,b), (c,)):
            c[0] = b

+    class BadAddSlice(gof.Op):
+        def make_node(self, a, b):
+            c = b.type()
+            return gof.Apply(self, [a,b], [c])
+        def perform(self, node, (a,b), (c,)):
+            c[0] = b[1:3]
+
+    def test_badviewmap_ref(self):
        x = theano.tensor.dvector()
        y = theano.tensor.dvector()
-    f = theano.function([x, y], BadAdd()(x,y), mode='DEBUG_MODE')
+        f = theano.function([x, y], self.BadAddRef()(x,y), mode='DEBUG_MODE')
+        try:
+            f([1,2], [3,4])
+            assert False #failed to raise error
+        except debugmode.BadViewMap:
+            return
+
+    def test_badviewmap_slice(self):
+        x = theano.tensor.dvector()
+        y = theano.tensor.dvector()
+        f = theano.function([x, y], self.BadAddSlice()(x,y), mode='DEBUG_MODE')
        try:
            f([1,2], [3,4])
            assert False #failed to raise error
        except debugmode.BadViewMap:
            return

-def test_badviewmap_c():
+    def test_goodviewmap(self):
+        goodop = self.BadAddRef()
+        goodop.view_map = {0: [1]}
+        x = theano.tensor.dvector()
+        y = theano.tensor.dvector()
+        f = theano.function([x, y], goodop(x,y), mode='DEBUG_MODE')
+        try:
+            f([1,5,1], [3,4,2,1,4])
+            return
+        except debugmode.BadViewMap:
+            assert False #raised BadViewMap despite a correctly declared view_map
+
+    def test_badviewmap_c(self):
        x = theano.tensor.dvector()
        f = theano.function([x], wb1i(x), mode=debugmode.DebugMode(check_py_code=False))
        try:
            f([1,2])
            assert False #failed to raise error
-        except debugmode.BadDestroyMap:
+        except debugmode.BadViewMap:
            pass
-def test_aliased_outputs_ok():
+    def test_aliased_outputs_ok(self):
        #here aliased outputs is ok because they are both aliased to an input as well
        class CustomOp(gof.Op):
            view_map = {0:[0], 1:[0]}
@@ -377,20 +409,20 @@ def test_aliased_outputs_ok():
                c = a.type()
                d = a.type()
                return gof.Apply(self, [a,b], [c,d])
-            def perform(self, node, (a,b), (c,)):
+            def perform(self, node, (a,b), (c,d)):
                c[0] = a
                d[0] = a[1:]

-    x = theano.tensor.dvector()
-    y = theano.tensor.dvector()
+        x = theano.tensor.dvector('x')
+        y = theano.tensor.dvector('y')
        f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
-    r0, r1 = f([1,2,3,4])
+        r0, r1 = f([1,2,3,4],[5,6,7,8])
        assert numpy.all(r0 == [1,2,3,4])
        assert numpy.all(r1 == [2,3,4])

-def test_aliased_outputs_ok_output():
+    def test_aliased_outputs_ok_output(self):
        # here aliased outputs is ok because they are both outputs of the function as a whole and
        # thus not destroy-able
        class CustomOp(gof.Op):
@@ -398,7 +430,7 @@ def test_aliased_outputs_ok_output():
                c = a.type()
                d = a.type()
                return gof.Apply(self, [a,b], [c,d])
-            def perform(self, node, (a,b), (c,)):
+            def perform(self, node, (a,b), (c,d)):
                r = a * 2
                c[0] = r
                d[0] = r[1:]
@@ -407,12 +439,12 @@ def test_aliased_outputs_ok_output():
        y = theano.tensor.dvector()
        f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
-    r0, r1 = f([1,2,3,4])
+        r0, r1 = f([1,2,3,4],[5,6,7,8])
        assert numpy.all(r0 == [2,4,6,8])
        assert numpy.all(r1 == [4,6,8])

-def test_aliased_outputs_ok_shadow():
+    def test_aliased_outputs_ok_shadow(self):
        # here the alias between outputs is ok because one of them is not used for subsequent
        # computation. This is like the case where we use one output as a memory buffer to serve
        # another output.
@@ -421,21 +453,21 @@ def test_aliased_outputs_ok_shadow():
                c = a.type()
                d = a.type()
                return gof.Apply(self, [a,b], [c,d])
-            def perform(self, node, (a,b), (c,)):
+            def perform(self, node, (a,b), (c,d)):
                r = a * 1
                c[0] = r
                d[0] = r[1:]

-    x = theano.tensor.dvector()
-    y = theano.tensor.dvector()
+        x = theano.tensor.dvector('x')
+        y = theano.tensor.dvector('y')
        f = theano.function([x, y], CustomOp()(x,y)[0] * 2, mode='DEBUG_MODE')
-    r0 = f([1,2,3,4])
+        r0 = f([1,2,3,4],[5,6,7,8])
        assert numpy.all(r0 == [2,4,6,8])

-def test_aliased_outputs_bad():
+    def test_aliased_outputs_bad(self):
        # here the alias between outputs is not ok because destroying one destroys the other, but
        # there's no way to warn theano about it through the view_map mechanism.
        class CustomOp(gof.Op):
@@ -443,7 +475,7 @@ def test_aliased_outputs_bad():
                c = a.type()
                d = a.type()
                return gof.Apply(self, [a,b], [c,d])
-            def perform(self, node, (a,b), (c,)):
+            def perform(self, node, (a,b), (c,d)):
                r = a * 1
                c[0] = r[:-1]
                d[0] = r[1:]
@@ -456,14 +488,14 @@ def test_aliased_outputs_bad():
        f = theano.function([x, y], out, mode='DEBUG_MODE')
        try:
-        r0 = f([1,2,3,4])
+            r0 = f([1,2,3,4],[5,6,7,8])
            assert False # DebugMode should have caught the error
        except debugmode.BadViewMap, e:
+            print e
            pass

        # the situation can be rescued by picking one of the inputs and pretending that it is
        # aliased to both the outputs. This unfairly disables any destructive operations on the
        # input, but guarantees correctness.
-    custom_op.view_map = {0:[0], 1:[1]}
-    f([1,2,3,4])
+        #custom_op.view_map = {0:[0], 1:[1]}
+        #f([1,2,3,4],[5,6,7,8])
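If the rescue described in that comment were enabled, it would read as the commented-out lines above do: declaring each output a view of one input is a white lie (the outputs really alias each other through ``r``), but it marks both inputs as non-destroyable. The commit leaves this disabled, plausibly because the new output-to-output check can still flag the alias; treat the following as a hedged sketch of the intent, not verified behavior:

        # Hedged sketch of the proposed workaround (disabled in this commit):
        # pretend output 0 views input 0 and output 1 views input 1, which
        # conservatively blocks destructive reuse of both inputs.
        custom_op = CustomOp()
        custom_op.view_map = {0: [0], 1: [1]}
        f = theano.function([x, y], custom_op(x, y), mode='DEBUG_MODE')
        f([1,2,3,4], [5,6,7,8])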
@@ -114,7 +114,7 @@ tanh_rnn = TanhRnn()
class TanhRnnGrad(Op):
    """Gradient calculation for TanhRnn"""
+    view_map = {0: [2]}

    def __init__(self):
        pass
......