提交 030bb405 authored 作者: James Bergstra's avatar James Bergstra

merge

......@@ -221,81 +221,7 @@ Advanced example
Complex models can be implemented by subclassing ``Module`` (though that is not mandatory). Here is a complete, extensible (and working) regression model implemented using this system:
.. code-block:: python
class RegressionLayer(M.Module):
    """A generic regression module: an affine transform (w, b) of `input`,
    a subclass-supplied prediction and classification cost, optional
    regularization, and gradient-descent `update`/`apply` Methods.
    Subclasses must implement build_prediction() and
    build_classification_cost(); they may override build_regularization().
    """
    def __init__(self, input = None, target = None, regularize = True):
        super(RegressionLayer, self).__init__() #boilerplate
        # MODEL CONFIGURATION
        self.regularize = regularize
        # ACQUIRE/MAKE INPUT AND TARGET
        # NOTE(review): `if not input` tests truthiness; for symbolic
        # variables `input is None` would be the safer check -- confirm.
        if not input:
            input = T.matrix('input')
        if not target:
            target = T.matrix('target')
        # HYPER-PARAMETERS
        self.stepsize = T.scalar() # a stepsize for gradient descent
        # PARAMETERS
        self.w = T.matrix() #the linear transform to apply to our input points
        self.b = T.vector() #a vector of biases, which make our transform affine instead of linear
        # REGRESSION MODEL
        self.activation = T.dot(input, self.w) + self.b
        self.prediction = self.build_prediction()
        # CLASSIFICATION COST
        self.classification_cost = self.build_classification_cost(target)
        # REGULARIZATION COST
        self.regularization = self.build_regularization()
        # TOTAL COST
        self.cost = self.classification_cost
        if self.regularize:
            self.cost = self.cost + self.regularization
        # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
        self.grad_w, self.grad_b = T.grad(self.cost, [self.w, self.b])
        # INTERFACE METHODS
        self.update = M.Method([input, target],
                self.cost,
                updates={self.w: self.w - self.stepsize * self.grad_w,
                         self.b: self.b - self.stepsize * self.grad_b})
        self.apply = M.Method(input, self.prediction)
    def params(self):
        # The fittable parameters of the model, as symbolic variables.
        return self.w, self.b
    def _instance_initialize(self, obj, input_size = None, target_size = None,
            seed = 1827, **init):
        # obj is an "instance" of this module holding values for each member and
        # functions for each method
        if input_size and target_size:
            # initialize w and b in a special way using input_size and target_size
            sz = (input_size, target_size)
            rng = N.random.RandomState(seed)
            obj.w = rng.uniform(size = sz, low = -0.5, high = 0.5)
            obj.b = N.zeros(target_size)
            obj.stepsize = 0.01
        # here we call the default_initialize method, which takes all the name: value
        # pairs in init and sets the property with that name to the provided value
        # this covers setting stepsize, l2_coef; w and b can be set that way too
        # we call it after as we want the parameter to supersede the default value.
        M.default_initialize(obj,**init)
    def build_regularization(self):
        # Default: no regularization term; subclasses may override.
        return T.zero() # no regularization!
class SoftmaxXERegression(RegressionLayer):
    """ XE means cross entropy"""
    def build_prediction(self):
        # Softmax of the affine activation yields a per-row probability
        # distribution over targets.
        return NN.softmax(self.activation)
    def build_classification_cost(self, target):
        # NOTE(review): despite the class name, the active cost below is
        # squared error; the cross-entropy formula is the commented-out line.
        #self.classification_cost_matrix = target * T.log(self.prediction) + (1 - target) * T.log(1 - self.prediction)
        self.classification_cost_matrix = (target - self.prediction)**2
        # NOTE(review): the leading minus sign belongs to the cross-entropy
        # version; applied to squared error it negates the cost -- confirm.
        self.classification_costs = -T.sum(self.classification_cost_matrix, axis=1)
        return T.sum(self.classification_costs)
    def build_regularization(self):
        # L2 weight penalty, scaled by a symbolic hyper-parameter.
        self.l2_coef = T.scalar() # we can add a hyper parameter if we need to
        return self.l2_coef * T.sum(self.w * self.w)
.. literalinclude:: ../code/regression.py
Here is how we use the model:
......
......@@ -136,24 +136,27 @@ class BadDestroyMap(DebugModeError):
class BadViewMap(DebugModeError):
    """Exception: Some perform() or c_code() created a memory alias that wasn't in the view_map"""
    # This block contained interleaved old/new diff residue (two __init__
    # signatures, two __str__ bodies); reconstructed as the new version.
    def __init__(self, node, output_idx, out_storage, in_alias_idx=None, out_alias_idx=None):
        """
        :param node: the Apply node whose output created an undeclared alias
        :param output_idx: position of the offending output in node.outputs
        :param out_storage: the value stored for that output
        :param in_alias_idx: input indices the output aliases (or None)
        :param out_alias_idx: other output indices it aliases (or None)
        """
        super(BadViewMap, self).__init__()
        self.node = node
        self.output_idx = output_idx
        self.out_storage = out_storage
        self.in_alias_idx = in_alias_idx
        self.out_alias_idx = out_alias_idx
    def __str__(self):
        # Render a multi-line report: the node, its declared view/destroy
        # maps, and exactly what the offending output is aliased to.
        sio = StringIO()
        print >> sio, " node:", self.node
        print >> sio, " node.inputs:", [(str(i), id(i)) for i in self.node.inputs]
        print >> sio, " node.outputs:", [(str(i), id(i)) for i in self.node.outputs]
        print >> sio, " view_map:", getattr(self.node.op, 'view_map', {})
        print >> sio, " destroy_map:", getattr(self.node.op, 'destroy_map', {})
        print >> sio, " aliased output:", self.output_idx
        print >> sio, " aliased output storage:", self.out_storage
        if self.in_alias_idx:
            print >> sio, " aliased to inputs:", self.in_alias_idx
        if self.out_alias_idx:
            print >> sio, " aliased to outputs:", self.out_alias_idx
        return sio.getvalue()
class StochasticOrder(DebugModeError):
......@@ -273,7 +276,85 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes, clobber_dr_v
else:
raise BadDestroyMap(node, r_idx, r_vals[r], storage_map[r][0])
def _check_viewmap(node, storage_map):
    """
    This function raises a BadViewMap exception when it detects the following:
    - output node storages aliased to input storage, with no declaration in view_map
    - if not aliased to an input, check if two outputs are aliased together
      and used subsequently in the graph

    :param node: the Apply node whose freshly-computed outputs to inspect
    :param storage_map: maps each variable to its single-element storage cell
    """
    for oi, onode in enumerate(node.outputs):
        input_alias = None
        outstorage = storage_map[onode][0]
        instorage_id = [id(storage_map[i][0]) for i in node.inputs]
        # TODO: investigate ways in which other Types may be aliased
        # TODO: consider adding a function to Type to detect aliasing
        # An output is suspect if it IS an input storage object, or if it is
        # an ndarray that does not own its data (i.e. it views another array).
        danger_flag = id(outstorage) in instorage_id or\
                      (type(outstorage)==numpy.ndarray and
                       outstorage.flags['OWNDATA']==False)
        if danger_flag:
            # first find out which input it aliases
            # In theory, theano's view_map only allows for 1 output to alias 1 input
            # Checking for multiple aliases just in case...
            alias = {}
            for ii, inode in enumerate(node.inputs):
                if _may_share_memory(outstorage, storage_map[inode][0]):
                    alias[ii] = (ii, inode)
            # if its aliased but its declared in the view/destroy map = OK
            view_map = getattr(node.op, 'view_map', {})
            destroy_map = getattr(node.op, 'destroy_map', {})
            for key, val in view_map.items() + destroy_map.items():
                val = val[0] # view_map stores a list with single-entries
                if key == oi and val in alias.keys():
                    # pfeew, its viewmapped. we're good
                    input_alias = alias.pop(val)
            # if there's anything left in alias, there's a problem
            if len(alias):
                raise BadViewMap(node, oi, outstorage, alias.keys())
        # need to check output->output aliasing as well; this is only a
        # problem when both aliased outputs are actually used downstream
        if not input_alias and _is_used_in_graph(onode):
            for other_oi, other_onode in enumerate(node.outputs):
                if other_oi == oi: continue
                other_storage = storage_map[other_onode][0]
                # check to see if we share memory with this other output
                # this is not a problem if the node is not actually used
                if _is_used_in_graph(other_onode) and \
                   _may_share_memory(outstorage, other_storage):
                    raise BadViewMap(node, oi, outstorage, out_alias_idx=other_oi)
def _may_share_memory(a, b):
return (hasattr(a,'__array_interface__') and
hasattr(b,'__array_interface__') and
numpy.may_share_memory(a,b))
def _is_function_output(node):
"""
Returns True if the node in question is the a final output of the graph
"""
return node.clients==[('output', 1)]
def _is_used_in_graph(node):
    """Return True when `node` feeds further computation: it has at least
    one client and is not merely a final function output."""
    if node.clients == []:
        return False
    return not _is_function_output(node)
def _lessbroken_deepcopy(a):
"""
:param a: any object
Returns a copy of `a` that shares no internal storage with the original. A deep copy.
This function handles numpy arrays specially to avoid some bug I had one time... (possibly
about copying 1-d arrays?)
"""
# this exists because numpy copies are broken
if type(a) is numpy.ndarray:
rval = numpy.array(a, copy=True, dtype=a.dtype)
else:
......@@ -718,70 +799,74 @@ class _Linker(gof.link.LocalLinker):
for r, s in storage_map.iteritems():
assert s[0] is None
try:
# compute the value of all variables
for i, (thunk_py, thunk_c, node) in enumerate(zip(thunks_py, thunks_c, order)):
this_node_destroyed_variables = set()
#try:
# compute the value of all variables
for i, (thunk_py, thunk_c, node) in enumerate(zip(thunks_py, thunks_c, order)):
this_node_destroyed_variables = set()
# put a copy of each input into the storage_map
# also, check that inputs have valid values
for r in node.inputs:
assert isinstance(r, gof.Variable)
assert r in r_vals
storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0])
# put a copy of each input into the storage_map
# also, check that inputs have valid values
for r in node.inputs:
assert isinstance(r, gof.Variable)
assert r in r_vals
storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0])
if thunk_py:
thunk_py()
if thunk_py:
thunk_py()
_check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
clobber_dr_vals=True)
_check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
clobber_dr_vals=True)
_check_viewmap(node, storage_map)
# check output values for type-correctness
#retrieve each output from the storage_map
for r in node.outputs:
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0])
#if r in r_vals:
#print >> sys.stderr, 'OUTPUT', r, 'ALREADY HAS_VALUE!', r_vals[r], 'WHAT ABOUT', storage_map[r][0]
assert r not in r_vals
r_vals[r] = storage_map[r][0]
storage_map[r][0] = None #clear the storage_map of outputs for the thunk_c
# check output values for type-correctness
#retrieve each output from the storage_map
for r in node.outputs:
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0])
#if r in r_vals:
#print >> sys.stderr, 'OUTPUT', r, 'ALREADY HAS_VALUE!', r_vals[r], 'WHAT ABOUT', storage_map[r][0]
assert r not in r_vals
r_vals[r] = storage_map[r][0]
storage_map[r][0] = None #clear the storage_map of outputs for the thunk_c
if thunk_c:
if thunk_c:
for r in node.inputs:
# TODO: we only need to overwrite the non-destroyed inputs
storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
for r in node.inputs:
# TODO: we only need to overwrite the non-destroyed inputs
storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
thunk_c()
thunk_c()
_check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
clobber_dr_vals=False)
_check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
clobber_dr_vals=False)
for r in node.outputs:
# check output values for type-correctness
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0])
_check_viewmap(node, storage_map)
if r in r_vals:
# compares the version from thunk_py (in r_vals)
# to the version produced by thunk_c (in storage_map)
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
raise BadClinkerOutput(r, val_py=r_vals[r], val_c=storage_map[r][0])
else:
#retrieve each output from the storage_map
r_vals[r] = storage_map[r][0]
storage_map[r][0] = None #clear the storage_map for the thunk_c
for r in node.outputs:
# check output values for type-correctness
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0])
# we're done with this thunk
# clear everything out of the storage_map
for r in node.inputs:
storage_map[r][0] = None
if r in r_vals:
# compares the version from thunk_py (in r_vals)
# to the version produced by thunk_c (in storage_map)
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
raise BadClinkerOutput(r, val_py=r_vals[r], val_c=storage_map[r][0])
else:
#retrieve each output from the storage_map
r_vals[r] = storage_map[r][0]
storage_map[r][0] = None #clear the storage_map for the thunk_c
# we're done with this thunk
# clear everything out of the storage_map
for r in node.inputs:
storage_map[r][0] = None
except:
raise_with_op(node)
#except:
# raise_with_op(node)
_find_bad_optimizations(order, env.equivalence_tracker.reasons, r_vals)
......@@ -898,7 +983,7 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions
pass
print >> sys.stderr, "EXITING"
sys.exit(1)
sys.exit(1) #there is a ticket related to not calling sys.exit here.
break
else:
if self.verbose:
......
......@@ -7,6 +7,7 @@ import theano
import theano.tensor
from theano.compile import debugmode
import theano.compile
import unittest
def test0():
x = theano.tensor.dvector()
......@@ -342,128 +343,159 @@ def test_baddestroymap_c():
pass
def test_badviewmap():
class BadAdd(gof.Op):
class Test_ViewMap(unittest.TestCase):
class BadAddRef(gof.Op):
def make_node(self, a, b):
c = b.type()
return gof.Apply(self, [a,b], [c])
def perform(self, node, (a,b), (c,)):
c[0] = b
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], BadAdd()(x,y), mode='DEBUG_MODE')
try:
f([1,2], [3,4])
assert False #failed to raise error
except debugmode.BadViewMap:
return
def test_badviewmap_c():
x = theano.tensor.dvector()
f = theano.function([x], wb1i(x), mode=debugmode.DebugMode(check_py_code=False))
try:
f([1,2])
assert False #failed to raise error
except debugmode.BadDestroyMap:
pass
def test_aliased_outputs_ok():
#here aliased outputs is ok because they are both aliased to an input as well
class CustomOp(gof.Op):
view_map = {0:[0], 1:[0]}
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,)):
c[0] = a
d[0] = a[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4])
assert numpy.all(r0 == [1,2,3,4])
assert numpy.all(r1 == [2,3,4])
def test_aliased_outputs_ok_output():
# here aliased outputs is ok because they are both outputs of the function as a whole and
# thus not destroy-able
class CustomOp(gof.Op):
class BadAddSlice(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,)):
r = a * 2
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4])
assert numpy.all(r0 == [2,4,6,8])
assert numpy.all(r1 == [4,6,8])
def test_aliased_outputs_ok_shadow():
# here the alias between outputs is ok because one of them is not used for subsequent
# computation. This is like the case where we use one output as a memory buffer to serve
# another output.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,)):
r = a * 1
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y)[0] * 2, mode='DEBUG_MODE')
r0 = f([1,2,3,4])
assert numpy.all(r0 == [2,4,6,8])
def test_aliased_outputs_bad():
# here the alias between outputs is not ok because destroying one destroys the other, but
# there's no way to warn theano about it through the view_map mechanism.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
c = b.type()
return gof.Apply(self, [a,b], [c])
def perform(self, node, (a,b), (c,)):
r = a * 1
c[0] = r[:-1]
d[0] = r[1:]
custom_op = CustomOp()
x = theano.tensor.dvector()
y = theano.tensor.dvector()
bad_xy0, bad_xy1 = custom_op(x, y)
out = bad_xy0 * 2 + bad_xy1 * 2
f = theano.function([x, y], out, mode='DEBUG_MODE')
try:
r0 = f([1,2,3,4])
assert False # DebugMode should have caught the error
except debugmode.BadViewMap, e:
pass
# the situation can be rescued by picking one of the inputs and pretending that it is
# aliased to both the outputs. This unfairly disables any destructive operations on the
# input, but guarantees correctness.
custom_op.view_map = {0:[0], 1:[1]}
f([1,2,3,4])
c[0] = b[1:3]
def test_badviewmap_ref(self):
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], self.BadAddRef()(x,y), mode='DEBUG_MODE')
try:
f([1,2], [3,4])
assert False #failed to raise error
except debugmode.BadViewMap:
return
def test_badviewmap_slice(self):
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], self.BadAddSlice()(x,y), mode='DEBUG_MODE')
try:
f([1,2], [3,4])
assert False #failed to raise error
except debugmode.BadViewMap:
return
def test_goodviewmap(self):
goodop = self.BadAddRef()
goodop.view_map = {0: [1]}
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], goodop(x,y), mode='DEBUG_MODE')
try:
f([1,5,1], [3,4,2,1,4])
return
except debugmode.BadViewMap:
assert False #failed to raise error
def test_badviewmap_c(self):
x = theano.tensor.dvector()
f = theano.function([x], wb1i(x), mode=debugmode.DebugMode(check_py_code=False))
try:
f([1,2])
assert False #failed to raise error
except debugmode.BadViewMap:
pass
def test_aliased_outputs_ok(self):
#here aliased outputs is ok because they are both aliased to an input as well
class CustomOp(gof.Op):
view_map = {0:[0], 1:[0]}
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
c[0] = a
d[0] = a[1:]
x = theano.tensor.dvector('x')
y = theano.tensor.dvector('y')
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4],[5,6,7,8])
assert numpy.all(r0 == [1,2,3,4])
assert numpy.all(r1 == [2,3,4])
def test_aliased_outputs_ok_output(self):
# here aliased outputs is ok because they are both outputs of the function as a whole and
# thus not destroy-able
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
r = a * 2
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4],[5,6,7,8])
assert numpy.all(r0 == [2,4,6,8])
assert numpy.all(r1 == [4,6,8])
def test_aliased_outputs_ok_shadow(self):
# here the alias between outputs is ok because one of them is not used for subsequent
# computation. This is like the case where we use one output as a memory buffer to serve
# another output.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
r = a * 1
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector('x')
y = theano.tensor.dvector('y')
f = theano.function([x, y], CustomOp()(x,y)[0] * 2, mode='DEBUG_MODE')
r0 = f([1,2,3,4],[5,6,7,8])
assert numpy.all(r0 == [2,4,6,8])
def test_aliased_outputs_bad(self):
# here the alias between outputs is not ok because destroying one destroys the other, but
# there's no way to warn theano about it through the view_map mechanism.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
r = a * 1
c[0] = r[:-1]
d[0] = r[1:]
custom_op = CustomOp()
x = theano.tensor.dvector()
y = theano.tensor.dvector()
bad_xy0, bad_xy1 = custom_op(x, y)
out = bad_xy0 * 2 + bad_xy1 * 2
f = theano.function([x, y], out, mode='DEBUG_MODE')
try:
r0 = f([1,2,3,4],[5,6,7,8])
assert False # DebugMode should have caught the error
except debugmode.BadViewMap, e:
print e
pass
# the situation can be rescued by picking one of the inputs and pretending that it is
# aliased to both the outputs. This unfairly disables any destructive operations on the
# input, but guarantees correctness.
#custom_op.view_map = {0:[0], 1:[1]}
#f([1,2,3,4],[5,6,7,8])
......@@ -114,7 +114,7 @@ tanh_rnn = TanhRnn()
class TanhRnnGrad(Op):
"""Gradient calculation for TanhRnn"""
view_map = {0: [2]}
    def __init__(self):
        # Stateless op: nothing to configure; behavior is fully determined
        # by the class-level view_map declaration.
        pass
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论