提交 030bb405 authored 作者: James Bergstra's avatar James Bergstra

merge

...@@ -221,81 +221,7 @@ Advanced example ...@@ -221,81 +221,7 @@ Advanced example
Complex models can be implemented by subclassing ``Module`` (though that is not mandatory). Here is a complete, extensible (and working) regression model implemented using this system:
.. code-block:: python .. literalinclude:: ../code/regression.py
class RegressionLayer(M.Module):
    """Affine regression model expressed as a theano ``Module``.

    Builds the symbolic graph for ``prediction = f(dot(input, w) + b)``,
    the training cost (classification cost plus optional regularization),
    and two compiled interface methods:

    * ``update(input, target)`` -- one gradient-descent step on ``w``/``b``,
      returning the cost.
    * ``apply(input)`` -- the model's prediction.

    Subclasses must implement ``build_prediction()`` and
    ``build_classification_cost(target)``; ``build_regularization()`` may be
    overridden to add a penalty term (the default is a constant zero).
    """

    def __init__(self, input=None, target=None, regularize=True):
        super(RegressionLayer, self).__init__()  # boilerplate
        # MODEL CONFIGURATION
        self.regularize = regularize
        # ACQUIRE/MAKE INPUT AND TARGET
        # Compare against None explicitly: a symbolic theano variable must
        # not be probed for truth value, and a caller-supplied variable must
        # never be silently replaced.
        if input is None:
            input = T.matrix('input')
        if target is None:
            target = T.matrix('target')
        # HYPER-PARAMETERS
        self.stepsize = T.scalar()  # a stepsize for gradient descent
        # PARAMETERS
        self.w = T.matrix()  # the linear transform to apply to our input points
        self.b = T.vector()  # biases, making the transform affine instead of linear
        # REGRESSION MODEL
        self.activation = T.dot(input, self.w) + self.b
        self.prediction = self.build_prediction()
        # CLASSIFICATION COST
        self.classification_cost = self.build_classification_cost(target)
        # REGULARIZATION COST
        self.regularization = self.build_regularization()
        # TOTAL COST
        self.cost = self.classification_cost
        if self.regularize:
            self.cost = self.cost + self.regularization
        # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
        self.grad_w, self.grad_b = T.grad(self.cost, [self.w, self.b])
        # INTERFACE METHODS
        self.update = M.Method([input, target],
                               self.cost,
                               updates={self.w: self.w - self.stepsize * self.grad_w,
                                        self.b: self.b - self.stepsize * self.grad_b})
        self.apply = M.Method(input, self.prediction)

    def params(self):
        """Return the fittable parameters ``(w, b)``."""
        return self.w, self.b

    def _instance_initialize(self, obj, input_size=None, target_size=None,
                             seed=1827, **init):
        """Initialize a module instance's parameter values.

        ``obj`` is an "instance" of this module, holding values for each
        member and compiled functions for each method.  When both
        ``input_size`` and ``target_size`` are given, ``w`` is drawn
        uniformly from [-0.5, 0.5) with a seeded RNG (reproducible) and
        ``b`` is zeroed; any extra keyword arguments override these
        defaults via ``M.default_initialize``.
        """
        if input_size and target_size:
            # initialize w and b in a special way using input_size and target_size
            sz = (input_size, target_size)
            rng = N.random.RandomState(seed)
            obj.w = rng.uniform(size=sz, low=-0.5, high=0.5)
            obj.b = N.zeros(target_size)
            obj.stepsize = 0.01
        # default_initialize takes all the name: value pairs in init and sets
        # the property with that name to the provided value; this covers
        # setting stepsize, l2_coef (w and b can be set that way too).  We
        # call it last so explicit keyword arguments supersede the defaults
        # assigned above.
        M.default_initialize(obj, **init)

    def build_regularization(self):
        """Default regularization penalty: none (constant zero)."""
        return T.zero()  # no regularization!
class SoftmaxXERegression(RegressionLayer):
    """Softmax regression trained with cross-entropy (XE) loss."""

    def build_prediction(self):
        # Softmax turns the affine activation into a row-wise probability
        # distribution over the target classes.
        return NN.softmax(self.activation)

    def build_classification_cost(self, target):
        """Return the total cross-entropy between target and prediction.

        BUG FIX: the previous code summed ``-(target - prediction)**2``,
        i.e. the *negative* of a squared-error measure -- gradient descent
        on that "cost" would increase the error.  The leading minus sign is
        only correct together with the log-likelihood matrix below, which
        is what the class name and docstring promise.
        """
        self.classification_cost_matrix = target * T.log(self.prediction) \
                + (1 - target) * T.log(1 - self.prediction)
        # Per-example negative log-likelihood (rows sum over classes).
        self.classification_costs = -T.sum(self.classification_cost_matrix, axis=1)
        return T.sum(self.classification_costs)

    def build_regularization(self):
        # L2 weight decay, with a tunable coefficient exposed as a
        # hyper-parameter of the compiled module.
        self.l2_coef = T.scalar()
        return self.l2_coef * T.sum(self.w * self.w)
Here is how we use the model:
......
...@@ -7,6 +7,7 @@ import theano ...@@ -7,6 +7,7 @@ import theano
import theano.tensor import theano.tensor
from theano.compile import debugmode from theano.compile import debugmode
import theano.compile import theano.compile
import unittest
def test0(): def test0():
x = theano.tensor.dvector() x = theano.tensor.dvector()
...@@ -342,128 +343,159 @@ def test_baddestroymap_c(): ...@@ -342,128 +343,159 @@ def test_baddestroymap_c():
pass pass
def test_badviewmap(): class Test_ViewMap(unittest.TestCase):
class BadAdd(gof.Op):
class BadAddRef(gof.Op):
def make_node(self, a, b): def make_node(self, a, b):
c = b.type() c = b.type()
return gof.Apply(self, [a,b], [c]) return gof.Apply(self, [a,b], [c])
def perform(self, node, (a,b), (c,)): def perform(self, node, (a,b), (c,)):
c[0] = b c[0] = b
x = theano.tensor.dvector() class BadAddSlice(gof.Op):
y = theano.tensor.dvector()
f = theano.function([x, y], BadAdd()(x,y), mode='DEBUG_MODE')
try:
f([1,2], [3,4])
assert False #failed to raise error
except debugmode.BadViewMap:
return
def test_badviewmap_c():
x = theano.tensor.dvector()
f = theano.function([x], wb1i(x), mode=debugmode.DebugMode(check_py_code=False))
try:
f([1,2])
assert False #failed to raise error
except debugmode.BadDestroyMap:
pass
def test_aliased_outputs_ok():
#here aliased outputs is ok because they are both aliased to an input as well
class CustomOp(gof.Op):
view_map = {0:[0], 1:[0]}
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,)):
c[0] = a
d[0] = a[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4])
assert numpy.all(r0 == [1,2,3,4])
assert numpy.all(r1 == [2,3,4])
def test_aliased_outputs_ok_output():
# here aliased outputs is ok because they are both outputs of the function as a whole and
# thus not destroy-able
class CustomOp(gof.Op):
def make_node(self, a, b): def make_node(self, a, b):
c = a.type() c = b.type()
d = a.type() return gof.Apply(self, [a,b], [c])
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,)):
r = a * 2
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4])
assert numpy.all(r0 == [2,4,6,8])
assert numpy.all(r1 == [4,6,8])
def test_aliased_outputs_ok_shadow():
# here the alias between outputs is ok because one of them is not used for subsequent
# computation. This is like the case where we use one output as a memory buffer to serve
# another output.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,)):
r = a * 1
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y)[0] * 2, mode='DEBUG_MODE')
r0 = f([1,2,3,4])
assert numpy.all(r0 == [2,4,6,8])
def test_aliased_outputs_bad():
# here the alias between outputs is not ok because destroying one destroys the other, but
# there's no way to warn theano about it through the view_map mechanism.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,)): def perform(self, node, (a,b), (c,)):
r = a * 1 c[0] = b[1:3]
c[0] = r[:-1]
d[0] = r[1:] def test_badviewmap_ref(self):
custom_op = CustomOp() x = theano.tensor.dvector()
y = theano.tensor.dvector()
x = theano.tensor.dvector() f = theano.function([x, y], self.BadAddRef()(x,y), mode='DEBUG_MODE')
y = theano.tensor.dvector() try:
bad_xy0, bad_xy1 = custom_op(x, y) f([1,2], [3,4])
out = bad_xy0 * 2 + bad_xy1 * 2 assert False #failed to raise error
f = theano.function([x, y], out, mode='DEBUG_MODE') except debugmode.BadViewMap:
return
try:
r0 = f([1,2,3,4]) def test_badviewmap_slice(self):
assert False # DebugMode should have caught the error x = theano.tensor.dvector()
except debugmode.BadViewMap, e: y = theano.tensor.dvector()
pass f = theano.function([x, y], self.BadAddSlice()(x,y), mode='DEBUG_MODE')
try:
# the situation can be rescued by picking one of the inputs and pretending that it is f([1,2], [3,4])
# aliased to both the outputs. This unfairly disables any destructive operations on the assert False #failed to raise error
# input, but guarantees correctness. except debugmode.BadViewMap:
custom_op.view_map = {0:[0], 1:[1]} return
f([1,2,3,4])
def test_goodviewmap(self):
goodop = self.BadAddRef()
goodop.view_map = {0: [1]}
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], goodop(x,y), mode='DEBUG_MODE')
try:
f([1,5,1], [3,4,2,1,4])
return
except debugmode.BadViewMap:
assert False #failed to raise error
def test_badviewmap_c(self):
x = theano.tensor.dvector()
f = theano.function([x], wb1i(x), mode=debugmode.DebugMode(check_py_code=False))
try:
f([1,2])
assert False #failed to raise error
except debugmode.BadViewMap:
pass
def test_aliased_outputs_ok(self):
#here aliased outputs is ok because they are both aliased to an input as well
class CustomOp(gof.Op):
view_map = {0:[0], 1:[0]}
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
c[0] = a
d[0] = a[1:]
x = theano.tensor.dvector('x')
y = theano.tensor.dvector('y')
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4],[5,6,7,8])
assert numpy.all(r0 == [1,2,3,4])
assert numpy.all(r1 == [2,3,4])
def test_aliased_outputs_ok_output(self):
# here aliased outputs is ok because they are both outputs of the function as a whole and
# thus not destroy-able
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
r = a * 2
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector()
y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4],[5,6,7,8])
assert numpy.all(r0 == [2,4,6,8])
assert numpy.all(r1 == [4,6,8])
def test_aliased_outputs_ok_shadow(self):
# here the alias between outputs is ok because one of them is not used for subsequent
# computation. This is like the case where we use one output as a memory buffer to serve
# another output.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
r = a * 1
c[0] = r
d[0] = r[1:]
x = theano.tensor.dvector('x')
y = theano.tensor.dvector('y')
f = theano.function([x, y], CustomOp()(x,y)[0] * 2, mode='DEBUG_MODE')
r0 = f([1,2,3,4],[5,6,7,8])
assert numpy.all(r0 == [2,4,6,8])
def test_aliased_outputs_bad(self):
# here the alias between outputs is not ok because destroying one destroys the other, but
# there's no way to warn theano about it through the view_map mechanism.
class CustomOp(gof.Op):
def make_node(self, a, b):
c = a.type()
d = a.type()
return gof.Apply(self, [a,b], [c,d])
def perform(self, node, (a,b), (c,d)):
r = a * 1
c[0] = r[:-1]
d[0] = r[1:]
custom_op = CustomOp()
x = theano.tensor.dvector()
y = theano.tensor.dvector()
bad_xy0, bad_xy1 = custom_op(x, y)
out = bad_xy0 * 2 + bad_xy1 * 2
f = theano.function([x, y], out, mode='DEBUG_MODE')
try:
r0 = f([1,2,3,4],[5,6,7,8])
assert False # DebugMode should have caught the error
except debugmode.BadViewMap, e:
print e
pass
# the situation can be rescued by picking one of the inputs and pretending that it is
# aliased to both the outputs. This unfairly disables any destructive operations on the
# input, but guarantees correctness.
#custom_op.view_map = {0:[0], 1:[1]}
#f([1,2,3,4],[5,6,7,8])
...@@ -114,7 +114,7 @@ tanh_rnn = TanhRnn() ...@@ -114,7 +114,7 @@ tanh_rnn = TanhRnn()
class TanhRnnGrad(Op): class TanhRnnGrad(Op):
"""Gradient calculation for TanhRnn""" """Gradient calculation for TanhRnn"""
view_map = {0: [2]}
def __init__(self): def __init__(self):
pass pass
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论