提交 3051ab8e 作者: Olivier Breuleux

merge

...@@ -258,7 +258,7 @@ class _testCase_dot(unittest.TestCase): ...@@ -258,7 +258,7 @@ class _testCase_dot(unittest.TestCase):
y = dense_from_sparse(dot(w.T, xw)) y = dense_from_sparse(dot(w.T, xw))
diff = x-y diff = x-y
loss = tensor.sum(tensor.sqr(diff)) loss = tensor.sum(tensor.sqr(diff))
gw = gradient.grad(loss, w) gw = tensor.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw]) trainfn = compile.function([x, w], [y, loss, gw])
x = numpy.asarray([[1., 2], [3, 4], [2, 1]]) x = numpy.asarray([[1., 2], [3, 4], [2, 1]])
...@@ -284,8 +284,13 @@ class _testCase_dot(unittest.TestCase): ...@@ -284,8 +284,13 @@ class _testCase_dot(unittest.TestCase):
y = dense_from_sparse(dot(w.T, xw)) y = dense_from_sparse(dot(w.T, xw))
diff = x-y diff = x-y
loss = tensor.sum(tensor.sqr(diff)) loss = tensor.sum(tensor.sqr(diff))
<<<<<<< /u/breuleuo/hg/theano2/_test_sparse.py
gw = gradient.grad(loss, w) gw = gradient.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw]) trainfn = compile.function([x, w], [y, loss, gw])
=======
gw = tensor.grad(loss, w)
trainfn = compile.Function([x, w], [y, loss, gw])
>>>>>>> /tmp/_test_sparse.py~other.JkNMX5
x = xorig x = xorig
w = mtype((500,3)) w = mtype((500,3))
......
import traceback
from tensor import * from tensor import *
import tensor # for hidden symbols import tensor # for hidden symbols
...@@ -511,7 +512,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=0.0000001, to ...@@ -511,7 +512,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=0.0000001, to
num_grad = gradient.numeric_grad(cost_fn, pt) num_grad = gradient.numeric_grad(cost_fn, pt)
symbolic_grad = gradient.grad(cost, tensor_pt,as_tensor(1.0,name='g_cost')) symbolic_grad = grad(cost, tensor_pt,as_tensor(1.0,name='g_cost'))
if 0: if 0:
print '-------' print '-------'
print '----------' print '----------'
...@@ -846,7 +847,7 @@ class T_subtensor(unittest.TestCase): ...@@ -846,7 +847,7 @@ class T_subtensor(unittest.TestCase):
n = as_tensor(numpy.random.rand(2,3)) n = as_tensor(numpy.random.rand(2,3))
z = scal.constant(0) z = scal.constant(0)
t = n[z:,z] t = n[z:,z]
gn = gradient.grad(sum(exp(t)), n) gn = grad(sum(exp(t)), n)
gval = eval_outputs([gn]) gval = eval_outputs([gn])
s0 = 'array([ 2.05362099, 0. , 0. ])' s0 = 'array([ 2.05362099, 0. , 0. ])'
s1 = 'array([ 1.55009327, 0. , 0. ])' s1 = 'array([ 1.55009327, 0. , 0. ])'
...@@ -856,7 +857,7 @@ class T_subtensor(unittest.TestCase): ...@@ -856,7 +857,7 @@ class T_subtensor(unittest.TestCase):
def test_grad_0d(self): def test_grad_0d(self):
n = as_tensor(numpy.random.rand(2,3)) n = as_tensor(numpy.random.rand(2,3))
t = n[1,0] t = n[1,0]
gn = gradient.grad(sum(exp(t)), n) gn = grad(sum(exp(t)), n)
gval = eval_outputs([gn]) gval = eval_outputs([gn])
g0 = repr(gval[0,:]) g0 = repr(gval[0,:])
g1 = repr(gval[1,:]) g1 = repr(gval[1,:])
...@@ -1091,7 +1092,7 @@ class _testCase_matinv(unittest.TestCase): ...@@ -1091,7 +1092,7 @@ class _testCase_matinv(unittest.TestCase):
# Sum of squared errors # Sum of squared errors
ssdiff = sum((diff**2.0)) ssdiff = sum((diff**2.0))
g_b = gradient.grad(ssdiff, b) g_b = grad(ssdiff, b)
# compilation to function # compilation to function
# [a,b] are the inputs, [ssdiff,g_b] are the outputs # [a,b] are the inputs, [ssdiff,g_b] are the outputs
...@@ -1505,6 +1506,43 @@ class T_tensorfromscalar(unittest.TestCase): ...@@ -1505,6 +1506,43 @@ class T_tensorfromscalar(unittest.TestCase):
# self.failUnless(t.data == 1.0) # self.failUnless(t.data == 1.0)
# self.failUnless(t.data is not tt.data) # self.failUnless(t.data is not tt.data)
class _test_grad(unittest.TestCase):
    """Exercise the module-level ``grad`` helper with a stub Op.

    The stub Op hands back preset gradient results, so every test can check
    what ``grad`` returns by object identity.
    """

    class O(gof.op.Op):
        """Stub Op with two inputs, two outputs and fixed gradient results."""

        def __init__(self):
            self.inputs = [scalar('a'), scalar('c')]
            self.outputs = [scalar('b'), scalar('d')]
            # Placeholder results returned by grad(); the tests look for
            # these exact objects.
            self.gval0 = scalar('e')
            self.gval1 = scalar('f')

        def grad(self, inputs, output_grads):
            """Return the preset gradients ``(gval0, gval1)``.

            Both arguments must unpack into exactly two items; their values
            are otherwise ignored.
            """
            # Explicit unpacking replaces the Python-2-only tuple-parameter
            # signature ``grad(self, (x0,x1), (gz0,gz1))`` and still rejects
            # arguments of the wrong length.
            _x0, _x1 = inputs
            _gz0, _gz1 = output_grads
            return self.gval0, self.gval1

    def test_1param(self):
        """grad: Test passing a single result param"""
        a1 = _test_grad.O()
        self.assertTrue(a1.gval0 is grad(a1.outputs[0], a1.inputs[0]))

    def test_Nparam(self):
        """grad: Test passing multiple result params"""
        a1 = _test_grad.O()
        g0, g1 = grad(a1.outputs[0], a1.inputs)
        self.assertTrue(a1.gval0 is g0)
        self.assertTrue(a1.gval1 is g1)

    def test_1None_rval(self):
        """grad: Test returning a single None from grad"""
        a1 = _test_grad.O()
        # Neither the sibling output nor an arbitrary string is expected to
        # have a gradient entry, so grad should answer None for both.
        self.assertTrue(None is grad(a1.outputs[0], a1.outputs[1]))
        self.assertTrue(None is grad(a1.outputs[0], 'wtf'))

    def test_NNone_rval(self):
        """grad: Test returning some Nones from grad"""
        a1 = _test_grad.O()
        # Two real inputs plus one bogus entry: the bogus one maps to None.
        g0, g1, g2 = grad(a1.outputs[0], a1.inputs + ['wtf'])
        self.assertTrue(a1.gval0 is g0)
        self.assertTrue(a1.gval1 is g1)
        self.assertTrue(None is g2)
......
...@@ -309,8 +309,6 @@ def fast_compute(*outputs): ...@@ -309,8 +309,6 @@ def fast_compute(*outputs):
# return rval # return rval
# StateFunction([x, y], [e], (w, w + lr * bla())) # StateFunction([x, y], [e], (w, w + lr * bla()))
......
...@@ -204,7 +204,8 @@ def stack_search(start, expand, mode='bfs', build_inv = False): ...@@ -204,7 +204,8 @@ def stack_search(start, expand, mode='bfs', build_inv = False):
raise ValueError('mode should be bfs or dfs', mode) raise ValueError('mode should be bfs or dfs', mode)
rval_set = set() rval_set = set()
rval_list = list() rval_list = list()
start_pop = start.popleft if mode is 'bfs' else start.pop if mode is 'bfs': start_pop = start.popleft
else: start_pop = start.pop
expand_inv = {} expand_inv = {}
while start: while start:
l = start_pop() l = start_pop()
......
...@@ -14,11 +14,22 @@ if sys.version_info[:2] < (2,5): ...@@ -14,11 +14,22 @@ if sys.version_info[:2] < (2,5):
if element: if element:
return True return True
return False return False
def partial(func, *args, **keywords):
    """Return a callable that invokes ``func`` with some arguments pre-bound.

    Fallback for interpreters that lack ``functools.partial``.  Positional
    arguments given here come before those given at call time; keyword
    arguments given at call time override the ones given here.  The returned
    callable exposes ``func``, ``args`` and ``keywords`` attributes.
    """
    def newfunc(*fargs, **fkeywords):
        # Merge into a fresh dict so the pre-bound keywords are never mutated.
        merged = dict(keywords)
        merged.update(fkeywords)
        positional = args + fargs
        return func(*positional, **merged)

    newfunc.keywords = keywords
    newfunc.args = args
    newfunc.func = func
    return newfunc
else: else:
# Only bother with this else clause and the __all__ line if you are putting # Only bother with this else clause and the __all__ line if you are putting
# this in a separate file. # this in a separate file.
import __builtin__ import __builtin__
all = __builtin__.all all = __builtin__.all
any = __builtin__.any any = __builtin__.any
import functools
partial = functools.partial
__all__ = ['all', 'any'] __all__ = ['all', 'any']
...@@ -108,24 +108,6 @@ def grad_sources_inputs(sources, graph_inputs): ...@@ -108,24 +108,6 @@ def grad_sources_inputs(sources, graph_inputs):
gmap[r] = g_r gmap[r] = g_r
return gmap return gmap
def grad(cost, param, g_cost=1.0):
    """
    Symbolic gradient of I{cost} with respect to I{param}.

    @type cost: L{Result}
    @type param: L{Result} or list of L{Result}s.
    @param g_cost: gradient value paired with I{cost} when the gradient map
    is built (defaults to 1.0).
    @rtype: L{Result} or list of L{Result}s (depending upon I{param})
    @return: the gradient expression for I{param}; when I{param} is a list,
    a list with one gradient per element.  An entry is C{None} when the
    gradient map holds nothing for it.
    """
    graph_inputs = gof.graph.inputs([cost])
    gmap = grad_sources_inputs([(cost, g_cost)], graph_inputs)
    # Single-result case first; the list case mirrors it element by element.
    if not isinstance(param, list):
        return gmap.get(param)
    return [gmap.get(p) for p in param]
class numeric_grad: class numeric_grad:
def __init__(self, f, pt, eps=1.0e-7): def __init__(self, f, pt, eps=1.0e-7):
"""Return the gradient of f at pt. """Return the gradient of f at pt.
......
...@@ -4,10 +4,9 @@ import math ...@@ -4,10 +4,9 @@ import math
from copy import copy from copy import copy
from functools import partial
import gof import gof
from gof import PropertiedType, Op, PropertiedOp, utils, Result, Constant, Type, Apply, Env from gof import PropertiedType, Op, PropertiedOp, utils, Result, Constant, Type, Apply, Env
from gof.python25 import partial
def upcast(dtype, *dtypes): def upcast(dtype, *dtypes):
z = numpy.zeros((), dtype = dtype) z = numpy.zeros((), dtype = dtype)
......
...@@ -10,11 +10,12 @@ from gof import Result, Op, utils, Destroyer, Viewer, AbstractFunctionError, Typ ...@@ -10,11 +10,12 @@ from gof import Result, Op, utils, Destroyer, Viewer, AbstractFunctionError, Typ
import gof import gof
import blas # for gemm, dot import blas # for gemm, dot
import gradient
import elemwise as s2t import elemwise as s2t
import scalar as scal import scalar as scal
from functools import partial from gof.python25 import partial
def as_tensor(x, name = None): def as_tensor(x, name = None):
...@@ -348,8 +349,10 @@ class _tensor_py_operators: ...@@ -348,8 +349,10 @@ class _tensor_py_operators:
args = slice(*args), args = slice(*args),
return Subtensor(args)(self, *Subtensor.collapse(args, lambda entry: isinstance(entry, Result))) return Subtensor(args)(self, *Subtensor.collapse(args, lambda entry: isinstance(entry, Result)))
#COPYING def __iter__(self):
def copy(self): return tensor_copy(self) # This prevents accidental iteration via builtin.sum(self)
raise TypeError('Tensor does not support iteration')
class TensorResult(Result, _tensor_py_operators): class TensorResult(Result, _tensor_py_operators):
pass pass
...@@ -381,9 +384,20 @@ class TensorFromScalar(Op): ...@@ -381,9 +384,20 @@ class TensorFromScalar(Op):
def perform(self, node, (s, ), (out, )): def perform(self, node, (s, ), (out, )):
out[0] = numpy.asarray(s) out[0] = numpy.asarray(s)
def grad(self, (s,), (dt,)): def grad(self, (s,), (dt,)):
raise NotImplementedError('todo: ScalarFromTensor') return [ScalarFromTensor(dt)]
tensor_from_scalar = TensorFromScalar() tensor_from_scalar = TensorFromScalar()
# Op that takes a Tensor `s` as its only input and declares one scalar output
# built from the same dtype.
# NOTE(review): this class builds its inputs/outputs in __init__ and uses a
# no-argument perform(), while the neighbouring TensorFromScalar.perform takes
# (node, inputs, outputs) -- confirm which Op API this class is meant to follow.
class ScalarFromTensor(Op):
def __init__(self, s, **kwargs):
# Input validation via assert: skipped entirely when Python runs with -O.
assert isinstance(s, Tensor)
Op.__init__(self, **kwargs)
self.inputs = [s]
self.outputs = [scal.Scalar(s.dtype)]
def perform(self):
# Hands the input's data object to the output as-is (no copy is made here).
self.outputs[0].data = self.inputs[0].data
# Tuple-parameter signature: Python 2 only syntax.
def grad(self, (s,), (dt,)):
# NOTE(review): calls the TensorFromScalar class with an argument, whereas
# the module-level instance is created as TensorFromScalar() with none --
# confirm whether tensor_from_scalar(dt) is what is intended.
return [TensorFromScalar(dt)]
# Module-level constructor for the Op, produced by gof.op.constructor.
scalar_from_tensor = gof.op.constructor(ScalarFromTensor)
########################## ##########################
# Unary Operations # Unary Operations
...@@ -531,10 +545,13 @@ class Subtensor_dx(Op, Viewer): ...@@ -531,10 +545,13 @@ class Subtensor_dx(Op, Viewer):
cdata = [] cdata = []
for c in self.idx_list: for c in self.idx_list:
if isinstance(c, slice): if isinstance(c, slice):
cdata.append(slice( if c.start is None: start = None
None if c.start is None else self.inputs[c.start].data, else: start = self.inputs[c.start].data
None if c.stop is None else self.inputs[c.stop].data, if c.stop is None: stop = None
None if c.step is None else self.inputs[c.step].data)) else: stop = self.inputs[c.stop].data
if c.step is None: step = None
else: step = self.inputs[c.step].data
cdata.append(slice(start, stop, step))
else: else:
d = self.inputs[c].data d = self.inputs[c].data
assert 'int' in str(d.dtype) assert 'int' in str(d.dtype)
...@@ -664,7 +681,6 @@ class Subtensor(Op): ...@@ -664,7 +681,6 @@ class Subtensor(Op):
# FIXME: this doesn't work if there are slices in the list because for some mysterious reason slice is unhashable # FIXME: this doesn't work if there are slices in the list because for some mysterious reason slice is unhashable
return hash(tuple(self.idx_list)) return hash(tuple(self.idx_list))
class SetSubtensor(Subtensor): class SetSubtensor(Subtensor):
view_map = {} view_map = {}
destroy_map = {0: [0]} destroy_map = {0: [0]}
...@@ -1025,3 +1041,31 @@ class Gemm(Op): ...@@ -1025,3 +1041,31 @@ class Gemm(Op):
""" % dict(locals(), **sub) """ % dict(locals(), **sub)
gemm = Gemm() gemm = Gemm()
#########################
# Gradient
#########################
def grad(cost, wrt, g_cost=None):
    """
    Build the symbolic gradient of I{cost} with respect to I{wrt}.

    @type cost: L{Result}
    @type wrt: L{Result}, or list or tuple of L{Result}s.
    @type g_cost: L{Result} broadcastable to size of I{cost}, or None
    @param g_cost: an expression for the gradient through cost.  The default is
    C{ones_like(cost)}.
    @rtype: L{Result} or list of L{Result}s (depending upon I{wrt})
    @return: symbolic expression of gradient of I{cost} with respect to I{wrt}.
    If I{wrt} is a list or tuple, then return a list containing the gradient of
    I{cost} wrt each element.  An entry is C{None} when the gradient map holds
    nothing for it.
    """
    if g_cost is None:
        g_cost = ones_like(cost)
    inputs = gof.graph.inputs([cost])
    gmap = gradient.grad_sources_inputs([(cost, g_cost)], inputs)
    # Accept tuples as well as lists: a tuple used to be looked up as a single
    # key and silently produced None instead of one gradient per element.
    if isinstance(wrt, (list, tuple)):
        return [gmap.get(p, None) for p in wrt]
    return gmap.get(wrt, None)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论