提交 3051ab8e authored 作者: Olivier Breuleux's avatar Olivier Breuleux

merge

......@@ -258,7 +258,7 @@ class _testCase_dot(unittest.TestCase):
y = dense_from_sparse(dot(w.T, xw))
diff = x-y
loss = tensor.sum(tensor.sqr(diff))
gw = gradient.grad(loss, w)
gw = tensor.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw])
x = numpy.asarray([[1., 2], [3, 4], [2, 1]])
......@@ -284,8 +284,13 @@ class _testCase_dot(unittest.TestCase):
y = dense_from_sparse(dot(w.T, xw))
diff = x-y
loss = tensor.sum(tensor.sqr(diff))
<<<<<<< /u/breuleuo/hg/theano2/_test_sparse.py
gw = gradient.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw])
=======
gw = tensor.grad(loss, w)
trainfn = compile.Function([x, w], [y, loss, gw])
>>>>>>> /tmp/_test_sparse.py~other.JkNMX5
x = xorig
w = mtype((500,3))
......
import traceback
from tensor import *
import tensor # for hidden symbols
......@@ -511,7 +512,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=0.0000001, to
num_grad = gradient.numeric_grad(cost_fn, pt)
symbolic_grad = gradient.grad(cost, tensor_pt,as_tensor(1.0,name='g_cost'))
symbolic_grad = grad(cost, tensor_pt,as_tensor(1.0,name='g_cost'))
if 0:
print '-------'
print '----------'
......@@ -846,7 +847,7 @@ class T_subtensor(unittest.TestCase):
n = as_tensor(numpy.random.rand(2,3))
z = scal.constant(0)
t = n[z:,z]
gn = gradient.grad(sum(exp(t)), n)
gn = grad(sum(exp(t)), n)
gval = eval_outputs([gn])
s0 = 'array([ 2.05362099, 0. , 0. ])'
s1 = 'array([ 1.55009327, 0. , 0. ])'
......@@ -856,7 +857,7 @@ class T_subtensor(unittest.TestCase):
def test_grad_0d(self):
n = as_tensor(numpy.random.rand(2,3))
t = n[1,0]
gn = gradient.grad(sum(exp(t)), n)
gn = grad(sum(exp(t)), n)
gval = eval_outputs([gn])
g0 = repr(gval[0,:])
g1 = repr(gval[1,:])
......@@ -1091,7 +1092,7 @@ class _testCase_matinv(unittest.TestCase):
# Sum of squared errors
ssdiff = sum((diff**2.0))
g_b = gradient.grad(ssdiff, b)
g_b = grad(ssdiff, b)
# compilation to function
# [a,b] are the inputs, [ssdiff,g_b] are the outputs
......@@ -1505,6 +1506,43 @@ class T_tensorfromscalar(unittest.TestCase):
# self.failUnless(t.data == 1.0)
# self.failUnless(t.data is not tt.data)
class _test_grad(unittest.TestCase):
    """Unit tests for the `grad` entry point (single/multiple wrt params, None returns)."""

    class O(gof.op.Op):
        """Toy two-input/two-output Op whose grad() hands back fixed sentinel results."""
        def __init__(self):
            self.inputs = [scalar('a'), scalar('c')]
            self.outputs = [scalar('b'), scalar('d')]
            # Sentinels returned from grad(); the tests check identity against these.
            self.gval0 = scalar('e')
            self.gval1 = scalar('f')

        def grad(self, inputs, g_outputs):
            # Ignore the incoming gradients and return the fixed sentinels.
            return self.gval0, self.gval1

    def test_1param(self):
        """grad: Test passing a single result param"""
        op = _test_grad.O()
        self.failUnless(op.gval0 is grad(op.outputs[0], op.inputs[0]))

    def test_Nparam(self):
        """grad: Test passing multiple result params"""
        op = _test_grad.O()
        g_first, g_second = grad(op.outputs[0], op.inputs)
        self.failUnless(op.gval0 is g_first)
        self.failUnless(op.gval1 is g_second)

    def test_1None_rval(self):
        """grad: Test returning a single None from grad"""
        op = _test_grad.O()
        # A result unrelated to the cost yields None.
        self.failUnless(None is grad(op.outputs[0], op.outputs[1]))
        # So does an object that is not a Result at all.
        self.failUnless(None is grad(op.outputs[0], 'wtf'))

    def test_NNone_rval(self):
        """grad: Test returning some Nones from grad"""
        op = _test_grad.O()
        g_first, g_second, g_junk = grad(op.outputs[0], op.inputs + ['wtf'])
        self.failUnless(op.gval0 is g_first)
        self.failUnless(op.gval1 is g_second)
        self.failUnless(None is g_junk)
......
......@@ -309,8 +309,6 @@ def fast_compute(*outputs):
# return rval
# StateFunction([x, y], [e], (w, w + lr * bla()))
......
......@@ -204,7 +204,8 @@ def stack_search(start, expand, mode='bfs', build_inv = False):
raise ValueError('mode should be bfs or dfs', mode)
rval_set = set()
rval_list = list()
start_pop = start.popleft if mode is 'bfs' else start.pop
if mode is 'bfs': start_pop = start.popleft
else: start_pop = start.pop
expand_inv = {}
while start:
l = start_pop()
......
......@@ -14,11 +14,22 @@ if sys.version_info[:2] < (2,5):
if element:
return True
return False
def partial(func, *args, **keywords):
    """Backport of ``functools.partial`` for Python < 2.5.

    Returns a callable that behaves like *func* with *args* prepended to its
    positional arguments and *keywords* merged into its keyword arguments
    (call-time keywords win over the pre-bound ones).  The returned callable
    exposes ``func``, ``args`` and ``keywords`` attributes, mirroring the
    stdlib implementation.
    """
    def newfunc(*fargs, **fkeywords):
        merged = dict(keywords)
        merged.update(fkeywords)
        return func(*(args + fargs), **merged)
    # Mirror the introspection attributes of functools.partial.
    newfunc.func = func
    newfunc.args = args
    newfunc.keywords = keywords
    return newfunc
else:
# Only bother with this else clause and the __all__ line if you are putting
# this in a separate file.
import __builtin__
all = __builtin__.all
any = __builtin__.any
import functools
partial = functools.partial
__all__ = ['all', 'any']
......@@ -108,24 +108,6 @@ def grad_sources_inputs(sources, graph_inputs):
gmap[r] = g_r
return gmap
def grad(cost, param, g_cost=1.0):
    """Return symbolic gradient expression(s) of I{cost} wrt I{param}.

    @type cost: L{Result}
    @type param: L{Result} or list of L{Result}s.
    @param g_cost: gradient seeded through I{cost} (defaults to 1.0).
    @rtype: L{Result} or list of L{Result}s (depending upon I{param})
    @return: gradient of I{cost} wrt I{param}; a list input yields a list of
        gradients, with None for params the cost does not depend on.
    """
    graph_inputs = gof.graph.inputs([cost])
    g_map = grad_sources_inputs([(cost, g_cost)], graph_inputs)
    # Single param: look it up directly (None when cost doesn't reach it).
    if not isinstance(param, list):
        return g_map.get(param, None)
    # List of params: preserve order, substituting None for missing entries.
    return [g_map.get(p, None) for p in param]
class numeric_grad:
def __init__(self, f, pt, eps=1.0e-7):
"""Return the gradient of f at pt.
......
......@@ -4,10 +4,9 @@ import math
from copy import copy
from functools import partial
import gof
from gof import PropertiedType, Op, PropertiedOp, utils, Result, Constant, Type, Apply, Env
from gof.python25 import partial
def upcast(dtype, *dtypes):
z = numpy.zeros((), dtype = dtype)
......
......@@ -10,11 +10,12 @@ from gof import Result, Op, utils, Destroyer, Viewer, AbstractFunctionError, Typ
import gof
import blas # for gemm, dot
import gradient
import elemwise as s2t
import scalar as scal
from functools import partial
from gof.python25 import partial
def as_tensor(x, name = None):
......@@ -348,8 +349,10 @@ class _tensor_py_operators:
args = slice(*args),
return Subtensor(args)(self, *Subtensor.collapse(args, lambda entry: isinstance(entry, Result)))
#COPYING
def copy(self): return tensor_copy(self)
def __iter__(self):
# This prevents accidental iteration via builtin.sum(self)
raise TypeError('Tensor does not support iteration')
class TensorResult(Result, _tensor_py_operators):
pass
......@@ -381,9 +384,20 @@ class TensorFromScalar(Op):
def perform(self, node, (s, ), (out, )):
out[0] = numpy.asarray(s)
def grad(self, (s,), (dt,)):
raise NotImplementedError('todo: ScalarFromTensor')
return [ScalarFromTensor(dt)]
tensor_from_scalar = TensorFromScalar()
class ScalarFromTensor(Op):
    """Op converting a Tensor result into a Scalar result (inverse of TensorFromScalar)."""

    def __init__(self, s, **kwargs):
        # Only Tensor results may be converted.
        assert isinstance(s, Tensor)
        Op.__init__(self, **kwargs)
        self.inputs = [s]
        # Output scalar inherits the input tensor's dtype.
        self.outputs = [scal.Scalar(s.dtype)]

    def perform(self):
        # Forward the data unchanged; the type wrapper does the reinterpretation.
        self.outputs[0].data = self.inputs[0].data

    def grad(self, inputs, g_outputs):
        # Gradient flows back by wrapping the scalar gradient as a tensor.
        (dt,) = g_outputs
        return [TensorFromScalar(dt)]


scalar_from_tensor = gof.op.constructor(ScalarFromTensor)
##########################
# Unary Operations
......@@ -531,10 +545,13 @@ class Subtensor_dx(Op, Viewer):
cdata = []
for c in self.idx_list:
if isinstance(c, slice):
cdata.append(slice(
None if c.start is None else self.inputs[c.start].data,
None if c.stop is None else self.inputs[c.stop].data,
None if c.step is None else self.inputs[c.step].data))
if c.start is None: start = None
else: start = self.inputs[c.start].data
if c.stop is None: stop = None
else: stop = self.inputs[c.stop].data
if c.step is None: step = None
else: step = self.inputs[c.step].data
cdata.append(slice(start, stop, step))
else:
d = self.inputs[c].data
assert 'int' in str(d.dtype)
......@@ -664,7 +681,6 @@ class Subtensor(Op):
# FIXME: this doesn't work if there are slices in the list because for some mysterious reason slice is unhashable
return hash(tuple(self.idx_list))
class SetSubtensor(Subtensor):
view_map = {}
destroy_map = {0: [0]}
......@@ -1025,3 +1041,31 @@ class Gemm(Op):
""" % dict(locals(), **sub)
gemm = Gemm()
#########################
# Gradient
#########################
def grad(cost, wrt, g_cost=None):
    """Return symbolic gradient expression(s) of I{cost} with respect to I{wrt}.

    @type cost: L{Result}
    @type wrt: L{Result} or list of L{Result}s.
    @type g_cost: L{Result} broadcastable to size of I{cost}, or None
    @param g_cost: an expression for the gradient through cost. The default is
        {{{ones_like(cost)}}}
    @rtype: L{Result} or list of L{Result}s (depending upon I{wrt})
    @return: gradient of I{cost} wrt I{wrt}; a list input yields a list of
        gradients, with None for results the cost does not depend on.
    """
    seed = ones_like(cost) if g_cost is None else g_cost
    graph_inputs = gof.graph.inputs([cost])
    g_map = gradient.grad_sources_inputs([(cost, seed)], graph_inputs)
    # Single wrt result: direct lookup (None when cost doesn't reach it).
    if not isinstance(wrt, list):
        return g_map.get(wrt, None)
    # List of wrt results: keep order, None for entries without a gradient.
    return [g_map.get(p, None) for p in wrt]
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论