Commit 529bb53d, authored by James Bergstra

moved grad() from gradient to tensor, as per #102

Parent 5d01f64b
...@@ -228,43 +228,6 @@ class _test_grad_sources_inputs(unittest.TestCase): ...@@ -228,43 +228,6 @@ class _test_grad_sources_inputs(unittest.TestCase):
self.failUnless(g[a1.inputs[1]] == 11) self.failUnless(g[a1.inputs[1]] == 11)
class _test_grad(unittest.TestCase):
    """Tests for the module-level grad() entry point.

    Uses a dummy op whose grad() method always returns the same pair of
    sentinel Results, so each test can check with ``is`` exactly which
    objects grad() passes back to the caller.
    """

    class O(gof.op.Op):
        # Minimal two-input / two-output op.  Its grad() ignores its
        # arguments and returns the pre-built sentinels (gval0, gval1).
        def __init__(self):
            self.inputs = [gof.result.Result(),gof.result.Result()]
            self.outputs = [gof.result.Result(),gof.result.Result()]
            # Sentinel gradient Results handed out by grad() below.
            self.gval0 = gof.result.Result()
            self.gval1 = gof.result.Result()
        def grad(self, (x0,x1), (gz0,gz1)):  # Python 2 tuple-parameter syntax
            return self.gval0, self.gval1

    def test_1param(self):
        """grad: Test passing a single result param"""
        a1 = _test_grad.O()
        # A single (non-list) wrt argument yields the lone gradient Result.
        self.failUnless(a1.gval0 is grad(a1.outputs[0], a1.inputs[0]))

    def test_Nparam(self):
        """grad: Test passing multiple result params"""
        a1 = _test_grad.O()
        # A list wrt argument yields a list, in the same order as the inputs.
        g0,g1 = grad(a1.outputs[0], a1.inputs)
        self.failUnless(a1.gval0 is g0)
        self.failUnless(a1.gval1 is g1)

    def test_1None_rval(self):
        """grad: Test returning a single None from grad"""
        a1 = _test_grad.O()
        # Things that receive no gradient (another output, or an arbitrary
        # non-graph object) come back as None.
        self.failUnless(None is grad(a1.outputs[0], a1.outputs[1]))
        self.failUnless(None is grad(a1.outputs[0], 'wtf'))

    def test_NNone_rval(self):
        """grad: Test returning some Nones from grad"""
        a1 = _test_grad.O()
        # Mixed list: real inputs get their gradients, the junk entry gets None.
        g0,g1,g2 = grad(a1.outputs[0], a1.inputs + ['wtf'])
        self.failUnless(a1.gval0 is g0)
        self.failUnless(a1.gval1 is g1)
        self.failUnless(None is g2)
def matrix(): def matrix():
return tensor.Tensor('float64', [0,0]) return tensor.Tensor('float64', [0,0])
......
...@@ -258,7 +258,7 @@ class _testCase_dot(unittest.TestCase): ...@@ -258,7 +258,7 @@ class _testCase_dot(unittest.TestCase):
y = dense_from_sparse(dot(w.T, xw)) y = dense_from_sparse(dot(w.T, xw))
diff = x-y diff = x-y
loss = tensor.sum(tensor.sqr(diff)) loss = tensor.sum(tensor.sqr(diff))
gw = gradient.grad(loss, w) gw = tensor.grad(loss, w)
trainfn = compile.Function([x, w], [y, loss, gw]) trainfn = compile.Function([x, w], [y, loss, gw])
x = numpy.asarray([[1., 2], [3, 4], [2, 1]]) x = numpy.asarray([[1., 2], [3, 4], [2, 1]])
...@@ -284,7 +284,7 @@ class _testCase_dot(unittest.TestCase): ...@@ -284,7 +284,7 @@ class _testCase_dot(unittest.TestCase):
y = dense_from_sparse(dot(w.T, xw)) y = dense_from_sparse(dot(w.T, xw))
diff = x-y diff = x-y
loss = tensor.sum(tensor.sqr(diff)) loss = tensor.sum(tensor.sqr(diff))
gw = gradient.grad(loss, w) gw = tensor.grad(loss, w)
trainfn = compile.Function([x, w], [y, loss, gw]) trainfn = compile.Function([x, w], [y, loss, gw])
x = xorig x = xorig
......
...@@ -506,7 +506,7 @@ def verify_grad(testcase, op_cls, pt, n_tests=1, rng=numpy.random, eps=0.0000001 ...@@ -506,7 +506,7 @@ def verify_grad(testcase, op_cls, pt, n_tests=1, rng=numpy.random, eps=0.0000001
num_grad = gradient.numeric_grad(cost_fn, pt) num_grad = gradient.numeric_grad(cost_fn, pt)
symbolic_grad = gradient.grad(cost, tensor_pt,astensor(1.0,name='g_cost')) symbolic_grad = grad(cost, tensor_pt,astensor(1.0,name='g_cost'))
if 0: if 0:
print '-------' print '-------'
print '----------' print '----------'
...@@ -840,7 +840,7 @@ class T_subtensor(unittest.TestCase): ...@@ -840,7 +840,7 @@ class T_subtensor(unittest.TestCase):
n = astensor(numpy.random.rand(2,3)) n = astensor(numpy.random.rand(2,3))
z = scal.constant(0) z = scal.constant(0)
t = n[z:,z] t = n[z:,z]
gn = gradient.grad(sum(exp(t)), n) gn = grad(sum(exp(t)), n)
gval = eval_outputs([gn]) gval = eval_outputs([gn])
s0 = 'array([ 2.05362099, 0. , 0. ])' s0 = 'array([ 2.05362099, 0. , 0. ])'
s1 = 'array([ 1.55009327, 0. , 0. ])' s1 = 'array([ 1.55009327, 0. , 0. ])'
...@@ -850,7 +850,7 @@ class T_subtensor(unittest.TestCase): ...@@ -850,7 +850,7 @@ class T_subtensor(unittest.TestCase):
def test_grad_0d(self): def test_grad_0d(self):
n = astensor(numpy.random.rand(2,3)) n = astensor(numpy.random.rand(2,3))
t = n[1,0] t = n[1,0]
gn = gradient.grad(sum(exp(t)), n) gn = grad(sum(exp(t)), n)
gval = eval_outputs([gn]) gval = eval_outputs([gn])
g0 = repr(gval[0,:]) g0 = repr(gval[0,:])
g1 = repr(gval[1,:]) g1 = repr(gval[1,:])
...@@ -1084,7 +1084,7 @@ class _testCase_matinv(unittest.TestCase): ...@@ -1084,7 +1084,7 @@ class _testCase_matinv(unittest.TestCase):
# Sum of squared errors # Sum of squared errors
ssdiff = sum((diff**2.0)) ssdiff = sum((diff**2.0))
g_b = gradient.grad(ssdiff, b) g_b = grad(ssdiff, b)
# compilation to function # compilation to function
# [a,b] are the inputs, [ssdiff,g_b] are the outputs # [a,b] are the inputs, [ssdiff,g_b] are the outputs
...@@ -1500,6 +1500,43 @@ class T_stdlib(unittest.TestCase): ...@@ -1500,6 +1500,43 @@ class T_stdlib(unittest.TestCase):
self.failUnless(t.data == 1.0) self.failUnless(t.data == 1.0)
self.failUnless(t.data is not tt.data) self.failUnless(t.data is not tt.data)
class _test_grad(unittest.TestCase):
    """Tests for the grad() entry point.

    Uses a dummy op whose grad() method always returns the same pair of
    sentinel scalar Results, so each test can check with ``is`` exactly
    which objects grad() passes back to the caller.
    """

    class O(gof.op.Op):
        # Minimal two-input / two-output op built from named scalars.  Its
        # grad() ignores its arguments and returns the sentinels (gval0, gval1).
        def __init__(self):
            self.inputs = [scalar('a'),scalar('c')]
            self.outputs = [scalar('b'),scalar('d')]
            # Sentinel gradient Results handed out by grad() below.
            self.gval0 = scalar('e')
            self.gval1 = scalar('f')
        def grad(self, (x0,x1), (gz0,gz1)):  # Python 2 tuple-parameter syntax
            return self.gval0, self.gval1

    def test_1param(self):
        """grad: Test passing a single result param"""
        a1 = _test_grad.O()
        # A single (non-list) wrt argument yields the lone gradient Result.
        self.failUnless(a1.gval0 is grad(a1.outputs[0], a1.inputs[0]))

    def test_Nparam(self):
        """grad: Test passing multiple result params"""
        a1 = _test_grad.O()
        # A list wrt argument yields a list, in the same order as the inputs.
        g0,g1 = grad(a1.outputs[0], a1.inputs)
        self.failUnless(a1.gval0 is g0)
        self.failUnless(a1.gval1 is g1)

    def test_1None_rval(self):
        """grad: Test returning a single None from grad"""
        a1 = _test_grad.O()
        # Things that receive no gradient (another output, or an arbitrary
        # non-graph object) come back as None.
        self.failUnless(None is grad(a1.outputs[0], a1.outputs[1]))
        self.failUnless(None is grad(a1.outputs[0], 'wtf'))

    def test_NNone_rval(self):
        """grad: Test returning some Nones from grad"""
        a1 = _test_grad.O()
        # Mixed list: real inputs get their gradients, the junk entry gets None.
        g0,g1,g2 = grad(a1.outputs[0], a1.inputs + ['wtf'])
        self.failUnless(a1.gval0 is g0)
        self.failUnless(a1.gval1 is g1)
        self.failUnless(None is g2)
......
...@@ -108,24 +108,6 @@ def grad_sources_inputs(sources, graph_inputs): ...@@ -108,24 +108,6 @@ def grad_sources_inputs(sources, graph_inputs):
gmap[r] = g_r gmap[r] = g_r
return gmap return gmap
def grad(cost, param, g_cost=1.0):
    """Build the symbolic gradient of I{cost} with respect to I{param}.

    @type cost: L{Result}
    @type param: L{Result} or list of L{Result}s.
    @rtype: L{Result} or list of L{Result}s (matching the type of I{param})
    @return: symbolic expression(s) for the gradient of I{cost} wrt
    I{param}; any entry that receives no gradient comes back as None.
    """
    # Back-propagate from cost (seeded with g_cost) down to the graph inputs.
    gmap = grad_sources_inputs([(cost, g_cost)], gof.graph.inputs([cost]))
    if not isinstance(param, list):
        return gmap.get(param, None)
    return [gmap.get(p, None) for p in param]
class numeric_grad: class numeric_grad:
def __init__(self, f, pt, eps=1.0e-7): def __init__(self, f, pt, eps=1.0e-7):
"""Return the gradient of f at pt. """Return the gradient of f at pt.
......
...@@ -11,6 +11,7 @@ import gof.result ...@@ -11,6 +11,7 @@ import gof.result
import gof.op import gof.op
import blas # for gemm, dot import blas # for gemm, dot
import gradient
import elemwise as s2t import elemwise as s2t
import scalar as scal import scalar as scal
...@@ -1078,3 +1079,31 @@ class Gemm(_Op): ...@@ -1078,3 +1079,31 @@ class Gemm(_Op):
""" % dict(locals(), **sub) """ % dict(locals(), **sub)
gemm = gof.op.constructor(Gemm) gemm = gof.op.constructor(Gemm)
#########################
# Gradient
#########################
def grad(cost, wrt, g_cost=None):
    """Build the symbolic gradient of I{cost} with respect to I{wrt}.

    @type cost: L{Result}
    @type wrt: L{Result} or list of L{Result}s.
    @type g_cost: L{Result} broadcastable to the size of I{cost}, or None
    @param g_cost: expression for the gradient flowing into cost; defaults
    to {{{ones_like(cost)}}}
    @rtype: L{Result} or list of L{Result}s (matching the type of I{wrt})
    @return: symbolic expression(s) for the gradient of I{cost} wrt I{wrt};
    any entry that receives no gradient comes back as None.
    """
    output_grad = g_cost
    if output_grad is None:
        # Seed back-propagation with a gradient of all ones through cost.
        output_grad = ones_like(cost)
    # Back-propagate from cost down to the graph inputs.
    gmap = gradient.grad_sources_inputs([(cost, output_grad)],
            gof.graph.inputs([cost]))
    if not isinstance(wrt, list):
        return gmap.get(wrt, None)
    return [gmap.get(p, None) for p in wrt]
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment