fixed orphan order bug in Function, continuing to bring Ops back

上级 1e2dd4e6
...@@ -273,39 +273,5 @@ def matrices(n): ...@@ -273,39 +273,5 @@ def matrices(n):
return [matrix() for i in xrange(n)] return [matrix() for i in xrange(n)]
#TODO: move this to the _test_tensor_ops.py
class _testCase_matinv:# (unittest.TestCase):
def setUp(self):
numpy.random.seed(1)
def matinv(self,dim):
# symbolic program
a,b = matrices(2)
ab = T.dot(a,b)
diff = ab - tensor.tensor(numpy.identity(dim))
ssdiff = T.sum((diff**2.0))
g = grad(ssdiff,None, tensor.tensor(numpy.ones(1)))
# compilation to function
fn = compile.Function([a,b], [ssdiff,g(b)])
# use the function
w = numpy.random.rand(dim,dim)
wi = numpy.random.rand(dim,dim)
for i in xrange(300):
ssd, gw = fn(w,wi)
#print ssdiff
if i == 0:
str0 = str(ssd)
wi -= 0.4 * gw
return str0, str(ssd)
def test_matinv(self):
"""Matrix inversion by gradient descent (eval mode)"""
self.assertEqual(('2.67327580893', '0.000438649434819'), self.matinv(3))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
from tensor import * from tensor import *
import tensor as T import tensor # for hidden symbols
import unittest import unittest
from copy import copy from copy import copy
from compile import Function from compile import Function
import gradient import gradient
import gof import gof, gof.graph
#TODO: consider moving this function / functionality to gradient.py #TODO: consider moving this function / functionality to gradient.py
# rationale: it's tricky, and necessary everytime you want to verify # rationale: it's tricky, and necessary everytime you want to verify
# gradient numerically # gradient numerically
def verify_grad(testcase, op_cls, pt_list, n_tests=1, rng=numpy.random, eps=0.0000001, tol=0.0001): def verify_grad(testcase, op_cls, pt, n_tests=1, rng=numpy.random, eps=0.0000001, tol=0.0001):
"""testcase.failUnless( analytic gradient matches finite-diff gradient) """ """testcase.failUnless( analytic gradient matches finite-diff gradient) """
for test_num in xrange(n_tests): for test_num in xrange(n_tests):
for pt in pt_list: tensor_pt = [tinit(p,name='input %i'%i) for i,p in enumerate(pt)]
tensor_pt = [tensor(p) for p in pt] o = op_cls(*tensor_pt)
o = op_cls(*tensor_pt) if len(o.outputs) > 1:
if len(o.outputs) > 1: raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')
raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs') # we could make loop over outputs making random projections R for each,
# we could make loop over outputs making random projections R for each, # but this doesn't handle the case where not all the outputs are
# but this doesn't handle the case where not all the outputs are # differentiable... so I leave this as TODO for now -JB.
# differentiable... so I leave this as TODO for now -jsb. o_fn = Function(tensor_pt, o.outputs)
o_fn = Function(tensor_pt, o.outputs) o_fn_out = o_fn(*pt)
o_fn_out = o_fn(*pt) random_projection = rng.rand(*o_fn_out.shape)
random_projection = rng.rand(*o_fn_out.shape) t_r = tinit(random_projection)
t_r = tensor(random_projection)
#random projection of o onto t_r
#random projection of o onto t_r cost = sum(t_r * o.outputs[0])
cost = sum(t_r * o.outputs[0]) cost_fn = Function(tensor_pt, [cost])
cost_fn = Function(tensor_pt, [cost])
num_grad = gradient.numeric_grad(cost_fn, pt)
num_grad = gradient.numeric_grad(cost_fn, pt)
symbolic_grad = gradient.grad(cost, tensor_pt,tinit(1.0,name='g_cost'))
grad_fn = Function(tensor_pt, gradient.grad(cost, tensor_pt)) if 0:
print '-------'
analytic_grad = grad_fn() print '----------'
if not isinstance(analytic_grad, (list, tuple)): for op in gof.graph.io_toposort(tensor_pt, symbolic_grad):
analytic_grad = [analytic_grad] print op
grad_fn = Function(tensor_pt, symbolic_grad)
if num_grad.max_err(analytic_grad) > 1.0e-4:
raise Exception(verify_grad.E_grad) analytic_grad = grad_fn(*pt)
if not isinstance(analytic_grad, (list, tuple)):
analytic_grad = [analytic_grad]
if num_grad.max_err(analytic_grad) > 1.0e-4:
raise Exception(verify_grad.E_grad)
verify_grad.E_grad = 'gradient error exceeded tolerance' verify_grad.E_grad = 'gradient error exceeded tolerance'
...@@ -56,7 +61,7 @@ def check_eq2(self, inputs, output, args_in, arg_out): ...@@ -56,7 +61,7 @@ def check_eq2(self, inputs, output, args_in, arg_out):
val = fn(*args_in) val = fn(*args_in)
self.failUnless( numpy.all(val == arg_out), (val, arg_out)) self.failUnless( numpy.all(val == arg_out), (val, arg_out))
def check_eq2(self, inputs, output, args_in, arg_out): def check_eq2_c(self, inputs, output, args_in, arg_out):
fn = Function(inputs, [output], linker_cls = gof.CLinker) fn = Function(inputs, [output], linker_cls = gof.CLinker)
val = fn(*args_in) val = fn(*args_in)
self.failUnless( numpy.all(val == arg_out), (val, arg_out)) self.failUnless( numpy.all(val == arg_out), (val, arg_out))
...@@ -64,20 +69,21 @@ def check_eq2(self, inputs, output, args_in, arg_out): ...@@ -64,20 +69,21 @@ def check_eq2(self, inputs, output, args_in, arg_out):
class T_abs(unittest.TestCase): class T_abs(unittest.TestCase):
def test_impl(self): def test_impl(self):
t = tensor(1.0) t = tinit(1.0)
check_eq(self, t, abs(t), 1.0, 1.0) check_eq(self, t, abs(t), 1.0, 1.0)
check_eq(self, t, abs(t), -1.0, 1.0) check_eq(self, t, abs(t), -1.0, 1.0)
for shape in (2,), (3,4): for shape in (2,), (3,4):
t = tensor(numpy.ones(shape)) t = tinit(numpy.ones(shape))
d = numpy.random.rand(*shape)*2-1.0 d = numpy.random.rand(*shape)*2-1.0
check_eq(self, t, abs(t), d, abs(d)) check_eq(self, t, abs(t), d, abs(d))
check_eq(self, t, abs(t), -d, abs(-d)) check_eq(self, t, abs(t), -d, abs(-d))
def test_grad(self): def test_grad(self):
verify_grad(self, Abs, [[numpy.ones(())], [numpy.ones(3)]]) verify_grad(self, Abs, [numpy.ones(())])
verify_grad(self, Abs, [numpy.ones(3)])
class AbsBadGrad(T._Elemwise): class AbsBadGrad(tensor._Elemwise):
def impl(self, x): def impl(self, x):
return numpy.abs(x) return numpy.abs(x)
def grad(self, x, gz): def grad(self, x, gz):
...@@ -87,52 +93,137 @@ class T_abs(unittest.TestCase): ...@@ -87,52 +93,137 @@ class T_abs(unittest.TestCase):
def test_badgrad(self): def test_badgrad(self):
try: try:
verify_grad(self, T_abs.AbsBadGrad, [[numpy.ones(())], [numpy.ones(3)]]) verify_grad(self, T_abs.AbsBadGrad, [numpy.ones(())])
self.fail() self.fail()
except Exception, e: except Exception, e:
self.failUnless(str(e) == verify_grad.E_grad, str(e)) self.failUnless(str(e) == verify_grad.E_grad, str(e))
class T_fill(unittest.TestCase):
    """Checks the structure of the graph node built by fill()."""
    def test0(self):
        # fill(model, value): integer vector as the model, float scalar as the value
        filled = fill(numpy.asarray([1,2,3]), 9.0)
        node = filled.owner
        self.failUnless(node.__class__ == Fill)
        model_in = node.inputs[0]
        value_in = node.inputs[1]
        result = node.outputs[0]
        # the model input keeps its rank-1, integer nature
        self.failUnless(model_in.broadcastable == (0,))
        self.failUnless(model_in.dtype[0:3] == 'int')
        # the value input is a rank-0 float
        self.failUnless(value_in.broadcastable == ())
        self.failUnless(value_in.dtype[0:3] == 'flo')
        # the output has the model's broadcastable pattern and the value's dtype family
        self.failUnless(result.broadcastable == (0,))
        self.failUnless(result.dtype[0:3] == 'flo')
class T_sum(unittest.TestCase): class T_sum(unittest.TestCase):
def test_impl(self): def test_impl(self):
t = tensor(0.0) t = tinit(0.0)
check_eq(self, t, Sum(t).out, 1.0, 1.0) check_eq(self, t, Sum(t).out, 1.0, 1.0)
check_eq(self, t, Sum(t).out, -1.0, -1.0) check_eq(self, t, Sum(t).out, -1.0, -1.0)
t = tensor([0.0, 0.0]) t = tinit([0.0, 0.0])
d = numpy.asarray([-0.4, 1.2]) d = numpy.asarray([-0.4, 1.2])
check_eq(self, t, Sum(t).out, d, numpy.sum(d)) check_eq(self, t, Sum(t).out, d, numpy.sum(d))
check_eq(self, t, Sum(t).out, -d, -numpy.sum(d)) check_eq(self, t, Sum(t).out, -d, -numpy.sum(d))
class T_mul(unittest.TestCase): class T_mul(unittest.TestCase):
def setUp(self):
numpy.random.seed([1,2,3,4])
def test_elemwise(self): def test_elemwise(self):
a = tensor(0.0) a = tinit(0.0)
b = tensor(0.0) b = tinit(0.0)
check_eq2(self, [a,b], mul_elemwise(a,b), [3.0, 4.0], 12.0) check_eq2(self, [a,b], mul_elemwise(a,b), [3.0, 4.0], 12.0)
check_eq2(self, [a,b], mul_elemwise(a,a), [-1.0,2.0], 1.0) check_eq2(self, [a,b], mul_elemwise(b,a), [-1.0,2.0], -2.0)
check_eq2(self, [a,b], mul(a,b), [3.0, 4.0], 12.0) self.failUnless(isinstance(mul(a,b).owner, Scale))
check_eq2(self, [a,b], mul(a,a), [-1.0,2.0], 1.0)
a = tensor(numpy.ones(2)) a = tinit(numpy.ones(2))
b = tensor(numpy.ones(2)) b = tinit(numpy.ones(2))
aa = numpy.asarray([-0.5, 4.0]) aa = numpy.asarray([-0.5, 4.0])
bb = numpy.asarray([-0.5, 2.0]) bb = numpy.asarray([-0.5, 2.0])
check_eq2(self, [a,b], mul_elemwise(a,b), [aa,bb], numpy.asarray([0.25, 8.0])) check_eq2(self, [a,b], mul_elemwise(a,b), [aa,bb], numpy.asarray([0.25, 8.0]))
check_eq2(self, [a,b], mul_elemwise(a,b), [aa,aa], numpy.asarray([0.25, 16.0])) check_eq2(self, [a,b], mul_elemwise(a,b), [bb,aa], numpy.asarray([0.25, 8.0]))
check_eq2(self, [a,b], mul(a,b), [aa,bb], numpy.asarray([0.25, 8.0])) self.failUnless(isinstance(mul(a,b).owner, MulElemwise))
check_eq2(self, [a,b], mul(a,b), [aa,aa], numpy.asarray([0.25, 16.0]))
def test_scalar(self):
r = numpy.random.rand(2,3)
a = tinit(r)
b = tinit(2.0)
check_eq2(self, [a,b], scale(a,b), [r, 2.0], r*2.0)
check_eq2(self, [a,b], scale(a,b), [r, 4.0], r*4.0)
self.failUnless(b.data == 2.0)
def test_operator(self):
a = tinit([1,1])
aa = tinit([1,1])
b = tinit(4.0)
self.failUnless(isinstance((a*b).owner, Scale))
self.failUnless(isinstance((b*a).owner, Scale))
self.failUnless(isinstance((a*aa).owner, MulElemwise))
self.failUnless(isinstance((aa*a).owner, MulElemwise))
def test_wrong_shapes(self): def test_wrong_shapes(self):
a = tensor(numpy.ones(3)) a = tinit(numpy.ones(3))
b = tensor(numpy.ones(4)) b = tinit(numpy.ones(4))
try: try:
check_eq2(self, [a,b], MulElemwise(a,b).out, check_eq2(self, [a,b], MulElemwise(a,b).out,
[numpy.ones(3), numpy.ones(4)], 1.0) [numpy.ones(3), numpy.ones(4)], 1.0)
self.fail() self.fail()
except ValueError, e: except ValueError, e:
self.failUnless(e is T._assert_same_shapes.E_shape) self.failUnless(e is tensor._assert_same_shapes.E_shape)
class T_div(unittest.TestCase):
    """Gradient checks for DivElemwise via verify_grad."""
    def setUp(self):
        # fixed seed so the random test points are reproducible
        numpy.random.seed(9999)
    def test_grad_e(self):
        # elementwise division: rank-0, rank-1 and rank-2 operands
        verify_grad(self, DivElemwise, [numpy.ones(()), numpy.ones(())])
        verify_grad(self, DivElemwise, [numpy.random.rand(3), numpy.ones(3)])
        # +0.1 keeps the denominator away from zero
        verify_grad(self, DivElemwise, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1])
    def test_grad_sl(self):
        # NOTE(review): this body is identical to test_grad_e (same op class,
        # same shapes); the name suggests a scalar variant was intended -- confirm.
        verify_grad(self, DivElemwise, [numpy.ones(()), numpy.ones(())])
        verify_grad(self, DivElemwise, [numpy.random.rand(3), numpy.ones(3)])
        verify_grad(self, DivElemwise, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1])
class T_pow(unittest.TestCase):
    """Gradient checks for the power ops via verify_grad."""
    def setUp(self):
        # fixed seed so the random test points are reproducible
        numpy.random.seed(9999)
    def test_elemwise(self):
        # Only the power op belongs here; the DivElemwise check that used to
        # precede this line was a copy-paste leftover and DivElemwise is
        # already exercised by T_div.
        verify_grad(self, PowElemwise, [numpy.random.rand(3,4), numpy.random.rand(3,4)])
    def test_scalar_l(self):
        # scalar base, tensor exponent
        verify_grad(self, PowScalarL, [numpy.random.rand(3), 3.0])
    def test_scalar_r(self):
        # tensor base, scalar exponent
        verify_grad(self, PowScalarR, [numpy.random.rand(3), 3.0])
class _testCase_matinv:#(unittest.TestCase):
    """Gradient-descent test on a product of two matrices.

    The unittest.TestCase base is commented out, so this class is not a
    TestCase and is currently disabled.
    """
    def setUp(self):
        # fixed seed so the random starting matrices are reproducible
        numpy.random.seed(1)

    def mat_recip(self,dim):
        """Run 300 descent steps on wi; return (str(first cost), str(last cost))."""
        # symbolic program
        a = Tensor('float64', [0,0], name='a')
        b = Tensor('float64', [0,0], name='b')
        # presumably an elementwise product (operator * on two non-scalar Tensors) -- confirm
        ab = a*b
        # squared distance between a*b and an all-ones (dim, dim) matrix
        diff = ab - tinit(numpy.ones((dim,dim)))
        ssdiff = sum((diff**2.0))
        g_b = gradient.grad(ssdiff, b, tinit(numpy.ones(1),name='g_cost'))

        # compilation to function
        fn = Function([a,b], [ssdiff,g_b])

        # use the function
        w = numpy.random.rand(dim,dim)
        wi = numpy.random.rand(dim,dim)
        for i in xrange(300):
            ssd, gw = fn(w,wi)
            #print ssd
            if i == 0:
                # remember the cost before any update
                str0 = str(ssd)
            # fixed step size 0.4; wi is updated in place
            wi -= 0.4 * gw

        return str0, str(ssd)

    def test_recip(self):
        """Matrix reciprocal by gradient descent"""
        # NOTE(review): these expected strings are the same ones used by the
        # removed T.dot-based matinv test; verify they hold for this program.
        self.assertEqual(('2.67327580893', '0.000438649434819'), self.mat_recip(3))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -55,7 +55,10 @@ class BaseTensor(ResultBase): ...@@ -55,7 +55,10 @@ class BaseTensor(ResultBase):
if not isinstance(arr, numpy.ndarray): if not isinstance(arr, numpy.ndarray):
arr = numpy.asarray(arr, dtype = self.dtype) arr = numpy.asarray(arr, dtype = self.dtype)
if len(self.broadcastable) != len(arr.shape): if len(self.broadcastable) != len(arr.shape):
raise ValueError(BaseTensor.filter.E_rank) raise ValueError(BaseTensor.filter.E_rank,
self.broadcastable,
arr.shape,
self.owner)
for b, s in zip(self.broadcastable, arr.shape): for b, s in zip(self.broadcastable, arr.shape):
if b and (s != 1): if b and (s != 1):
raise ValueError(BaseTensor.filter.E_shape) raise ValueError(BaseTensor.filter.E_shape)
......
...@@ -71,7 +71,7 @@ class Function: ...@@ -71,7 +71,7 @@ class Function:
#print 'orphans', orphans #print 'orphans', orphans
#print 'ops', gof.graph.ops(inputs, outputs) #print 'ops', gof.graph.ops(inputs, outputs)
env = gof.env.Env(inputs, outputs, features, consistency_check = True) env = gof.env.Env(inputs, outputs, features + [gof.EquivTool], consistency_check = True)
#print 'orphans in env', env.orphans() #print 'orphans in env', env.orphans()
...@@ -79,7 +79,7 @@ class Function: ...@@ -79,7 +79,7 @@ class Function:
#print 'orphans after clone', env.orphans() #print 'orphans after clone', env.orphans()
for d, o in zip(orphan_data, env.orphans()): for d, o in zip(orphan_data, [env.equiv(orphan) for orphan in orphans]):
#print 'assigning orphan value', d #print 'assigning orphan value', d
o.data = d o.data = d
......
...@@ -95,13 +95,13 @@ def grad_sources_inputs(sources, graph_inputs): ...@@ -95,13 +95,13 @@ def grad_sources_inputs(sources, graph_inputs):
gmap[r] = g_r gmap[r] = g_r
return gmap return gmap
def grad(cost, param): def grad(cost, param, g_cost=1.0):
"""Return symbolic expression of gradient of <cost> wrt <param>. """Return symbolic expression of gradient of <cost> wrt <param>.
If <param> is a list, then return a list containing the gradient of cost wrt If <param> is a list, then return a list containing the gradient of cost wrt
each element of the list. each element of the list.
""" """
inputs = gof.graph.inputs([cost]) inputs = gof.graph.inputs([cost])
gmap = grad_sources_inputs([(cost, 1.0)], inputs) gmap = grad_sources_inputs([(cost, g_cost)], inputs)
if isinstance(param, list): if isinstance(param, list):
return [gmap.get(p, None) for p in param] return [gmap.get(p, None) for p in param]
else: else:
...@@ -136,9 +136,9 @@ class numeric_grad: ...@@ -136,9 +136,9 @@ class numeric_grad:
f_eps = f(*pt) f_eps = f(*pt)
gf[idx][i] = numpy.asarray((f_eps - f_pt)/eps) gf[idx][i] = numpy.asarray((f_eps - f_pt)/eps)
pt[idx][i] = orig pt[idx][i] = orig
elif len(args[idx].shape) == 2: elif len(pt[idx].shape) == 2:
for i in xrange(pt[idx].shape[0]): for i in xrange(pt[idx].shape[0]):
for j in xrange(args[idx].shape[1]): for j in xrange(pt[idx].shape[1]):
orig = pt[idx][i,j] orig = pt[idx][i,j]
pt[idx][i,j] = pt[idx][i,j] + eps pt[idx][i,j] = pt[idx][i,j] + eps
f_eps = f(*pt) f_eps = f(*pt)
......
...@@ -15,8 +15,8 @@ class Tensor(BaseTensor): ...@@ -15,8 +15,8 @@ class Tensor(BaseTensor):
of Tensor operations contained in this file. of Tensor operations contained in this file.
Operators: Operators:
- most numeric operators are overloaded to return Ops that *would* perform - most numeric operators are overloaded (to return Ops that perform the
the corresponding calculation corresponding calculation)
""" """
#UNARY #UNARY
...@@ -65,7 +65,7 @@ class Tensor(BaseTensor): ...@@ -65,7 +65,7 @@ class Tensor(BaseTensor):
def __getslice__(self, key): raise NotImplementedError() def __getslice__(self, key): raise NotImplementedError()
# alternate Tensor constructor # alternate Tensor constructor
def tensor(data, broadcastable=None, role=None, name=None): def tinit(data, broadcastable=None, role=None, name=None):
"""Return a Tensor containing given data""" """Return a Tensor containing given data"""
data = numpy.asarray(data) data = numpy.asarray(data)
if broadcastable is None: if broadcastable is None:
...@@ -88,7 +88,7 @@ def _scalar_switch(normal_f, scalar_f, scalar_f_reverse = None): ...@@ -88,7 +88,7 @@ def _scalar_switch(normal_f, scalar_f, scalar_f_reverse = None):
if isinstance(obj, Tensor): if isinstance(obj, Tensor):
return obj return obj
else: else:
return tensor(obj) return tinit(obj)
x, y = as_tensor(x), as_tensor(y) x, y = as_tensor(x), as_tensor(y)
if 0 not in y.broadcastable: if 0 not in y.broadcastable:
return scalar_f(x, y) return scalar_f(x, y)
...@@ -125,7 +125,7 @@ class _Op(Op): ...@@ -125,7 +125,7 @@ class _Op(Op):
if isinstance(obj, Tensor): if isinstance(obj, Tensor):
return obj return obj
else: else:
return tensor(obj) return tinit(obj)
inputs = map(as_tensor, inputs) inputs = map(as_tensor, inputs)
if self.nin >= 0: if self.nin >= 0:
...@@ -148,8 +148,11 @@ class _Op(Op): ...@@ -148,8 +148,11 @@ class _Op(Op):
def propagate_dtype(self, *i_dtypes): def propagate_dtype(self, *i_dtypes):
def upcast(dtype, *dtypes): def upcast(dtype, *dtypes):
z = numpy.zeros((), dtype = dtype) z = numpy.zeros((), dtype = dtype)
#print '----', self.__class__
#print type(z), dtype
for dtype in dtypes: for dtype in dtypes:
z = z + numpy.zeros((), dtype = dtype) z = z + numpy.zeros((), dtype = dtype)
#print type(z), type(dtype), dtype
return str(z.dtype) return str(z.dtype)
for dtype in i_dtypes: for dtype in i_dtypes:
if dtype is None: if dtype is None:
...@@ -213,7 +216,7 @@ class _Elemwise(Elemwise, _Op): ...@@ -213,7 +216,7 @@ class _Elemwise(Elemwise, _Op):
raise Exception("Cannot infer broadcastable for non-loop variable(s) %s" % nonloop_o) raise Exception("Cannot infer broadcastable for non-loop variable(s) %s" % nonloop_o)
all_bcast = [broadcastable for broadcastable, i in zip(inputs, idesc) if i[1]] all_bcast = [broadcastable for broadcastable, i in zip(inputs, idesc) if i[1]]
if reduce(lambda x, y: x is not False and x == y and y, [len(x) for x in all_bcast]) is False: if reduce(lambda x, y: x is not False and x == y and y, [len(x) for x in all_bcast]) is False:
raise TypeError("Inputs that are loop variables do not all have the same number of dimensions.") raise TypeError(_Elemwise.propagate_broadcastable.E_ndim, self.__class__)
ret = [] ret = []
for arr in zip(*all_bcast): for arr in zip(*all_bcast):
if 0 in arr: if 0 in arr:
...@@ -221,6 +224,8 @@ class _Elemwise(Elemwise, _Op): ...@@ -221,6 +224,8 @@ class _Elemwise(Elemwise, _Op):
else: else:
ret.append(1) ret.append(1)
return [ret] * self.nout return [ret] * self.nout
propagate_broadcastable.E_ndim \
= "Inputs that are loop variables do not all have the same number of dimensions."
def c_init(self, inputs, outputs): def c_init(self, inputs, outputs):
raise AbstractFunctionError() raise AbstractFunctionError()
...@@ -255,7 +260,10 @@ class TensorScalarOp(_Elemwise): ...@@ -255,7 +260,10 @@ class TensorScalarOp(_Elemwise):
def c_code_foreach(self): def c_code_foreach(self):
return "%%(z)s_i = %s;" % self.c_expr return "%%(z)s_i = %s;" % self.c_expr
def constructor(op_cls): def _constructor(op_cls):
"""Return a function that calls op_cls(*input)
and returns the outputs of the op (with single outputs unpacked)
"""
def f(*args, **kwargs): def f(*args, **kwargs):
op = op_cls(*args, **kwargs) op = op_cls(*args, **kwargs)
if len(op.outputs) > 1: if len(op.outputs) > 1:
...@@ -278,6 +286,12 @@ class Abs(_Elemwise): ...@@ -278,6 +286,12 @@ class Abs(_Elemwise):
return "%(z)s_i = abs(%(x)s_i);" return "%(z)s_i = abs(%(x)s_i);"
#Constructor not necessary because builtin abs() does this #Constructor not necessary because builtin abs() does this
class Exp(_Elemwise):
def impl(self, x): return numpy.exp(x)
def grad(self, x, gz): return gz * exp(x)
def c_foreach(self, (x_i, ), (z_i, )): return "z_i = exp(x_i);"
exp = _constructor(Exp)
class Neg(_Elemwise): class Neg(_Elemwise):
def impl(self, x): def impl(self, x):
return -x return -x
...@@ -287,6 +301,12 @@ class Neg(_Elemwise): ...@@ -287,6 +301,12 @@ class Neg(_Elemwise):
return "%(z)s_i = -%(x)s_i;" return "%(z)s_i = -%(x)s_i;"
#Constructor not necessary because unary '-' does this #Constructor not necessary because unary '-' does this
class Log(_Elemwise):
def impl(self, x): return numpy.log(x)
def grad(self, x, gz): return gz / x
def c_foreach(self, (x_i, ), (z_i, )): return "z_i = log(x_i);"
log = _constructor(Log)
class Sgn(_Elemwise): class Sgn(_Elemwise):
def impl(self, x): def impl(self, x):
return numpy.abs(x) / x return numpy.abs(x) / x
...@@ -294,7 +314,7 @@ class Sgn(_Elemwise): ...@@ -294,7 +314,7 @@ class Sgn(_Elemwise):
return [None] return [None]
def c_foreach(self, (x_i, ), (z_i, )): def c_foreach(self, (x_i, ), (z_i, )):
return "%(z)s_i = %(x)s_i/abs(%(x)s_i);" # TODO: C use copysign return "%(z)s_i = %(x)s_i/abs(%(x)s_i);" # TODO: C use copysign
sgn = constructor(Sgn) sgn = _constructor(Sgn)
class Sum(_Elemwise): class Sum(_Elemwise):
def impl(self, x): def impl(self, x):
...@@ -307,7 +327,7 @@ class Sum(_Elemwise): ...@@ -307,7 +327,7 @@ class Sum(_Elemwise):
return "dtype_%(sum)s* %(sum)sp = ((dtype_%(sum)s*)PyArray_DATA(%(sum)s)); %(sum)sp[0] = 0;" return "dtype_%(sum)s* %(sum)sp = ((dtype_%(sum)s*)PyArray_DATA(%(sum)s)); %(sum)sp[0] = 0;"
def c_foreach(self, (x_i, ), (sum, )): def c_foreach(self, (x_i, ), (sum, )):
return "%(sum)sp[0] += %(x)s_i;" return "%(sum)sp[0] += %(x)s_i;"
sum = constructor(Sum) sum = _constructor(Sum)
class Fill(_Elemwise): class Fill(_Elemwise):
def impl(self, model, value): def impl(self, model, value):
...@@ -318,7 +338,7 @@ class Fill(_Elemwise): ...@@ -318,7 +338,7 @@ class Fill(_Elemwise):
return "dtype_%(value)s %(value)s0 = ((dtype_%(value)s*)PyArray_DATA(%(value)s))[0];" return "dtype_%(value)s %(value)s0 = ((dtype_%(value)s*)PyArray_DATA(%(value)s))[0];"
def c_foreach(self, (model_i, value), (z_i, )): def c_foreach(self, (model_i, value), (z_i, )):
return "%(z)s_i = %(value)s0;" return "%(z)s_i = %(value)s0;"
fill = constructor(Fill) fill = _constructor(Fill)
class TensorCopy(_Elemwise): class TensorCopy(_Elemwise):
...@@ -328,7 +348,7 @@ class TensorCopy(_Elemwise): ...@@ -328,7 +348,7 @@ class TensorCopy(_Elemwise):
return gz return gz
def c_foreach(self, (x_i, ), (z_i, )): def c_foreach(self, (x_i, ), (z_i, )):
return "%(z)s_i = %(x)s_i;" return "%(z)s_i = %(x)s_i;"
tensor_copy = constructor(TensorCopy) tensor_copy = _constructor(TensorCopy)
if 0: if 0:
########################## ##########################
...@@ -372,79 +392,83 @@ if 0: ...@@ -372,79 +392,83 @@ if 0:
raise NotImplemented raise NotImplemented
if 0: ##########################
########################## # Arithmetic : Add
# Arithmetic : Add ##########################
##########################
# Elemwise # # Elemwise #
class add_elemwise(_Elemwise): class AddElemwise(_Elemwise):
def impl(self, x, y): def impl(self, x, y):
_assert_same_shapes(x, y) _assert_same_shapes(x, y)
return x + y return x + y
def grad(self, (x, y), gz): def grad(self, (x, y), gz):
return gz, gz return gz, gz
def c_foreach(self, (x_i, y_i), (z_i, )): def c_foreach(self, (x_i, y_i), (z_i, )):
return "z_i = x_i + y_i;" return "z_i = x_i + y_i;"
add_elemwise = _constructor(AddElemwise)
class add_elemwise_inplace(add_elemwise.inplace_version()):
def impl(self, x, y):
_assert_same_shapes(x, y)
x += y
return x
# Scalar #
class add_scalar(TensorScalarOp):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
return x + a
def grad(self, (x, a), gz):
return gz, sum(gz)
c_expr = "x_i + a"
class add_scalar_inplace(add_scalar.inplace_version()):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
x += a
return x
add = _scalar_switch(add_elemwise, add_scalar, add_scalar)
add_inplace = _scalar_switch(add_elemwise_inplace, add_scalar_inplace)
class AddElemwiseInplace(AddElemwise.inplace_version()):
def impl(self, x, y):
_assert_same_shapes(x, y)
x += y
return x
add_elemwise_inplace = _constructor(AddElemwiseInplace)
if 0: # Scalar #
########################## class AddScalar(TensorScalarOp):
# Arithmetic : Sub def impl(self, x, a):
########################## _assert_tensor_scalar(x, a)
return x + a
def grad(self, (x, a), gz):
return gz, sum(gz)
c_expr = "x_i + a"
add_scalar = _constructor(AddScalar)
# Elemwise # class AddScalarInplace(AddScalar.inplace_version()):
class SubElemwise(_Elemwise): def impl(self, x, a):
def impl(self, x, y): _assert_tensor_scalar(x, a)
_assert_same_shapes(x, y) x += a
return x - y return x
def grad(self, (x, y), gz): add_scalar_inplace = _constructor(AddScalarInplace)
return gz, -gz
def c_foreach(self, (x_i, y_i), (z_i, )):
return "z_i = x_i - y_i;"
class SubElemwiseInplace(SubElemwise.inplace_version()): add = _scalar_switch(add_elemwise, add_scalar, add_scalar)
def impl(self, x, y): add_inplace = _scalar_switch(add_elemwise_inplace, add_scalar_inplace)
_assert_same_shapes(x, y)
x -= y
return x
# Scalar #
def sub_scalar_r(x, a):
return add_scalar(x, -a)
def sub_scalar_l(x, a): ##########################
return add_scalar(-x, a) # Arithmetic : Sub
##########################
def sub_scalar_rinplace(x, a): # Elemwise #
return add_scalar_inplace(x, -a) class SubElemwise(_Elemwise):
def impl(self, x, y):
_assert_same_shapes(x, y)
return x - y
def grad(self, (x, y), gz):
return gz, -gz
def c_foreach(self, (x_i, y_i), (z_i, )):
return "z_i = x_i - y_i;"
sub_elemwise = _constructor(SubElemwise)
class SubElemwiseInplace(SubElemwise.inplace_version()):
def impl(self, x, y):
_assert_same_shapes(x, y)
x -= y
return x
sub_elemwise_inplace = _constructor(SubElemwiseInplace)
sub = _scalar_switch(sub_elemwise, sub_scalar_r, sub_scalar_l) # Scalar #
sub_inplace = _scalar_switch(sub_elemwise_inplace, sub_scalar_rinplace) def sub_scalar_r(x, a):
return add_scalar(x, -a)
def sub_scalar_l(x, a):
return add_scalar(-x, a)
def sub_scalar_rinplace(x, a):
return add_scalar_inplace(x, -a)
sub = _scalar_switch(sub_elemwise, sub_scalar_r, sub_scalar_l)
sub_inplace = _scalar_switch(sub_elemwise_inplace, sub_scalar_rinplace)
########################## ##########################
# Arithmetic : Mul # Arithmetic : Mul
...@@ -459,14 +483,14 @@ class MulElemwise(_Elemwise): ...@@ -459,14 +483,14 @@ class MulElemwise(_Elemwise):
return mul(y, gz), mul(x, gz) return mul(y, gz), mul(x, gz)
def c_foreach(self, (x_i, y_i), (z_i, )): def c_foreach(self, (x_i, y_i), (z_i, )):
return "%(z)s_i = %(x)s_i * %(y)s_i;" return "%(z)s_i = %(x)s_i * %(y)s_i;"
mul_elemwise = constructor(MulElemwise) mul_elemwise = _constructor(MulElemwise)
class MulElemwiseInplace(MulElemwise.inplace_version()): class MulElemwiseInplace(MulElemwise.inplace_version()):
def impl(self, x, y): def impl(self, x, y):
_assert_same_shapes(x, y) _assert_same_shapes(x, y)
x *= y x *= y
return x return x
mul_elemwise_inplace = constructor(MulElemwiseInplace) mul_elemwise_inplace = _constructor(MulElemwiseInplace)
# Scalar # # Scalar #
class Scale(TensorScalarOp): class Scale(TensorScalarOp):
...@@ -476,109 +500,123 @@ class Scale(TensorScalarOp): ...@@ -476,109 +500,123 @@ class Scale(TensorScalarOp):
def grad(self, (x, a), gz): def grad(self, (x, a), gz):
return scale(a, gz), sum(mul_elemwise(x, gz)) return scale(a, gz), sum(mul_elemwise(x, gz))
c_expr = "%(x)s_i * _%(a)s" c_expr = "%(x)s_i * _%(a)s"
scale = constructor(Scale) scale = _constructor(Scale)
class ScaleInplace(Scale.inplace_version()): class ScaleInplace(Scale.inplace_version()):
def impl(self, x, a): def impl(self, x, a):
_assert_tensor_scalar(x, a) _assert_tensor_scalar(x, a)
x *= a x *= a
return x return x
scale_inplace = constructor(ScaleInplace) scale_inplace = _constructor(ScaleInplace)
mul = _scalar_switch(mul_elemwise, scale, scale) mul = _scalar_switch(mul_elemwise, scale, scale)
mul_inplace = _scalar_switch(mul_elemwise_inplace, scale_inplace) mul_inplace = _scalar_switch(mul_elemwise_inplace, scale_inplace)
if 0: ##########################
########################## # Arithmetic : Div
# Arithmetic : Div ##########################
##########################
# Elemwise # # Elemwise #
class DivElemwise(_Elemwise): class DivElemwise(_Elemwise):
def impl(self, x, y): def impl(self, x, y):
_assert_same_shapes(x, y) _assert_same_shapes(x, y)
return x / y return x / y
def grad(self, (x, y), gz): def grad(self, (x, y), gz):
return div(gz, y), -div(mul(x, gz), sqr(y)) return div(gz, y), -div(mul(x, gz), (y*y))
def c_foreach(self, (x_i, y_i), (z_i, )): def c_foreach(self, (x_i, y_i), (z_i, )):
return "z_i = x_i / y_i;" return "%(z)s_i = %(x)s_i / %(y)s_i;"
div_elemwise = _constructor(DivElemwise)
class DivElemwiseInplace(DivElemwise.inplace_version()): class DivElemwiseInplace(DivElemwise.inplace_version()):
def impl(self, x, y): def impl(self, x, y):
_assert_same_shapes(x, y) _assert_same_shapes(x, y)
x /= y x /= y
return x return x
div_elemwise_inplace = _constructor(DivElemwiseInplace)
class InvElemwise(_Elemwise):
    """Elementwise reciprocal, z = 1.0 / x."""
    def impl(self, x):
        return 1.0/x
    def grad(self, x, gz):
        # d(1/x)/dx = -1/x**2, chained with the output gradient gz
        return -gz / (x*x)
    def c_foreach(self, (x_i, ), (z_i, )):
        return "%(z)s_i = 1.0 / %(x)s_i;" #TODO: cast 1.0 to the dtype of x
inv_elemwise = _constructor(InvElemwise)
# Scalar # # Scalar #
def div_scalar_r(x, a): def div_scalar_r(x, a):
return scale(x, inv_elemwise(a)) return scale(x, inv_elemwise(a))
def div_scalar_l(x, a): def div_scalar_l(x, a):
return scale(inv_elemwise(x), a) return scale(inv_elemwise(x), a)
def div_scalar_rinplace(x, a): def div_scalar_rinplace(x, a):
return scale_inplace(x, inv_elemwise(a)) return scale_inplace(x, inv_elemwise(a))
div = _scalar_switch(div_elemwise, div_scalar_r, div_scalar_l) div = _scalar_switch(div_elemwise, div_scalar_r, div_scalar_l)
div_inplace = _scalar_switch(div_elemwise_inplace, div_scalar_rinplace) div_inplace = _scalar_switch(div_elemwise_inplace, div_scalar_rinplace)
if 0: ##########################
########################## # Arithmetic : Pow
# Arithmetic : Pow ##########################
##########################
# Elemwise #
class PowElemwise(_Elemwise):
    """Elementwise power, z = x ** y (operands checked by _assert_same_shapes)."""
    def impl(self, x, y):
        _assert_same_shapes(x, y)
        return x ** y
    def grad(self, (x, y), gz):
        # dz/dx = y * x**(y-1)
        gx = gz * y * (pow_elemwise(x, y-1.0))
        # dz/dy = log(x) * x**y
        gs = gz * log(x) * pow_elemwise(x, y)
        return gx, gs
    def c_foreach(self, (x_i, y_i), (z_i, )):
        return "%(z)s_i = pow(%(x)s_i, %(y)s_i);"
pow_elemwise = _constructor(PowElemwise)
class PowElemwiseInplace(PowElemwise.inplace_version()):
    """Variant of PowElemwise whose impl overwrites x with x ** y."""
    def impl(self, x, y):
        _assert_same_shapes(x, y)
        # augmented assignment: the result is stored back into x
        x **= y
        return x
pow_elemwise_inplace = _constructor(PowElemwiseInplace)
# Scalar #
class PowScalarL(TensorScalarOp):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
return a ** x
def grad(self, (x, s), gz):
gx = sum(gz * s * pow_scalar_l(add_scalar(s,-1.0), x))
gs = scale(mul(gz, pow_scalar_l(s, x)), log(x))
return gx, gs
c_expr = "pow(%(a)s, %(x)s_i)"
pow_scalar_l = _constructor(PowScalarL)
class PowScalarR(TensorScalarOp):
    """Power with the scalar on the right: z = x ** a (x tensor, a scalar)."""
    def impl(self, x, a):
        _assert_tensor_scalar(x, a)
        return x ** a
    def grad(self, (x, s), gz):
        # dz/dx = s * x**(s-1)
        gx = scale(mul_elemwise(gz,pow_scalar_r(x, add_scalar(s,-1.0))), s)
        # dz/ds = x**s * log(x), reduced with sum
        gs = sum(mul_elemwise(mul_elemwise(gz, pow_scalar_r(x,s)), log(x)))
        return gx, gs
    c_expr = "pow(%(x)s_i, _%(a)s)"
pow_scalar_r = _constructor(PowScalarR)
class PowScalarRInplace(PowScalarR.inplace_version()):
    """Variant of PowScalarR whose impl overwrites x with x ** a."""
    def impl(self, x, a):
        _assert_tensor_scalar(x, a)
        # augmented assignment: the result is stored back into x
        x **= a
        return x
pow_scalar_r_inplace = _constructor(PowScalarRInplace)
# Elemwise # pow = _scalar_switch(pow_elemwise, pow_scalar_r, pow_scalar_l)
pow_inplace = _scalar_switch(pow_elemwise_inplace, pow_scalar_r_inplace)
class PowElemwise(_Elemwise):
def impl(self, x, y):
_assert_same_shapes(x, y)
return x ** y
def grad(self, (x, s), gz):
gx = gz * s * (pow_elemwise(x, s-1.0))
gs = gz * log(x) * pow_elemwise(x, s)
return gx, gs
def c_foreach(self, (x_i, s_i), (z_i, )):
return "z_i = pow(x_i, s_i)"
class PowElemwiseInplace(PowElemwise.inplace_version()):
def impl(self, x, y):
_assert_same_shapes(x, y)
x **= y
return x
# Scalar #
class PowScalarL(TensorScalarOp):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
return a ** x
def grad(self, (x, s), gz):
gx = sum(gz * s * pow_scalar_l(add_scalar(s,-1.0), x))
gs = scale(mul(gz, pow_scalar_l(s, x)), log(x))
return gx, gs
c_expr = "pow(a, x_i)"
class PowScalarR(TensorScalarOp):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
return x ** a
def grad(self, (x, s), gz):
gx = scale(mul_elemwise(gz,pow_scalar_r(x, add_scalar(s,-1.0))), s)
gs = sum(mul_elemwise(mul_elemwise(gz, pow_scalar_r(x,s)), log(x)))
return gx, gs
c_expr = "pow(x_i, a)"
class PowScalarRInplace(PowScalarR.inplace_version()):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
x **= a
return x
pow = _scalar_switch(pow_elemwise, pow_scalar_r, pow_scalar_l)
pow_inplace = _scalar_switch(pow_elemwise_inplace, pow_scalar_rinplace)
if 0: if 0:
......
...@@ -83,16 +83,7 @@ class InvElemwiseInplace(InvElemwise.inplace_version()): ...@@ -83,16 +83,7 @@ class InvElemwiseInplace(InvElemwise.inplace_version()):
return x return x
class Exp(Elemwise):
def impl(self, x): return numpy.exp(x)
def grad(self, x, gz): return gz * exp(x)
def c_foreach(self, (x_i, ), (z_i, )): return "z_i = exp(x_i);"
class Log(Elemwise):
def impl(self, x): return numpy.log(x)
def grad(self, x, gz): return gz / x
def c_foreach(self, (x_i, ), (z_i, )): return "z_i = log(x_i);"
class Log2(Elemwise): class Log2(Elemwise):
def impl(self, x): return numpy.log2(x) def impl(self, x): return numpy.log2(x)
def grad(self, x, gz): return gz / (x * numpy.log(2)) def grad(self, x, gz): return gz / (x * numpy.log(2))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论