fixed orphan order bug in Function, continuing to bring Ops back

Parent commit: 1e2dd4e6
......@@ -273,39 +273,5 @@ def matrices(n):
return [matrix() for i in xrange(n)]
#TODO: move this to the _test_tensor_ops.py
class _testCase_matinv:# (unittest.TestCase):
    # Disabled (commented-out base class): sanity check that learns an
    # approximate matrix inverse by plain gradient descent.
    def setUp(self):
        # fixed seed so the asserted cost strings below are reproducible
        numpy.random.seed(1)
    def matinv(self,dim):
        # symbolic program: minimize sum((a.b - I)^2) over b
        a,b = matrices(2)
        ab = T.dot(a,b)
        diff = ab - tensor.tensor(numpy.identity(dim))
        ssdiff = T.sum((diff**2.0))
        # NOTE(review): grad() is called with param=None and then g(b) is
        # used below -- presumably g maps a variable to its gradient; confirm
        # against the grad() API in gradient.py.
        g = grad(ssdiff,None, tensor.tensor(numpy.ones(1)))
        # compilation to function
        fn = compile.Function([a,b], [ssdiff,g(b)])
        # use the function: 300 steps of gradient descent on wi
        w = numpy.random.rand(dim,dim)
        wi = numpy.random.rand(dim,dim)
        for i in xrange(300):
            ssd, gw = fn(w,wi)
            #print ssdiff
            if i == 0:
                str0 = str(ssd)  # remember the initial cost
            wi -= 0.4 * gw
        # return (initial cost, final cost) as strings for exact comparison
        return str0, str(ssd)
    def test_matinv(self):
        """Matrix inversion by gradient descent (eval mode)"""
        self.assertEqual(('2.67327580893', '0.000438649434819'), self.matinv(3))
if __name__ == '__main__':
unittest.main()
from tensor import *
import tensor as T
import tensor # for hidden symbols
import unittest
from copy import copy
from compile import Function
import gradient
import gof
import gof, gof.graph
#TODO: consider moving this function / functionality to gradient.py
# rationale: it's tricky, and necessary everytime you want to verify
# gradient numerically
def verify_grad(testcase, op_cls, pt_list, n_tests=1, rng=numpy.random, eps=0.0000001, tol=0.0001):
def verify_grad(testcase, op_cls, pt, n_tests=1, rng=numpy.random, eps=0.0000001, tol=0.0001):
"""testcase.failUnless( analytic gradient matches finite-diff gradient) """
for test_num in xrange(n_tests):
for pt in pt_list:
tensor_pt = [tensor(p) for p in pt]
o = op_cls(*tensor_pt)
if len(o.outputs) > 1:
raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')
# we could make loop over outputs making random projections R for each,
# but this doesn't handle the case where not all the outputs are
# differentiable... so I leave this as TODO for now -jsb.
o_fn = Function(tensor_pt, o.outputs)
o_fn_out = o_fn(*pt)
random_projection = rng.rand(*o_fn_out.shape)
t_r = tensor(random_projection)
#random projection of o onto t_r
cost = sum(t_r * o.outputs[0])
cost_fn = Function(tensor_pt, [cost])
num_grad = gradient.numeric_grad(cost_fn, pt)
grad_fn = Function(tensor_pt, gradient.grad(cost, tensor_pt))
analytic_grad = grad_fn()
if not isinstance(analytic_grad, (list, tuple)):
analytic_grad = [analytic_grad]
if num_grad.max_err(analytic_grad) > 1.0e-4:
raise Exception(verify_grad.E_grad)
tensor_pt = [tinit(p,name='input %i'%i) for i,p in enumerate(pt)]
o = op_cls(*tensor_pt)
if len(o.outputs) > 1:
raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')
# we could make loop over outputs making random projections R for each,
# but this doesn't handle the case where not all the outputs are
# differentiable... so I leave this as TODO for now -JB.
o_fn = Function(tensor_pt, o.outputs)
o_fn_out = o_fn(*pt)
random_projection = rng.rand(*o_fn_out.shape)
t_r = tinit(random_projection)
#random projection of o onto t_r
cost = sum(t_r * o.outputs[0])
cost_fn = Function(tensor_pt, [cost])
num_grad = gradient.numeric_grad(cost_fn, pt)
symbolic_grad = gradient.grad(cost, tensor_pt,tinit(1.0,name='g_cost'))
if 0:
print '-------'
print '----------'
for op in gof.graph.io_toposort(tensor_pt, symbolic_grad):
print op
grad_fn = Function(tensor_pt, symbolic_grad)
analytic_grad = grad_fn(*pt)
if not isinstance(analytic_grad, (list, tuple)):
analytic_grad = [analytic_grad]
if num_grad.max_err(analytic_grad) > 1.0e-4:
raise Exception(verify_grad.E_grad)
verify_grad.E_grad = 'gradient error exceeded tolerance'
......@@ -56,7 +61,7 @@ def check_eq2(self, inputs, output, args_in, arg_out):
val = fn(*args_in)
self.failUnless( numpy.all(val == arg_out), (val, arg_out))
def check_eq2_c(self, inputs, output, args_in, arg_out):
    # Like check_eq2, but compiles the function with the C linker.
    # (diff residue removed: a stale duplicate header line
    # `def check_eq2(self, ...)` preceded this definition.)
    fn = Function(inputs, [output], linker_cls = gof.CLinker)
    val = fn(*args_in)
    self.failUnless( numpy.all(val == arg_out), (val, arg_out))
......@@ -64,20 +69,21 @@ def check_eq2(self, inputs, output, args_in, arg_out):
class T_abs(unittest.TestCase):
def test_impl(self):
t = tensor(1.0)
t = tinit(1.0)
check_eq(self, t, abs(t), 1.0, 1.0)
check_eq(self, t, abs(t), -1.0, 1.0)
for shape in (2,), (3,4):
t = tensor(numpy.ones(shape))
t = tinit(numpy.ones(shape))
d = numpy.random.rand(*shape)*2-1.0
check_eq(self, t, abs(t), d, abs(d))
check_eq(self, t, abs(t), -d, abs(-d))
def test_grad(self):
verify_grad(self, Abs, [[numpy.ones(())], [numpy.ones(3)]])
verify_grad(self, Abs, [numpy.ones(())])
verify_grad(self, Abs, [numpy.ones(3)])
class AbsBadGrad(T._Elemwise):
class AbsBadGrad(tensor._Elemwise):
def impl(self, x):
return numpy.abs(x)
def grad(self, x, gz):
......@@ -87,52 +93,137 @@ class T_abs(unittest.TestCase):
def test_badgrad(self):
try:
verify_grad(self, T_abs.AbsBadGrad, [[numpy.ones(())], [numpy.ones(3)]])
verify_grad(self, T_abs.AbsBadGrad, [numpy.ones(())])
self.fail()
except Exception, e:
self.failUnless(str(e) == verify_grad.E_grad, str(e))
class T_fill(unittest.TestCase):
    """Checks the graph built by fill(): the op class and the
    broadcastable/dtype patterns of its inputs and output."""
    def test0(self):
        result = fill(numpy.asarray([1,2,3]), 9.0)
        owner = result.owner
        self.failUnless(owner.__class__ == Fill)
        model_in = owner.inputs[0]
        value_in = owner.inputs[1]
        out = owner.outputs[0]
        # first input: the int vector used as the shape model
        self.failUnless(model_in.broadcastable == (0,))
        self.failUnless(model_in.dtype[0:3] == 'int')
        # second input: the scalar float fill value
        self.failUnless(value_in.broadcastable == ())
        self.failUnless(value_in.dtype[0:3] == 'flo')
        # output: float vector with the model's broadcastable pattern
        self.failUnless(out.broadcastable == (0,))
        self.failUnless(out.dtype[0:3] == 'flo')
class T_sum(unittest.TestCase):
    """Tests the Sum op on scalar and vector inputs, both signs."""
    def test_impl(self):
        # (diff residue removed: stale `t = tensor(...)` lines from before
        # the tensor() -> tinit() rename duplicated the tinit() calls.)
        t = tinit(0.0)
        check_eq(self, t, Sum(t).out, 1.0, 1.0)
        check_eq(self, t, Sum(t).out, -1.0, -1.0)
        t = tinit([0.0, 0.0])
        d = numpy.asarray([-0.4, 1.2])
        check_eq(self, t, Sum(t).out, d, numpy.sum(d))
        check_eq(self, t, Sum(t).out, -d, -numpy.sum(d))
class T_mul(unittest.TestCase):
def setUp(self):
numpy.random.seed([1,2,3,4])
def test_elemwise(self):
a = tensor(0.0)
b = tensor(0.0)
a = tinit(0.0)
b = tinit(0.0)
check_eq2(self, [a,b], mul_elemwise(a,b), [3.0, 4.0], 12.0)
check_eq2(self, [a,b], mul_elemwise(a,a), [-1.0,2.0], 1.0)
check_eq2(self, [a,b], mul(a,b), [3.0, 4.0], 12.0)
check_eq2(self, [a,b], mul(a,a), [-1.0,2.0], 1.0)
check_eq2(self, [a,b], mul_elemwise(b,a), [-1.0,2.0], -2.0)
self.failUnless(isinstance(mul(a,b).owner, Scale))
a = tensor(numpy.ones(2))
b = tensor(numpy.ones(2))
a = tinit(numpy.ones(2))
b = tinit(numpy.ones(2))
aa = numpy.asarray([-0.5, 4.0])
bb = numpy.asarray([-0.5, 2.0])
check_eq2(self, [a,b], mul_elemwise(a,b), [aa,bb], numpy.asarray([0.25, 8.0]))
check_eq2(self, [a,b], mul_elemwise(a,b), [aa,aa], numpy.asarray([0.25, 16.0]))
check_eq2(self, [a,b], mul(a,b), [aa,bb], numpy.asarray([0.25, 8.0]))
check_eq2(self, [a,b], mul(a,b), [aa,aa], numpy.asarray([0.25, 16.0]))
check_eq2(self, [a,b], mul_elemwise(a,b), [bb,aa], numpy.asarray([0.25, 8.0]))
self.failUnless(isinstance(mul(a,b).owner, MulElemwise))
def test_scalar(self):
r = numpy.random.rand(2,3)
a = tinit(r)
b = tinit(2.0)
check_eq2(self, [a,b], scale(a,b), [r, 2.0], r*2.0)
check_eq2(self, [a,b], scale(a,b), [r, 4.0], r*4.0)
self.failUnless(b.data == 2.0)
def test_operator(self):
a = tinit([1,1])
aa = tinit([1,1])
b = tinit(4.0)
self.failUnless(isinstance((a*b).owner, Scale))
self.failUnless(isinstance((b*a).owner, Scale))
self.failUnless(isinstance((a*aa).owner, MulElemwise))
self.failUnless(isinstance((aa*a).owner, MulElemwise))
def test_wrong_shapes(self):
a = tensor(numpy.ones(3))
b = tensor(numpy.ones(4))
a = tinit(numpy.ones(3))
b = tinit(numpy.ones(4))
try:
check_eq2(self, [a,b], MulElemwise(a,b).out,
[numpy.ones(3), numpy.ones(4)], 1.0)
self.fail()
except ValueError, e:
self.failUnless(e is T._assert_same_shapes.E_shape)
self.failUnless(e is tensor._assert_same_shapes.E_shape)
class T_div(unittest.TestCase):
    # Gradient checks for elementwise division.
    def setUp(self):
        numpy.random.seed(9999)
    def test_grad_e(self):
        # scalar, vector, and matrix operands (denominator kept away from 0)
        verify_grad(self, DivElemwise, [numpy.ones(()), numpy.ones(())])
        verify_grad(self, DivElemwise, [numpy.random.rand(3), numpy.ones(3)])
        verify_grad(self, DivElemwise, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1])
    def test_grad_sl(self):
        # NOTE(review): identical body to test_grad_e -- presumably this was
        # meant to exercise the scalar variants (div_scalar_l/r); confirm.
        verify_grad(self, DivElemwise, [numpy.ones(()), numpy.ones(())])
        verify_grad(self, DivElemwise, [numpy.random.rand(3), numpy.ones(3)])
        verify_grad(self, DivElemwise, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1])
class T_pow(unittest.TestCase):
    """Gradient checks for elementwise and tensor-scalar power."""
    def setUp(self):
        numpy.random.seed(9999)
    def test_elemwise(self):
        # (diff residue removed: a stale `verify_grad(self, DivElemwise, ...)`
        # line from an older revision preceded this PowElemwise check.)
        verify_grad(self, PowElemwise, [numpy.random.rand(3,4), numpy.random.rand(3,4)])
    def test_scalar_l(self):
        # scalar base, tensor exponent
        verify_grad(self, PowScalarL, [numpy.random.rand(3), 3.0])
    def test_scalar_r(self):
        # tensor base, scalar exponent
        verify_grad(self, PowScalarR, [numpy.random.rand(3), 3.0])
class _testCase_matinv:#(unittest.TestCase):
    # Disabled (commented-out base class): learns the elementwise reciprocal
    # of a random matrix by plain gradient descent.
    def setUp(self):
        # fixed seed so the asserted cost strings below are reproducible
        numpy.random.seed(1)
    def mat_recip(self,dim):
        # symbolic program: minimize sum((a*b - 1)^2) over b
        a = Tensor('float64', [0,0], name='a')
        b = Tensor('float64', [0,0], name='b')
        ab = a*b
        diff = ab - tinit(numpy.ones((dim,dim)))
        ssdiff = sum((diff**2.0))
        g_b = gradient.grad(ssdiff, b, tinit(numpy.ones(1),name='g_cost'))
        # compilation to function
        fn = Function([a,b], [ssdiff,g_b])
        # use the function: 300 steps of gradient descent on wi
        w = numpy.random.rand(dim,dim)
        wi = numpy.random.rand(dim,dim)
        for i in xrange(300):
            ssd, gw = fn(w,wi)
            #print ssd
            if i == 0:
                str0 = str(ssd)  # remember the initial cost
            wi -= 0.4 * gw
        # return (initial cost, final cost) as strings for exact comparison
        return str0, str(ssd)
    def test_recip(self):
        """Matrix reciprocal by gradient descent"""
        self.assertEqual(('2.67327580893', '0.000438649434819'), self.mat_recip(3))
if __name__ == '__main__':
unittest.main()
......@@ -55,7 +55,10 @@ class BaseTensor(ResultBase):
if not isinstance(arr, numpy.ndarray):
arr = numpy.asarray(arr, dtype = self.dtype)
if len(self.broadcastable) != len(arr.shape):
raise ValueError(BaseTensor.filter.E_rank)
raise ValueError(BaseTensor.filter.E_rank,
self.broadcastable,
arr.shape,
self.owner)
for b, s in zip(self.broadcastable, arr.shape):
if b and (s != 1):
raise ValueError(BaseTensor.filter.E_shape)
......
......@@ -71,7 +71,7 @@ class Function:
#print 'orphans', orphans
#print 'ops', gof.graph.ops(inputs, outputs)
env = gof.env.Env(inputs, outputs, features, consistency_check = True)
env = gof.env.Env(inputs, outputs, features + [gof.EquivTool], consistency_check = True)
#print 'orphans in env', env.orphans()
......@@ -79,7 +79,7 @@ class Function:
#print 'orphans after clone', env.orphans()
for d, o in zip(orphan_data, env.orphans()):
for d, o in zip(orphan_data, [env.equiv(orphan) for orphan in orphans]):
#print 'assigning orphan value', d
o.data = d
......
......@@ -95,13 +95,13 @@ def grad_sources_inputs(sources, graph_inputs):
gmap[r] = g_r
return gmap
def grad(cost, param):
def grad(cost, param, g_cost=1.0):
"""Return symbolic expression of gradient of <cost> wrt <param>.
If <param> is a list, then return a list containing the gradient of cost wrt
each element of the list.
"""
inputs = gof.graph.inputs([cost])
gmap = grad_sources_inputs([(cost, 1.0)], inputs)
gmap = grad_sources_inputs([(cost, g_cost)], inputs)
if isinstance(param, list):
return [gmap.get(p, None) for p in param]
else:
......@@ -136,9 +136,9 @@ class numeric_grad:
f_eps = f(*pt)
gf[idx][i] = numpy.asarray((f_eps - f_pt)/eps)
pt[idx][i] = orig
elif len(args[idx].shape) == 2:
elif len(pt[idx].shape) == 2:
for i in xrange(pt[idx].shape[0]):
for j in xrange(args[idx].shape[1]):
for j in xrange(pt[idx].shape[1]):
orig = pt[idx][i,j]
pt[idx][i,j] = pt[idx][i,j] + eps
f_eps = f(*pt)
......
......@@ -15,8 +15,8 @@ class Tensor(BaseTensor):
of Tensor operations contained in this file.
Operators:
- most numeric operators are overloaded to return Ops that *would* perform
the corresponding calculation
- most numeric operators are overloaded (to return Ops that perform the
corresponding calculation)
"""
#UNARY
......@@ -65,7 +65,7 @@ class Tensor(BaseTensor):
def __getslice__(self, key): raise NotImplementedError()
# alternate Tensor constructor
def tensor(data, broadcastable=None, role=None, name=None):
def tinit(data, broadcastable=None, role=None, name=None):
"""Return a Tensor containing given data"""
data = numpy.asarray(data)
if broadcastable is None:
......@@ -88,7 +88,7 @@ def _scalar_switch(normal_f, scalar_f, scalar_f_reverse = None):
if isinstance(obj, Tensor):
return obj
else:
return tensor(obj)
return tinit(obj)
x, y = as_tensor(x), as_tensor(y)
if 0 not in y.broadcastable:
return scalar_f(x, y)
......@@ -125,7 +125,7 @@ class _Op(Op):
if isinstance(obj, Tensor):
return obj
else:
return tensor(obj)
return tinit(obj)
inputs = map(as_tensor, inputs)
if self.nin >= 0:
......@@ -148,8 +148,11 @@ class _Op(Op):
def propagate_dtype(self, *i_dtypes):
def upcast(dtype, *dtypes):
z = numpy.zeros((), dtype = dtype)
#print '----', self.__class__
#print type(z), dtype
for dtype in dtypes:
z = z + numpy.zeros((), dtype = dtype)
#print type(z), type(dtype), dtype
return str(z.dtype)
for dtype in i_dtypes:
if dtype is None:
......@@ -213,7 +216,7 @@ class _Elemwise(Elemwise, _Op):
raise Exception("Cannot infer broadcastable for non-loop variable(s) %s" % nonloop_o)
all_bcast = [broadcastable for broadcastable, i in zip(inputs, idesc) if i[1]]
if reduce(lambda x, y: x is not False and x == y and y, [len(x) for x in all_bcast]) is False:
raise TypeError("Inputs that are loop variables do not all have the same number of dimensions.")
raise TypeError(_Elemwise.propagate_broadcastable.E_ndim, self.__class__)
ret = []
for arr in zip(*all_bcast):
if 0 in arr:
......@@ -221,6 +224,8 @@ class _Elemwise(Elemwise, _Op):
else:
ret.append(1)
return [ret] * self.nout
propagate_broadcastable.E_ndim \
= "Inputs that are loop variables do not all have the same number of dimensions."
def c_init(self, inputs, outputs):
raise AbstractFunctionError()
......@@ -255,7 +260,10 @@ class TensorScalarOp(_Elemwise):
def c_code_foreach(self):
return "%%(z)s_i = %s;" % self.c_expr
def constructor(op_cls):
def _constructor(op_cls):
"""Return a function that calls op_cls(*input)
and returns the outputs of the op (with single outputs unpacked)
"""
def f(*args, **kwargs):
op = op_cls(*args, **kwargs)
if len(op.outputs) > 1:
......@@ -278,6 +286,12 @@ class Abs(_Elemwise):
return "%(z)s_i = abs(%(x)s_i);"
#Constructor not necessary because builtin abs() does this
class Exp(_Elemwise):
    """Elementwise exponential."""
    def impl(self, x): return numpy.exp(x)
    def grad(self, x, gz): return gz * exp(x)  # d/dx exp(x) = exp(x)
    def c_foreach(self, (x_i, ), (z_i, )): return "z_i = exp(x_i);"
exp = _constructor(Exp)
class Neg(_Elemwise):
def impl(self, x):
return -x
......@@ -287,6 +301,12 @@ class Neg(_Elemwise):
return "%(z)s_i = -%(x)s_i;"
#Constructor not necessary because unary '-' does this
class Log(_Elemwise):
    """Elementwise natural logarithm."""
    def impl(self, x): return numpy.log(x)
    def grad(self, x, gz): return gz / x  # d/dx log(x) = 1/x
    def c_foreach(self, (x_i, ), (z_i, )): return "z_i = log(x_i);"
log = _constructor(Log)
class Sgn(_Elemwise):
def impl(self, x):
return numpy.abs(x) / x
......@@ -294,7 +314,7 @@ class Sgn(_Elemwise):
return [None]
def c_foreach(self, (x_i, ), (z_i, )):
return "%(z)s_i = %(x)s_i/abs(%(x)s_i);" # TODO: C use copysign
sgn = constructor(Sgn)
sgn = _constructor(Sgn)
class Sum(_Elemwise):
def impl(self, x):
......@@ -307,7 +327,7 @@ class Sum(_Elemwise):
return "dtype_%(sum)s* %(sum)sp = ((dtype_%(sum)s*)PyArray_DATA(%(sum)s)); %(sum)sp[0] = 0;"
def c_foreach(self, (x_i, ), (sum, )):
return "%(sum)sp[0] += %(x)s_i;"
sum = constructor(Sum)
sum = _constructor(Sum)
class Fill(_Elemwise):
def impl(self, model, value):
......@@ -318,7 +338,7 @@ class Fill(_Elemwise):
return "dtype_%(value)s %(value)s0 = ((dtype_%(value)s*)PyArray_DATA(%(value)s))[0];"
def c_foreach(self, (model_i, value), (z_i, )):
return "%(z)s_i = %(value)s0;"
fill = constructor(Fill)
fill = _constructor(Fill)
class TensorCopy(_Elemwise):
......@@ -328,7 +348,7 @@ class TensorCopy(_Elemwise):
return gz
def c_foreach(self, (x_i, ), (z_i, )):
return "%(z)s_i = %(x)s_i;"
tensor_copy = constructor(TensorCopy)
tensor_copy = _constructor(TensorCopy)
if 0:
##########################
......@@ -372,79 +392,83 @@ if 0:
raise NotImplemented
if 0:
##########################
# Arithmetic : Add
##########################
##########################
# Arithmetic : Add
##########################
# Elemwise #
class add_elemwise(_Elemwise):
def impl(self, x, y):
_assert_same_shapes(x, y)
return x + y
def grad(self, (x, y), gz):
return gz, gz
def c_foreach(self, (x_i, y_i), (z_i, )):
return "z_i = x_i + y_i;"
class add_elemwise_inplace(add_elemwise.inplace_version()):
def impl(self, x, y):
_assert_same_shapes(x, y)
x += y
return x
# Scalar #
class add_scalar(TensorScalarOp):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
return x + a
def grad(self, (x, a), gz):
return gz, sum(gz)
c_expr = "x_i + a"
class add_scalar_inplace(add_scalar.inplace_version()):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
x += a
return x
add = _scalar_switch(add_elemwise, add_scalar, add_scalar)
add_inplace = _scalar_switch(add_elemwise_inplace, add_scalar_inplace)
# Elemwise #
class AddElemwise(_Elemwise):
    """Elementwise addition of two same-shaped tensors."""
    def impl(self, x, y):
        _assert_same_shapes(x, y)
        return x + y
    def grad(self, (x, y), gz):
        # addition passes the output gradient through to both inputs
        return gz, gz
    def c_foreach(self, (x_i, y_i), (z_i, )):
        return "z_i = x_i + y_i;"
add_elemwise = _constructor(AddElemwise)

class AddElemwiseInplace(AddElemwise.inplace_version()):
    # In-place variant: overwrites x with x + y.
    def impl(self, x, y):
        _assert_same_shapes(x, y)
        x += y
        return x
add_elemwise_inplace = _constructor(AddElemwiseInplace)
if 0:
##########################
# Arithmetic : Sub
##########################
# Scalar #
class AddScalar(TensorScalarOp):
    """Add a scalar a to every element of tensor x."""
    def impl(self, x, a):
        _assert_tensor_scalar(x, a)
        return x + a
    def grad(self, (x, a), gz):
        # gradient wrt the scalar is the sum of the elementwise gradients
        return gz, sum(gz)
    c_expr = "x_i + a"
add_scalar = _constructor(AddScalar)
# Elemwise #
class SubElemwise(_Elemwise):
def impl(self, x, y):
_assert_same_shapes(x, y)
return x - y
def grad(self, (x, y), gz):
return gz, -gz
def c_foreach(self, (x_i, y_i), (z_i, )):
return "z_i = x_i - y_i;"
class AddScalarInplace(AddScalar.inplace_version()):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
x += a
return x
add_scalar_inplace = _constructor(AddScalarInplace)
class SubElemwiseInplace(SubElemwise.inplace_version()):
def impl(self, x, y):
_assert_same_shapes(x, y)
x -= y
return x
add = _scalar_switch(add_elemwise, add_scalar, add_scalar)
add_inplace = _scalar_switch(add_elemwise_inplace, add_scalar_inplace)
# Scalar #
def sub_scalar_r(x, a):
return add_scalar(x, -a)
def sub_scalar_l(x, a):
return add_scalar(-x, a)
##########################
# Arithmetic : Sub
##########################
def sub_scalar_rinplace(x, a):
return add_scalar_inplace(x, -a)
# Elemwise #
class SubElemwise(_Elemwise):
def impl(self, x, y):
_assert_same_shapes(x, y)
return x - y
def grad(self, (x, y), gz):
return gz, -gz
def c_foreach(self, (x_i, y_i), (z_i, )):
return "z_i = x_i - y_i;"
sub_elemwise = _constructor(SubElemwise)
class SubElemwiseInplace(SubElemwise.inplace_version()):
def impl(self, x, y):
_assert_same_shapes(x, y)
x -= y
return x
sub_elemwise_inplace = _constructor(SubElemwiseInplace)
sub = _scalar_switch(sub_elemwise, sub_scalar_r, sub_scalar_l)
sub_inplace = _scalar_switch(sub_elemwise_inplace, sub_scalar_rinplace)
# Scalar #
def sub_scalar_r(x, a):
return add_scalar(x, -a)
def sub_scalar_l(x, a):
return add_scalar(-x, a)
def sub_scalar_rinplace(x, a):
return add_scalar_inplace(x, -a)
sub = _scalar_switch(sub_elemwise, sub_scalar_r, sub_scalar_l)
sub_inplace = _scalar_switch(sub_elemwise_inplace, sub_scalar_rinplace)
##########################
# Arithmetic : Mul
......@@ -459,14 +483,14 @@ class MulElemwise(_Elemwise):
return mul(y, gz), mul(x, gz)
def c_foreach(self, (x_i, y_i), (z_i, )):
return "%(z)s_i = %(x)s_i * %(y)s_i;"
mul_elemwise = constructor(MulElemwise)
mul_elemwise = _constructor(MulElemwise)
class MulElemwiseInplace(MulElemwise.inplace_version()):
def impl(self, x, y):
_assert_same_shapes(x, y)
x *= y
return x
mul_elemwise_inplace = constructor(MulElemwiseInplace)
mul_elemwise_inplace = _constructor(MulElemwiseInplace)
# Scalar #
class Scale(TensorScalarOp):
......@@ -476,109 +500,123 @@ class Scale(TensorScalarOp):
def grad(self, (x, a), gz):
return scale(a, gz), sum(mul_elemwise(x, gz))
c_expr = "%(x)s_i * _%(a)s"
scale = constructor(Scale)
scale = _constructor(Scale)
class ScaleInplace(Scale.inplace_version()):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
x *= a
return x
scale_inplace = constructor(ScaleInplace)
scale_inplace = _constructor(ScaleInplace)
mul = _scalar_switch(mul_elemwise, scale, scale)
mul_inplace = _scalar_switch(mul_elemwise_inplace, scale_inplace)
if 0:
##########################
# Arithmetic : Div
##########################
##########################
# Arithmetic : Div
##########################
# Elemwise #
class DivElemwise(_Elemwise):
def impl(self, x, y):
_assert_same_shapes(x, y)
return x / y
def grad(self, (x, y), gz):
return div(gz, y), -div(mul(x, gz), sqr(y))
def c_foreach(self, (x_i, y_i), (z_i, )):
return "z_i = x_i / y_i;"
# Elemwise #
class DivElemwise(_Elemwise):
def impl(self, x, y):
_assert_same_shapes(x, y)
return x / y
def grad(self, (x, y), gz):
return div(gz, y), -div(mul(x, gz), (y*y))
def c_foreach(self, (x_i, y_i), (z_i, )):
return "%(z)s_i = %(x)s_i / %(y)s_i;"
div_elemwise = _constructor(DivElemwise)
class DivElemwiseInplace(DivElemwise.inplace_version()):
def impl(self, x, y):
_assert_same_shapes(x, y)
x /= y
return x
class DivElemwiseInplace(DivElemwise.inplace_version()):
def impl(self, x, y):
_assert_same_shapes(x, y)
x /= y
return x
div_elemwise_inplace = _constructor(DivElemwiseInplace)
class InvElemwise(_Elemwise):
def impl(self, x):
return 1.0/x
def grad(self, x, gz):
return -gz / (x*x)
def c_foreach(self, (x_i, ), (z_i, )):
return "%(z)s_i = 1.0 / %(x)s_i;" #TODO: cast 1.0 to the dtype of x
inv_elemwise = _constructor(InvElemwise)
# Scalar #
def div_scalar_r(x, a):
return scale(x, inv_elemwise(a))
# Scalar #
def div_scalar_r(x, a):
return scale(x, inv_elemwise(a))
def div_scalar_l(x, a):
return scale(inv_elemwise(x), a)
def div_scalar_l(x, a):
return scale(inv_elemwise(x), a)
def div_scalar_rinplace(x, a):
return scale_inplace(x, inv_elemwise(a))
def div_scalar_rinplace(x, a):
return scale_inplace(x, inv_elemwise(a))
div = _scalar_switch(div_elemwise, div_scalar_r, div_scalar_l)
div_inplace = _scalar_switch(div_elemwise_inplace, div_scalar_rinplace)
div = _scalar_switch(div_elemwise, div_scalar_r, div_scalar_l)
div_inplace = _scalar_switch(div_elemwise_inplace, div_scalar_rinplace)
if 0:
##########################
# Arithmetic : Pow
##########################
##########################
# Arithmetic : Pow
##########################
# Elemwise #
class PowElemwise(_Elemwise):
    """Elementwise power x ** y for two same-shaped tensors."""
    def impl(self, x, y):
        _assert_same_shapes(x, y)
        return x ** y
    def grad(self, (x, y), gz):
        # d/dx x^y = y * x^(y-1) ; d/dy x^y = log(x) * x^y
        gx = gz * y * (pow_elemwise(x, y-1.0))
        gs = gz * log(x) * pow_elemwise(x, y)
        return gx, gs
    def c_foreach(self, (x_i, y_i), (z_i, )):
        return "%(z)s_i = pow(%(x)s_i, %(y)s_i);"
pow_elemwise = _constructor(PowElemwise)

class PowElemwiseInplace(PowElemwise.inplace_version()):
    # In-place variant: overwrites x with x ** y.
    def impl(self, x, y):
        _assert_same_shapes(x, y)
        x **= y
        return x
pow_elemwise_inplace = _constructor(PowElemwiseInplace)

# Scalar #
class PowScalarL(TensorScalarOp):
    """Scalar base raised to a tensor exponent: a ** x."""
    def impl(self, x, a):
        _assert_tensor_scalar(x, a)
        return a ** x
    def grad(self, (x, s), gz):
        # NOTE(review): these formulas look suspect (e.g. log(x) where
        # log(s) would be expected for d/dx s^x) -- confirm with verify_grad.
        gx = sum(gz * s * pow_scalar_l(add_scalar(s,-1.0), x))
        gs = scale(mul(gz, pow_scalar_l(s, x)), log(x))
        return gx, gs
    c_expr = "pow(%(a)s, %(x)s_i)"
pow_scalar_l = _constructor(PowScalarL)

class PowScalarR(TensorScalarOp):
    """Tensor base raised to a scalar exponent: x ** a."""
    def impl(self, x, a):
        _assert_tensor_scalar(x, a)
        return x ** a
    def grad(self, (x, s), gz):
        # d/dx x^s = s * x^(s-1) ; d/ds x^s = sum(log(x) * x^s * gz)
        gx = scale(mul_elemwise(gz,pow_scalar_r(x, add_scalar(s,-1.0))), s)
        gs = sum(mul_elemwise(mul_elemwise(gz, pow_scalar_r(x,s)), log(x)))
        return gx, gs
    c_expr = "pow(%(x)s_i, _%(a)s)"
pow_scalar_r = _constructor(PowScalarR)

class PowScalarRInplace(PowScalarR.inplace_version()):
    # In-place variant: overwrites x with x ** a.
    def impl(self, x, a):
        _assert_tensor_scalar(x, a)
        x **= a
        return x
pow_scalar_r_inplace = _constructor(PowScalarRInplace)
# Elemwise #
class PowElemwise(_Elemwise):
def impl(self, x, y):
_assert_same_shapes(x, y)
return x ** y
def grad(self, (x, s), gz):
gx = gz * s * (pow_elemwise(x, s-1.0))
gs = gz * log(x) * pow_elemwise(x, s)
return gx, gs
def c_foreach(self, (x_i, s_i), (z_i, )):
return "z_i = pow(x_i, s_i)"
class PowElemwiseInplace(PowElemwise.inplace_version()):
def impl(self, x, y):
_assert_same_shapes(x, y)
x **= y
return x
# Scalar #
class PowScalarL(TensorScalarOp):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
return a ** x
def grad(self, (x, s), gz):
gx = sum(gz * s * pow_scalar_l(add_scalar(s,-1.0), x))
gs = scale(mul(gz, pow_scalar_l(s, x)), log(x))
return gx, gs
c_expr = "pow(a, x_i)"
class PowScalarR(TensorScalarOp):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
return x ** a
def grad(self, (x, s), gz):
gx = scale(mul_elemwise(gz,pow_scalar_r(x, add_scalar(s,-1.0))), s)
gs = sum(mul_elemwise(mul_elemwise(gz, pow_scalar_r(x,s)), log(x)))
return gx, gs
c_expr = "pow(x_i, a)"
class PowScalarRInplace(PowScalarR.inplace_version()):
def impl(self, x, a):
_assert_tensor_scalar(x, a)
x **= a
return x
pow = _scalar_switch(pow_elemwise, pow_scalar_r, pow_scalar_l)
pow_inplace = _scalar_switch(pow_elemwise_inplace, pow_scalar_rinplace)
pow = _scalar_switch(pow_elemwise, pow_scalar_r, pow_scalar_l)
pow_inplace = _scalar_switch(pow_elemwise_inplace, pow_scalar_r_inplace)
if 0:
......
......@@ -83,16 +83,7 @@ class InvElemwiseInplace(InvElemwise.inplace_version()):
return x
class Exp(Elemwise):
def impl(self, x): return numpy.exp(x)
def grad(self, x, gz): return gz * exp(x)
def c_foreach(self, (x_i, ), (z_i, )): return "z_i = exp(x_i);"
class Log(Elemwise):
def impl(self, x): return numpy.log(x)
def grad(self, x, gz): return gz / x
def c_foreach(self, (x_i, ), (z_i, )): return "z_i = log(x_i);"
class Log2(Elemwise):
def impl(self, x): return numpy.log2(x)
def grad(self, x, gz): return gz / (x * numpy.log(2))
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed carefully.
Please finish editing this comment first!
Register or sign in to post a comment