injected new elemwise in tensor.py

上级 761c7f9f
...@@ -22,19 +22,37 @@ def env(inputs, outputs, validate = True, features = []): ...@@ -22,19 +22,37 @@ def env(inputs, outputs, validate = True, features = []):
class _test_DimShuffle(unittest.TestCase): class _test_DimShuffle(unittest.TestCase):
def test_straightforward(self): def with_linker(self, linker):
x, y, z = inputs() for xsh, shuffle, zsh in [((2, 3), (1, 'x', 0), (3, 1, 2)),
e0 = DimShuffle(x, [1, 'x', 0]).out ((1, 2, 3), (1, 2), (2, 3)),
f = gof.PerformLinker(env([x], [e0])).make_function(inplace=True) ((1, 2, 1, 3), (1, 3), (2, 3)),
assert f(numpy.ones((2, 3))).shape == (3, 1, 2) ((2, 3, 4), (2, 1, 0), (4, 3, 2)),
((2, 3, 4), ('x', 2, 1, 0, 'x'), (1, 4, 3, 2, 1)),
((1, 4, 3, 2, 1), (3, 2, 1), (2, 3, 4)),
((1, 1, 4), (1, 2), (1, 4))]:
x = modes.build(Tensor('float64', [1 * (entry == 1) for entry in xsh], name = 'x'))
e = DimShuffle(x, shuffle).out
# print shuffle, e.owner.grad(e.owner.inputs, e.owner.outputs).owner.new_order
f = linker(env([x], [e])).make_function(inplace=False)
assert f(numpy.ones(xsh)).shape == zsh
def test_perform(self):
self.with_linker(gof.PerformLinker)
# def test_straightforward(self):
# x, y, z = inputs()
# e0 = DimShuffle(x, [1, 'x', 0]).out
# f = gof.PerformLinker(env([x], [e0])).make_function(inplace=True)
# assert f(numpy.ones((2, 3))).shape == (3, 1, 2)
class _test_Broadcast(unittest.TestCase): class _test_Broadcast(unittest.TestCase):
def with_linker(self, linker): def with_linker(self, linker):
for xsh, ysh in [((5, 5), (5, 5)), for xsh, ysh in [((3, 5), (3, 5)),
((5, 5), (1, 5)), ((3, 5), (1, 5)),
((5, 5), (5, 1)), ((3, 5), (3, 1)),
((1, 5), (5, 1)), ((1, 5), (5, 1)),
((1, 1), (1, 1)), ((1, 1), (1, 1)),
((2, 3, 4, 5), (2, 3, 4, 5)), ((2, 3, 4, 5), (2, 3, 4, 5)),
...@@ -52,7 +70,11 @@ class _test_Broadcast(unittest.TestCase): ...@@ -52,7 +70,11 @@ class _test_Broadcast(unittest.TestCase):
xv = numpy.asarray(numpy.random.rand(*xsh)) xv = numpy.asarray(numpy.random.rand(*xsh))
yv = numpy.asarray(numpy.random.rand(*ysh)) yv = numpy.asarray(numpy.random.rand(*ysh))
zv = xv + yv zv = xv + yv
# print "AAAAAAAAAAAAAAAAAA"
# print f(xv, yv)
# print zv
# print "BBBBBBBBBBBBBBBBBB"
self.failUnless((f(xv, yv) == zv).all()) self.failUnless((f(xv, yv) == zv).all())
def with_linker_inplace(self, linker): def with_linker_inplace(self, linker):
...@@ -105,7 +127,9 @@ class _test_CAReduce(unittest.TestCase): ...@@ -105,7 +127,9 @@ class _test_CAReduce(unittest.TestCase):
for xsh, tosum in [((5, 6), (0, 1)), for xsh, tosum in [((5, 6), (0, 1)),
((5, 6), (0, )), ((5, 6), (0, )),
((5, 6), (1, )), ((5, 6), (1, )),
((2, 3, 4, 5), (0, 1, 3))]: ((5, 6), ()),
((2, 3, 4, 5), (0, 1, 3)),
((), ())]:
x = modes.build(Tensor('float64', [1 * (entry == 1) for entry in xsh], name = 'x')) x = modes.build(Tensor('float64', [1 * (entry == 1) for entry in xsh], name = 'x'))
e = CAReduce(Add, [x], dimensions_to_reduce = tosum).out e = CAReduce(Add, [x], dimensions_to_reduce = tosum).out
f = linker(env([x], [e])).make_function(inplace = False) f = linker(env([x], [e])).make_function(inplace = False)
...@@ -113,7 +137,13 @@ class _test_CAReduce(unittest.TestCase): ...@@ -113,7 +137,13 @@ class _test_CAReduce(unittest.TestCase):
zv = xv zv = xv
for axis in reversed(sorted(tosum)): for axis in reversed(sorted(tosum)):
zv = numpy.add.reduce(zv, axis) zv = numpy.add.reduce(zv, axis)
self.failUnless((f(xv) - zv < 1e-10).all()) # print "AAAAAAAAAAAAAAAAAA"
# print xsh, tosum
# print f(xv)
# print zv
# print f(xv) - zv
# print "BBBBBBBBBBBBBBBBBB"
self.failUnless((numpy.abs(f(xv) - zv) < 1e-10).all())
def test_perform(self): def test_perform(self):
self.with_linker(gof.PerformLinker) self.with_linker(gof.PerformLinker)
...@@ -123,27 +153,27 @@ class _test_CAReduce(unittest.TestCase): ...@@ -123,27 +153,27 @@ class _test_CAReduce(unittest.TestCase):
if __name__ == '__main__': if __name__ == '__main__':
# unittest.main() unittest.main()
x = modes.build(Tensor('float64', [0, 0], name = 'x')) # x = modes.build(Tensor('float64', [0, 0], name = 'x'))
y = modes.build(Tensor('float64', [0, 0], name = 'y')) # y = modes.build(Tensor('float64', [0, 0], name = 'y'))
e = Broadcast(SquareDiff, (x, y), {0:0}).out # e = Broadcast(SquareDiff, (x, y), {0:0}).out
f = gof.CLinker(env([x, y], [e])).make_function(inplace = False) # f = gof.CLinker(env([x, y], [e])).make_function(inplace = False)
xv = numpy.random.rand(1000, 1000) # xv = numpy.random.rand(1000, 1000)
yv = numpy.random.rand(1000, 1000) # yv = numpy.random.rand(1000, 1000)
zv = numpy.random.rand(1000, 1000) # zv = numpy.random.rand(1000, 1000)
add = numpy.frompyfunc(lambda x, y: x + y, 2, 1) # add = numpy.frompyfunc(lambda x, y: x + y, 2, 1)
t0 = time.time() # t0 = time.time()
for i in xrange(100): # for i in xrange(100):
xv -= yv # xv -= yv
xv *= xv # xv *= xv
# xv += yv # # xv += yv
print time.time() - t0 # print time.time() - t0
t0 = time.time() # t0 = time.time()
for i in xrange(100): # for i in xrange(100):
f(xv, yv) # f(xv, yv)
print time.time() - t0 # print time.time() - t0
......
...@@ -7,7 +7,9 @@ from compile import Function, eval_outputs ...@@ -7,7 +7,9 @@ from compile import Function, eval_outputs
import gradient import gradient
import gof, gof.graph import gof, gof.graph
from gof.python25 import any from gof.python25 import any
import gof
from elemwise2 import DimShuffle
def _numpy_checker(x, y): def _numpy_checker(x, y):
""" """
...@@ -58,6 +60,15 @@ def verify_grad(testcase, op_cls, pt, n_tests=1, rng=numpy.random, eps=0.0000001 ...@@ -58,6 +60,15 @@ def verify_grad(testcase, op_cls, pt, n_tests=1, rng=numpy.random, eps=0.0000001
if not isinstance(analytic_grad, (list, tuple)): if not isinstance(analytic_grad, (list, tuple)):
analytic_grad = [analytic_grad] analytic_grad = [analytic_grad]
# if num_grad.max_err(analytic_grad) > 1.0e-4:
# print "aaaaaaaaaa"
# print gof.Env(tensor_pt, [cost])
# print gof.Env(tensor_pt, symbolic_grad)
# print analytic_grad
# print num_grad.gf
# print num_grad.max_err(analytic_grad)
# print "bbbbbbbbbb"
if num_grad.max_err(analytic_grad) > 1.0e-4: if num_grad.max_err(analytic_grad) > 1.0e-4:
raise Exception(verify_grad.E_grad) raise Exception(verify_grad.E_grad)
verify_grad.E_grad = 'gradient error exceeded tolerance' verify_grad.E_grad = 'gradient error exceeded tolerance'
...@@ -361,6 +372,15 @@ class T_add(unittest.TestCase): ...@@ -361,6 +372,15 @@ class T_add(unittest.TestCase):
f = Function([a,b], [fn(a, b)], linker_cls = gof.CLinker) f = Function([a,b], [fn(a, b)], linker_cls = gof.CLinker)
self.failUnless(numpy.all(fn(a.data, b.data) == f(a.data, b.data))) self.failUnless(numpy.all(fn(a.data, b.data) == f(a.data, b.data)))
def test_grad_scalar_l(self):
verify_grad(self, Add, [numpy.asarray([3.0]), numpy.random.rand(3)])
def test_grad_scalar_r(self):
verify_grad(self, Add, [numpy.random.rand(3), numpy.asarray([3.0])])
def test_grad_row(self):
verify_grad(self, Add, [numpy.random.rand(3, 5), numpy.random.rand(1, 5)])
def test_grad_col(self):
verify_grad(self, Add, [numpy.random.rand(3, 5), numpy.random.rand(3, 1)])
class T_abs(unittest.TestCase): class T_abs(unittest.TestCase):
def test_impl(self): def test_impl(self):
...@@ -381,8 +401,8 @@ class T_abs(unittest.TestCase): ...@@ -381,8 +401,8 @@ class T_abs(unittest.TestCase):
class AbsBadGrad(tensor._Elemwise): class AbsBadGrad(tensor._Elemwise):
def impl(self, x): def impl(self, x):
return numpy.abs(x) return numpy.abs(x)
def grad(self, x, gz): def grad(self, (x, ), (gz, )):
return scale(gz * sgn(x),0.9) return mul(gz * sgn(x),0.9),
def c_foreach(self, (x_i, ), (z_i, )): def c_foreach(self, (x_i, ), (z_i, )):
return "z_i = abs(x_i);" return "z_i = abs(x_i);"
...@@ -401,7 +421,7 @@ class T_fill(unittest.TestCase): ...@@ -401,7 +421,7 @@ class T_fill(unittest.TestCase):
o = t.owner o = t.owner
self.failUnless(o.inputs[0].broadcastable == (0,)) self.failUnless(o.inputs[0].broadcastable == (0,))
# self.failUnless(o.inputs[0].dtype[0:3] == 'int') # self.failUnless(o.inputs[0].dtype[0:3] == 'int')
self.failUnless(o.inputs[1].broadcastable == ()) self.failUnless(o.inputs[1].broadcastable == (1,))
# self.failUnless(o.inputs[1].dtype[0:3] == 'flo') # self.failUnless(o.inputs[1].dtype[0:3] == 'flo')
self.failUnless(o.outputs[0].broadcastable == (0,)) self.failUnless(o.outputs[0].broadcastable == (0,))
# self.failUnless(o.outputs[0].dtype[0:3] == 'flo') # self.failUnless(o.outputs[0].dtype[0:3] == 'flo')
...@@ -432,47 +452,70 @@ class T_mul(unittest.TestCase): ...@@ -432,47 +452,70 @@ class T_mul(unittest.TestCase):
def test_elemwise(self): def test_elemwise(self):
a = astensor(0.0) a = astensor(0.0)
b = astensor(0.0) b = astensor(0.0)
check_eq2_both(self, [a,b], mul_elemwise(a,b), [3.0, 4.0], 12.0) check_eq2_both(self, [a,b], mul(a,b), [3.0, 4.0], 12.0)
check_eq2_both(self, [a,b], mul_elemwise(b,a), [-1.0,2.0], -2.0) check_eq2_both(self, [a,b], mul(b,a), [-1.0,2.0], -2.0)
self.failUnless(isinstance(mul(a,b).owner, Scale)) #self.failUnless(isinstance(mul(a,b).owner, Scale))
a = astensor(numpy.ones(2)) a = astensor(numpy.ones(2))
b = astensor(numpy.ones(2)) b = astensor(numpy.ones(2))
aa = numpy.asarray([-0.5, 4.0]) aa = numpy.asarray([-0.5, 4.0])
bb = numpy.asarray([-0.5, 2.0]) bb = numpy.asarray([-0.5, 2.0])
check_eq2_both(self, [a,b], mul_elemwise(a,b), [aa,bb], numpy.asarray([0.25, 8.0])) check_eq2_both(self, [a,b], mul(a,b), [aa,bb], numpy.asarray([0.25, 8.0]))
check_eq2_both(self, [a,b], mul_elemwise(a,b), [bb,aa], numpy.asarray([0.25, 8.0])) check_eq2_both(self, [a,b], mul(a,b), [bb,aa], numpy.asarray([0.25, 8.0]))
self.failUnless(isinstance(mul(a,b).owner, MulElemwise)) #self.failUnless(isinstance(mul(a,b).owner, MulElemwise))
def test_scalar(self): def test_scalar(self):
r = numpy.random.rand(2,3) r = numpy.random.rand(2,3)
a = astensor(r) a = astensor(r)
b = astensor(2.0) b = astensor(2.0)
check_eq2_both(self, [a,b], scale(a,b), [r, 2.0], r*2.0) check_eq2_both(self, [a,b], mul(a,b), [r, 2.0], r*2.0)
check_eq2_both(self, [a,b], scale(a,b), [r, 4.0], r*4.0) check_eq2_both(self, [a,b], mul(a,b), [r, 4.0], r*4.0)
self.failUnless(b.data == 2.0) self.failUnless(b.data == 2.0)
def test_operator(self): def test_rowcol(self):
a = astensor([1,1]) r1 = numpy.random.rand(3,5)
aa = astensor([1,1]) r2 = numpy.random.rand(1,5)
b = astensor(4) r3 = numpy.random.rand(3,1)
self.failUnless(isinstance((a*b).owner, Scale)) a1, a2, a3 = astensor(r1), astensor(r2), astensor(r3)
self.failUnless(isinstance((b*a).owner, Scale)) check_eq2_both(self, [a1,a2], mul(a1,a2), [r1, r2], r1*r2)
self.failUnless(isinstance((a*aa).owner, MulElemwise)) check_eq2_both(self, [a1,a3], mul(a1,a3), [r1, r3], r1*r3)
self.failUnless(isinstance((aa*a).owner, MulElemwise))
def test_grad_elemwise(self):
verify_grad(self, Mul, [numpy.random.rand(3,4), numpy.random.rand(3,4)])
def test_grad_scalar_l(self):
verify_grad(self, Mul, [numpy.asarray([3.0]), numpy.random.rand(3)])
def test_grad_scalar_r(self):
verify_grad(self, Mul, [numpy.random.rand(3), numpy.asarray([3.0])])
def test_grad_row(self):
verify_grad(self, Mul, [numpy.random.rand(3, 5), numpy.random.rand(1, 5)])
def test_grad_row2(self):
op = lambda x, y: Mul(x, DimShuffle(y, ['x', 0]).out)
verify_grad(self, op, [numpy.random.rand(3, 5), numpy.random.rand(5)])
def test_grad_col(self):
verify_grad(self, Mul, [numpy.random.rand(3, 5), numpy.random.rand(3, 1)])
# def test_operator(self):
# a = astensor([1,1])
# aa = astensor([1,1])
# b = astensor(4)
# self.failUnless(isinstance((a*b).owner, Scale))
# self.failUnless(isinstance((b*a).owner, Scale))
# self.failUnless(isinstance((a*aa).owner, MulElemwise))
# self.failUnless(isinstance((aa*a).owner, MulElemwise))
def test_wrong_shapes(self): def test_wrong_shapes(self):
a = astensor(numpy.ones(3)) a = astensor(numpy.ones(3))
b = astensor(numpy.ones(4)) b = astensor(numpy.ones(4))
try: try:
check_eq2(self, [a,b], MulElemwise(a,b).out, check_eq2(self, [a,b], Mul(a,b).out,
[numpy.ones(3), numpy.ones(4)], 1.0) [numpy.ones(3), numpy.ones(4)], 1.0)
self.fail() self.fail()
except ValueError, e: except ValueError, e:
self.failUnless(e[0] is tensor._assert_same_shapes.E_shape) self.failUnless('shape mismatch' in str(e))
try: try:
check_eq2_c(self, [a,b], MulElemwise(a,b).out, check_eq2_c(self, [a,b], Mul(a,b).out,
[numpy.ones(3), numpy.ones(4)], 1.0) [numpy.ones(3), numpy.ones(4)], 1.0)
self.fail() self.fail()
except ValueError, e: except ValueError, e:
...@@ -482,14 +525,14 @@ class T_div(unittest.TestCase): ...@@ -482,14 +525,14 @@ class T_div(unittest.TestCase):
def setUp(self): def setUp(self):
numpy.random.seed(9999) numpy.random.seed(9999)
def test_grad_e(self): def test_grad_e(self):
verify_grad(self, DivElemwise, [numpy.ones(()), numpy.ones(())]) verify_grad(self, Div, [numpy.random.rand(3), numpy.ones(3)])
verify_grad(self, DivElemwise, [numpy.random.rand(3), numpy.ones(3)]) verify_grad(self, Div, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1])
verify_grad(self, DivElemwise, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1]) verify_grad(self, Div, [numpy.ones(()), numpy.ones(())])
def test_grad_sl(self): def test_grad_sl(self):
verify_grad(self, DivElemwise, [numpy.ones(()), numpy.ones(())]) verify_grad(self, Div, [numpy.ones((3, 5)), numpy.ones((1, 1))])
verify_grad(self, DivElemwise, [numpy.random.rand(3), numpy.ones(3)]) verify_grad(self, Div, [numpy.random.rand(3), numpy.ones((1, ))])
verify_grad(self, DivElemwise, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1]) verify_grad(self, Div, [numpy.random.rand(3,5), numpy.random.rand(1,1)])
class T_log2(unittest.TestCase): class T_log2(unittest.TestCase):
def test0(self): def test0(self):
...@@ -509,12 +552,16 @@ class T_pow(unittest.TestCase): ...@@ -509,12 +552,16 @@ class T_pow(unittest.TestCase):
def setUp(self): def setUp(self):
numpy.random.seed(9999) numpy.random.seed(9999)
def test_elemwise(self): def test_elemwise(self):
verify_grad(self, DivElemwise, [numpy.random.rand(3,4), numpy.random.rand(3,4)+0.1]) verify_grad(self, Div, [numpy.random.rand(3,4), numpy.random.rand(3,4)+0.1])
verify_grad(self, PowElemwise, [numpy.random.rand(3,4), numpy.random.rand(3,4)]) verify_grad(self, Pow, [numpy.random.rand(3,4), numpy.random.rand(3,4)])
def test_scalar_l(self): def test_scalar_l(self):
verify_grad(self, PowScalarL, [numpy.random.rand(3), numpy.asarray(3.0)]) verify_grad(self, Pow, [numpy.asarray([3.0]), numpy.random.rand(3)])
def test_scalar_r(self): def test_scalar_r(self):
verify_grad(self, PowScalarR, [numpy.random.rand(3), numpy.asarray(3.0)]) verify_grad(self, Pow, [numpy.random.rand(3), numpy.asarray([3.0])])
def test_row(self):
verify_grad(self, Pow, [numpy.random.rand(3, 5), numpy.random.rand(1, 5)])
def test_col(self):
verify_grad(self, Pow, [numpy.random.rand(3, 5), numpy.random.rand(3, 1)])
class _testCase_matinv(unittest.TestCase): class _testCase_matinv(unittest.TestCase):
......
...@@ -94,7 +94,7 @@ class BaseTensor(ResultBase): ...@@ -94,7 +94,7 @@ class BaseTensor(ResultBase):
'complex128': (complex, 'theano_complex128', 'NPY_COMPLEX128'), 'complex128': (complex, 'theano_complex128', 'NPY_COMPLEX128'),
'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64')}[self.dtype] 'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64')}[self.dtype]
except KeyError: except KeyError:
raise TypeError("Unsupported dtype for BaseTensor: %s" % self.dtype) raise TypeError("Unsupported dtype for %s: %s" % (self.__class__.__name__, self.dtype))
# #
# Hash for constant folding # Hash for constant folding
......
...@@ -3,12 +3,16 @@ import elemwise_cgen as cgen ...@@ -3,12 +3,16 @@ import elemwise_cgen as cgen
import numpy import numpy
from gof import Op, Viewer, Destroyer from gof import Op, Viewer, Destroyer
from tensor import Tensor from base_tensor import BaseTensor as Tensor
from scalar import upcast, Scalar from scalar import upcast, Scalar
import scalar_ops import scalar_ops
import gof import gof
def astensor(data):
assert isinstance(data, Tensor)
return data
################## ##################
### DimShuffle ### ### DimShuffle ###
...@@ -18,6 +22,8 @@ class DimShuffle(Op, Viewer): ...@@ -18,6 +22,8 @@ class DimShuffle(Op, Viewer):
def __init__(self, input, new_order, inplace = True): def __init__(self, input, new_order, inplace = True):
input = astensor(input)
ib = input.broadcastable ib = input.broadcastable
ob = [] ob = []
for value in new_order: for value in new_order:
...@@ -35,13 +41,23 @@ class DimShuffle(Op, Viewer): ...@@ -35,13 +41,23 @@ class DimShuffle(Op, Viewer):
self.outputs = output, self.outputs = output,
self.inplace = inplace self.inplace = inplace
self.numorder = [x for x in new_order if type(x) == int] self.drop = []
self.is_transposition = sorted(new_order) == range(len(ib)) self.augment = []
self.dup_dims = len(set(self.numorder)) != len(self.numorder) i2j = {}
self.all_dims = len(set(self.numorder)) == len(ib) j = 0
if self.dup_dims or not self.all_dims: for i, b in enumerate(ib):
raise NotImplementedError("You must provide a permutation of *all* the input dimensions with *no duplicates*.") if i not in new_order:
if b == 1:
self.drop.append(i)
else:
raise NotImplementedError("You cannot drop a non-broadcastable dimension.")
else:
i2j[i] = j
j += 1
self.shuffle = [i2j[x] for x in new_order if x != 'x']
self.augment = [i for i, x in enumerate(new_order) if x == 'x']
def clone_with_new_inputs(self, *new_inputs): def clone_with_new_inputs(self, *new_inputs):
return DimShuffle(new_inputs[0], self.new_order, self.inplace) return DimShuffle(new_inputs[0], self.new_order, self.inplace)
...@@ -53,19 +69,31 @@ class DimShuffle(Op, Viewer): ...@@ -53,19 +69,31 @@ class DimShuffle(Op, Viewer):
return {} return {}
def perform(self): def perform(self):
res = self.inputs[0].data.transpose(self.numorder) res = self.inputs[0].data
shape = list(res.shape) shape = list(res.shape)
new_shape = [] for drop in reversed(self.drop):
for entry in self.new_order: shape.pop(drop)
if entry == 'x': res = res.reshape(shape)
new_shape.append(1)
else: res = res.transpose(self.shuffle)
new_shape.append(shape.pop(0))
res = res.reshape(new_shape) shape = list(res.shape)
for augm in self.augment:
shape.insert(augm, 1)
res = res.reshape(shape)
if not self.inplace: if not self.inplace:
res = numpy.copy(res) res = numpy.copy(res)
self.outputs[0].data = res self.outputs[0].data = res
def grad(self, (x, ), (gz, )):
grad_order = ['x'] * len(self.inputs[0].broadcastable)
for i, x in enumerate(self.new_order):
if x != 'x':
grad_order[x] = i
return DimShuffle(gz, grad_order).out,
def __str__(self): def __str__(self):
return "%s(%s, %s)" % (self.__class__.__name__, str(self.inputs[0]), self.new_order) return "%s(%s, %s)" % (self.__class__.__name__, str(self.inputs[0]), self.new_order)
...@@ -90,6 +118,9 @@ class Transpose(DimShuffle): ...@@ -90,6 +118,9 @@ class Transpose(DimShuffle):
class Broadcast(Op, Destroyer): class Broadcast(Op, Destroyer):
def __init__(self, scalar_opclass, inputs, inplace_pattern = {}): def __init__(self, scalar_opclass, inputs, inplace_pattern = {}):
inputs = map(astensor, inputs)
try: try:
assert len(set([len(input.broadcastable) for input in inputs])) == 1 assert len(set([len(input.broadcastable) for input in inputs])) == 1
except (AssertionError, AttributeError): except (AssertionError, AttributeError):
...@@ -141,15 +172,29 @@ class Broadcast(Op, Destroyer): ...@@ -141,15 +172,29 @@ class Broadcast(Op, Destroyer):
if r in scalar_ograds: if r in scalar_ograds:
return ograds[scalar_ograds.index(r)] return ograds[scalar_ograds.index(r)]
op = r.owner op = r.owner
if op is None:
b = [1] * len(inputs[0].broadcastable)
res = astensor(numpy.asarray(r.data).reshape(b),
broadcastable = b)
return res
op_class = op.__class__ op_class = op.__class__
bcasted = Broadcast(op_class, [transform(input) for input in op.inputs], {}) bcasted = Broadcast(op_class, [transform(input) for input in op.inputs], {}).out
return bcasted return bcasted
ret = [] ret = []
for scalar_igrad, input in zip(scalar_igrads, inputs): for scalar_igrad, input in zip(scalar_igrads, inputs):
r = transform(scalar_igrad) r = transform(scalar_igrad)
to_sum = [i for i, bcast in enumerate(input.broadcastable) if bcast] to_sum = [i for i, bcast in enumerate(input.broadcastable) if bcast]
if to_sum: if to_sum:
shuffle = []
j = 0
for bcast in input.broadcastable:
if bcast == 1:
shuffle.append('x')
else:
shuffle.append(j)
j += 1
sr = Sum(r, axis = to_sum).out sr = Sum(r, axis = to_sum).out
sr = DimShuffle(sr, shuffle).out
ret.append(sr) ret.append(sr)
else: else:
ret.append(r) ret.append(r)
...@@ -269,16 +314,19 @@ def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None): ...@@ -269,16 +314,19 @@ def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None):
New.__name__ = "Tensor" + scalar_opclass.__name__ New.__name__ = "Tensor" + scalar_opclass.__name__
return New return New
def broadcast(op): def wrap_broadcast(op):
def instantiate(*inputs): def instantiate(*inputs):
inputs = map(astensor, inputs)
target_length = max([len(input.broadcastable) for input in inputs]) target_length = max([len(input.broadcastable) for input in inputs])
args = [] args = []
for input in inputs: for input in inputs:
difference = target_length - len(input.broadcastable) length = len(input.broadcastable)
difference = target_length - length
if not difference: if not difference:
args.append(input) args.append(input)
else: else:
args.append(DimShuffle(input, ['x']*difference + range(length))) args.append(DimShuffle(input, ['x']*difference + range(length)).out)
return op(*args) return op(*args)
return instantiate return instantiate
...@@ -319,6 +367,8 @@ class CAReduce(Op): ...@@ -319,6 +367,8 @@ class CAReduce(Op):
""" """
def __init__(self, scalar_opclass, inputs, dimensions_to_reduce = None): def __init__(self, scalar_opclass, inputs, dimensions_to_reduce = None):
inputs = map(astensor, inputs)
if scalar_opclass.nin != 2 or scalar_opclass.nout != 1: if scalar_opclass.nin != 2 or scalar_opclass.nout != 1:
raise NotImplementedError("CAReduce only supports binary functions with a single output.") raise NotImplementedError("CAReduce only supports binary functions with a single output.")
if len(inputs) != 1: if len(inputs) != 1:
...@@ -346,9 +396,13 @@ class CAReduce(Op): ...@@ -346,9 +396,13 @@ class CAReduce(Op):
def perform(self): def perform(self):
result = self.inputs[0].data result = self.inputs[0].data
for dimension in reversed(sorted(self.dimensions_to_reduce)): to_reduce = reversed(sorted(self.dimensions_to_reduce))
result = self.ufunc.reduce(result, dimension) if to_reduce:
self.outputs[0].data = result for dimension in to_reduce:
result = self.ufunc.reduce(result, dimension)
self.outputs[0].data = result
else:
self.outputs[0].data = numpy.copy(result)
def _c_all(self, inames, onames, sub): def _c_all(self, inames, onames, sub):
...@@ -363,6 +417,9 @@ class CAReduce(Op): ...@@ -363,6 +417,9 @@ class CAReduce(Op):
tosum = self.dimensions_to_reduce tosum = self.dimensions_to_reduce
if tosum == ():
return Broadcast(scalar_ops.Identity, (input, ))._c_all(inames, onames, sub)
order1 = [i for i in xrange(len(input.broadcastable)) if i not in tosum] order1 = [i for i in xrange(len(input.broadcastable)) if i not in tosum]
order = order1 + list(tosum) order = order1 + list(tosum)
...@@ -459,7 +516,19 @@ def make_reduce(scalar_opclass, name = None): ...@@ -459,7 +516,19 @@ def make_reduce(scalar_opclass, name = None):
New.__name__ = "Reduce" + scalar_opclass.__name__ New.__name__ = "Reduce" + scalar_opclass.__name__
return New return New
Sum = make_reduce(scalar_ops.Add, name = 'Sum') class Sum(make_reduce(scalar_ops.Add)):
def grad(self, (x, ), (gz, )):
if self.dimensions_to_reduce == ():
return gz,
new_dims = []
i = 0
for j, _ in enumerate(x.broadcastable):
if j in self.dimensions_to_reduce:
new_dims.append('x')
else:
new_dims.append(i)
i += 1
return Broadcast(scalar_ops.Second, (x, DimShuffle(gz, new_dims).out)).out,
def reduce(op): def reduce(op):
......
...@@ -832,8 +832,14 @@ class DualLinker(Linker): ...@@ -832,8 +832,14 @@ class DualLinker(Linker):
op_order_1 = env1.toposort() op_order_1 = env1.toposort()
op_order_2 = [equiv[op.outputs[0]].owner for op in op_order_1] # we need to have the exact same order so we can compare each step op_order_2 = [equiv[op.outputs[0]].owner for op in op_order_1] # we need to have the exact same order so we can compare each step
def c_make_thunk(op):
try:
return CLinker(op).make_thunk(True)[0]
except AbstractFunctionError:
return op.perform
thunks1 = [op.perform for op in op_order_1] thunks1 = [op.perform for op in op_order_1]
thunks2 = [CLinker(op).make_thunk(True)[0] for op in op_order_2] thunks2 = [c_make_thunk(op) for op in op_order_2]
def f(): def f():
for input1, input2 in zip(env1.inputs, env2.inputs): for input1, input2 in zip(env1.inputs, env2.inputs):
......
...@@ -76,14 +76,17 @@ def grad_sources_inputs(sources, graph_inputs): ...@@ -76,14 +76,17 @@ def grad_sources_inputs(sources, graph_inputs):
#if all output gradients are None, continue #if all output gradients are None, continue
if all(map(lambda x:x is None, g_outputs)): continue if all(map(lambda x:x is None, g_outputs)): continue
output_arg = _unpack_result(g_outputs) # output_arg = _unpack_result(g_outputs)
input_arg = _unpack_result(op.inputs) # input_arg = _unpack_result(op.inputs)
output_arg = g_outputs
input_arg = op.inputs
op_grad = op.grad(input_arg, output_arg) op_grad = op.grad(input_arg, output_arg)
if op_grad is None: if op_grad is None:
raise ValueError(_msg_retNone, op.__class__) raise ValueError(_msg_retNone, op.__class__)
if isinstance(op_grad, float): if isinstance(op_grad, float):
raise TypeError('wtf!!!!!!!!', op) raise TypeError('wtf!!!!!!!!', op)
g_inputs = _pack_result(op_grad) g_inputs = op_grad #_pack_result(op_grad)
assert isinstance(g_inputs, (list, tuple)) assert isinstance(g_inputs, (list, tuple))
if len(g_inputs) != len(op.inputs): if len(g_inputs) != len(op.inputs):
raise ValueError(_msg_badlen, raise ValueError(_msg_badlen,
...@@ -123,6 +126,10 @@ class numeric_grad: ...@@ -123,6 +126,10 @@ class numeric_grad:
""" """
gf = [numpy.ndarray(x.shape) for x in pt] gf = [numpy.ndarray(x.shape) for x in pt]
f_pt = f(*pt) f_pt = f(*pt)
if isinstance(f, (list, tuple)):
f_pt = [numpy.copy(x) for x in f_pt]
else:
f_pt = numpy.copy(f_pt)
for idx in xrange(len(gf)): for idx in xrange(len(gf)):
if len(pt[idx].shape) == 0: if len(pt[idx].shape) == 0:
......
...@@ -12,6 +12,10 @@ def as_scalar(x, name = None): ...@@ -12,6 +12,10 @@ def as_scalar(x, name = None):
s = Scalar('float64', name = name) s = Scalar('float64', name = name)
s.data = x s.data = x
return s return s
if isinstance(x, int):
s = Scalar('int32', name = name)
s.data = x
return s
if isinstance(x, Scalar): if isinstance(x, Scalar):
return x return x
...@@ -45,7 +49,8 @@ class Scalar(ResultBase): ...@@ -45,7 +49,8 @@ class Scalar(ResultBase):
# and self.data == other.data # and self.data == other.data
def dtype_specs(self): def dtype_specs(self):
return {'float64': (float, 'double', 'PyFloat_Check', 'PyFloat_AsDouble', 'PyFloat_FromDouble')}[self.dtype] return {'float64': (float, 'npy_float64', 'PyFloat_Check', 'PyFloat_AsDouble', 'PyFloat_FromDouble'),
'int32': (int, 'npy_int32', 'PyInt_Check', 'PyInt_AsLong', 'PyInt_FromLong')}[self.dtype]
def c_declare(self, name, sub): def c_declare(self, name, sub):
return """ return """
......
...@@ -18,7 +18,7 @@ class Sub(BinaryScalarOp): ...@@ -18,7 +18,7 @@ class Sub(BinaryScalarOp):
def c_code(self, (x, y), (z, ), sub): def c_code(self, (x, y), (z, ), sub):
return "%(z)s = %(x)s - %(y)s;" % locals() return "%(z)s = %(x)s - %(y)s;" % locals()
def grad(self, (x, y), (gz, )): def grad(self, (x, y), (gz, )):
return gz, -gz return gz, neg(gz)
class Mul(BinaryScalarOp): class Mul(BinaryScalarOp):
def impl(self, x, y): def impl(self, x, y):
...@@ -34,62 +34,119 @@ class Div(BinaryScalarOp): ...@@ -34,62 +34,119 @@ class Div(BinaryScalarOp):
def c_code(self, (x, y), (z, ), sub): def c_code(self, (x, y), (z, ), sub):
return "%(z)s = %(x)s / %(y)s;" % locals() return "%(z)s = %(x)s / %(y)s;" % locals()
def grad(self, (x, y), (gz, )): def grad(self, (x, y), (gz, )):
return div(gz, y), -div(mul(x, gz), y*y) return div(gz, y), neg(div(mul(x, gz), mul(y, y)))
class Pow(BinaryScalarOp): class Pow(BinaryScalarOp):
def impl(self, x, y): def impl(self, x, y):
return x ** y return x ** y
def c_code(self, (x, y), (z, ), sub): def c_code(self, (x, y), (z, ), sub):
return "%(z)s = pow(%(x)s, %(y)s);" % locals() return "%(z)s = pow(%(x)s, %(y)s);" % locals()
def grad(self, (x, y), (gz, )):
return mul(gz, mul(y, pow(x, sub(y, as_scalar(1))))), mul(gz, mul(log(x), pow(x, y)))
class First(BinaryScalarOp): class First(BinaryScalarOp):
def impl(self, x, y): def impl(self, x, y):
return x return x
def c_code(self, (x, y), (z, ), sub): def c_code(self, (x, y), (z, ), sub):
return "%(z)s = %(x)s;" % locals() return "%(z)s = %(x)s;" % locals()
def grad(self, (x, y), (gz, )):
return gz, None
class Second(BinaryScalarOp): class Second(BinaryScalarOp):
def impl(self, x, y): def impl(self, x, y):
return y return y
def c_code(self, (x, y), (z, ), sub): def c_code(self, (x, y), (z, ), sub):
return "%(z)s = %(y)s;" % locals() return "%(z)s = %(y)s;" % locals()
def grad(self, (x, y), (gz, )):
return None, gz
class SquareDiff(BinaryScalarOp): # class SquareDiff(BinaryScalarOp):
def impl(self, x, y): # def impl(self, x, y):
diff = (x - y) # diff = (x - y)
return diff * diff # return diff * diff
def c_code(self, (x, y), (z, ), sub): # def c_code(self, (x, y), (z, ), sub):
return "%(z)s = %(x)s - %(y)s; %(z)s *= %(z)s;" % locals() # return "%(z)s = %(x)s - %(y)s; %(z)s *= %(z)s;" % locals()
class Identity(UnaryScalarOp):
def impl(self, x):
return x
def c_code(self, (x, ), (z, ), sub):
return "%(z)s = %(x)s;" % locals()
def grad(self, (x, y), (gz, )):
return gz,
class Neg(UnaryScalarOp): class Neg(UnaryScalarOp):
def impl(self, x): def impl(self, x):
return -x return -x
def grad(self, (x, ), (gz, )): def grad(self, (x, ), (gz, )):
return -gz return neg(gz),
def c_code(self, (x, ), (z, ), sub): def c_code(self, (x, ), (z, ), sub):
return "%(z)s = -%(x)s;" % locals() return "%(z)s = -%(x)s;" % locals()
class Abs(UnaryScalarOp):
def impl(self, x):
return numpy.abs(x)
def grad(self, (x, ), (gz, )):
return mul(gz, sgn(x)),
def c_code(self, (x, ), (z, ), sub):
return "%(z)s = abs(%(x)s);" % locals()
class Sgn(UnaryScalarOp):
def impl(self, x):
return numpy.abs(x) / x
def grad(self, (x, ), (gz, )):
return None,
def c_code(self, (x, ), (z, ), sub):
return "%(z)s = %(x)s/abs(%(x)s);" % locals() # TODO: C use copysign
class Inv(UnaryScalarOp): class Inv(UnaryScalarOp):
def impl(self, x): def impl(self, x):
return 1 / x return 1 / x
def grad(self, (x, ), (gz, )): def grad(self, (x, ), (gz, )):
return -gz / (x*x) return div(neg(gz), mul(x, x)),
def c_code(self, (x, ), (z, ), sub): def c_code(self, (x, ), (z, ), sub):
return "%(z)s = 1 / %(x)s;" % locals() return "%(z)s = 1 / %(x)s;" % locals()
class Log(UnaryScalarOp): class Log(UnaryScalarOp):
def impl(self, x): def impl(self, x):
return math.log(x) return math.log(x)
def grad(self, (x, ), (gz, )):
return div(gz, x),
def c_code(self, (x, ), (z, ), sub): def c_code(self, (x, ), (z, ), sub):
return "%(z)s = log(%(x)s);" % locals() return "%(z)s = log(%(x)s);" % locals()
class Log2(UnaryScalarOp):
def impl(self, x):
return numpy.log2(x)
def grad(self, (x, ), (gz, )):
return div(gz, mul(x, as_scalar(math.log(2.0)))),
def c_code(self, (x, ), (z, ), sub):
return "%(z)s = log2(%(x)s);" % locals()
class Exp(UnaryScalarOp): class Exp(UnaryScalarOp):
def impl(self, x): def impl(self, x):
return math.exp(x) return math.exp(x)
def grad(self, (x, ), (gz, )):
return mul(gz, exp(x)),
def c_code(self, (x, ), (z, ), sub): def c_code(self, (x, ), (z, ), sub):
return "%(z)s = exp(%(x)s);" % locals() return "%(z)s = exp(%(x)s);" % locals()
class Sqr(UnaryScalarOp):
    """Elementwise square x**2."""

    def impl(self, x):
        return x*x

    def grad(self, inputs, output_gradients):
        # d(x**2)/dx = 2x, so g_x = gz * 2 * x.
        (x,) = inputs
        (gz,) = output_gradients
        return mul(gz, mul(x, as_scalar(2))),

    def c_code(self, inputs, outputs, sub):
        (x,) = inputs
        (z,) = outputs
        return "%(z)s = %(x)s * %(x)s;" % locals()
class Sqrt(UnaryScalarOp):
    """Elementwise square root."""

    def impl(self, x):
        return math.sqrt(x)

    def grad(self, inputs, output_gradients):
        # d(sqrt x)/dx = 0.5 / sqrt(x); undefined at x == 0.
        (x,) = inputs
        (gz,) = output_gradients
        return div(mul(gz, as_scalar(0.5)), sqrt(x)),

    def c_code(self, inputs, outputs, sub):
        (x,) = inputs
        (z,) = outputs
        return "%(z)s = sqrt(%(x)s);" % locals()
# class Sigmoid(UnaryComposite): # class Sigmoid(UnaryComposite):
# def expand_impl(self, x): # def expand_impl(self, x):
......
...@@ -12,6 +12,9 @@ from base_tensor import BaseTensor, BaseTensorOp ...@@ -12,6 +12,9 @@ from base_tensor import BaseTensor, BaseTensorOp
from elemwise import Elemwise from elemwise import Elemwise
import blas # for gemm, dot import blas # for gemm, dot
import elemwise2 as s2t
import scalar_ops as scal
class Tensor(BaseTensor): class Tensor(BaseTensor):
""" """
...@@ -65,7 +68,9 @@ class Tensor(BaseTensor): ...@@ -65,7 +68,9 @@ class Tensor(BaseTensor):
#SLICING #SLICING
def __getitem__(self, item): return subtensor(self, item) def __getitem__(self, item): return subtensor(self, item)
def __getslice__(self, *args): return subtensor(self, slice(*args)) def __getslice__(self, *args): return subtensor(self, slice(*args))
s2t.Tensor = Tensor
# alternate Tensor constructor # alternate Tensor constructor
def astensor(data, broadcastable=None, role=None, name=None): def astensor(data, broadcastable=None, role=None, name=None):
"""Return a Tensor containing given data""" """Return a Tensor containing given data"""
...@@ -79,6 +84,7 @@ def astensor(data, broadcastable=None, role=None, name=None): ...@@ -79,6 +84,7 @@ def astensor(data, broadcastable=None, role=None, name=None):
rval = Tensor(data.dtype, broadcastable, role, name) rval = Tensor(data.dtype, broadcastable, role, name)
rval.data = data # will raise if broadcastable was mis-specified rval.data = data # will raise if broadcastable was mis-specified
return rval return rval
s2t.astensor = astensor
############################ ############################
...@@ -229,15 +235,23 @@ class TensorScalarOp(_Elemwise): ...@@ -229,15 +235,23 @@ class TensorScalarOp(_Elemwise):
# Unary Operations # Unary Operations
########################## ##########################
class Abs(_Elemwise): # class Abs(_Elemwise):
def impl(self, x): # def impl(self, x):
return numpy.abs(x) # return numpy.abs(x)
def grad(self, x, gz): # def grad(self, x, gz):
return gz * Sgn(x).out #TODO: handle the corner case (get it? pun?) (there's a special place in hell for people like you) # return gz * Sgn(x).out #TODO: handle the corner case (get it? pun?) (there's a special place in hell for people like you)
def c_foreach(self, (x_i, ), (z_i, )): # def c_foreach(self, (x_i, ), (z_i, )):
return "%(z)s_i = abs(%(x)s_i);" # return "%(z)s_i = abs(%(x)s_i);"
# #Constructor not necessary because builtin abs() does this
Abs = s2t.make_broadcast(scal.Abs)
AbsInplace = s2t.make_broadcast(scal.Abs, {0:0})
#Constructor not necessary because builtin abs() does this #Constructor not necessary because builtin abs() does this
abs_inplace = gof.op.constructor(s2t.wrap_broadcast(AbsInplace))
class Argmax(Op): class Argmax(Op):
nin=2 # tensor, axis nin=2 # tensor, axis
nout=2 # max val, max idx nout=2 # max val, max idx
...@@ -269,91 +283,152 @@ def max(x, axis=None): ...@@ -269,91 +283,152 @@ def max(x, axis=None):
# but when Argmax.c_impl() is in place, it should be fine. # but when Argmax.c_impl() is in place, it should be fine.
return argmax(x,axis)[0] return argmax(x,axis)[0]
class Exp(_Elemwise): # class Exp(_Elemwise):
def impl(self, x): return numpy.exp(x) # def impl(self, x): return numpy.exp(x)
def grad(self, x, gz): return gz * exp(x) # def grad(self, x, gz): return gz * exp(x)
def c_foreach(self, (x_i, ), (z_i, )): return "z_i = exp(x_i);" # def c_foreach(self, (x_i, ), (z_i, )): return "z_i = exp(x_i);"
exp = gof.op.constructor(Exp) # exp = gof.op.constructor(Exp)
Exp = s2t.make_broadcast(scal.Exp)
ExpInplace = s2t.make_broadcast(scal.Exp, {0:0})
exp = gof.op.constructor(s2t.wrap_broadcast(Exp))
exp_inplace = gof.op.constructor(s2t.wrap_broadcast(ExpInplace))
# class Neg(_Elemwise):
# def impl(self, x):
# return -x
# def grad(self, x, gz):
# return -gz
# def c_foreach(self, (x_i, ), (z_i, )):
# return "%(z)s_i = -%(x)s_i;"
# #Constructor not necessary because unary '-' does this
Neg = s2t.make_broadcast(scal.Neg)
NegInplace = s2t.make_broadcast(scal.Neg, {0:0})
neg = gof.op.constructor(s2t.wrap_broadcast(Neg))
neg_inplace = gof.op.constructor(s2t.wrap_broadcast(NegInplace))
# class Log(_Elemwise):
# def impl(self, x): return numpy.log(x)
# def grad(self, x, gz): return gz / x
# def c_foreach(self, (x_i, ), (z_i, )): return "z_i = log(x_i);"
# log = gof.op.constructor(Log)
Log = s2t.make_broadcast(scal.Log)
LogInplace = s2t.make_broadcast(scal.Log, {0:0})
log = gof.op.constructor(s2t.wrap_broadcast(Log))
log_inplace = gof.op.constructor(s2t.wrap_broadcast(LogInplace))
# class Log2(_Elemwise):
# def impl(self, x): return numpy.log2(x)
# def grad(self, x, gz): return gz / (x * numpy.log(2.0))
# def c_foreach(self, (x_i, ), (z_i, )): return "%(z)s_i = log2(%(x)s_i);"
# log2 = gof.op.constructor(Log2)
Log2 = s2t.make_broadcast(scal.Log2)
Log2Inplace = s2t.make_broadcast(scal.Log2, {0:0})
log2 = gof.op.constructor(s2t.wrap_broadcast(Log2))
log2_inplace = gof.op.constructor(s2t.wrap_broadcast(Log2Inplace))
# class Sgn(_Elemwise):
# def impl(self, x):
# return numpy.abs(x) / x
# def grad(self, x, gz):
# return [None]
# def c_foreach(self, (x_i, ), (z_i, )):
# return "%(z)s_i = %(x)s_i/abs(%(x)s_i);" # TODO: C use copysign
# sgn = gof.op.constructor(Sgn)
Sgn = s2t.make_broadcast(scal.Sgn)
SgnInplace = s2t.make_broadcast(scal.Sgn, {0:0})
sgn = gof.op.constructor(s2t.wrap_broadcast(Sgn))
sgn_inplace = gof.op.constructor(s2t.wrap_broadcast(SgnInplace))
# class Sqr(_Elemwise):
# def impl(self, x): return x * x
# def grad(self, x, gz): return 2.0 * x * gz
# def c_foreach(self, (x_i, ), (z_i, )): return "%(z)s_i = %(x)s_i * %(x)s_i;"
# sqr = gof.op.constructor(Sqr)
Sqr = s2t.make_broadcast(scal.Sqr)
SqrInplace = s2t.make_broadcast(scal.Sqr, {0:0})
sqr = gof.op.constructor(s2t.wrap_broadcast(Sqr))
sqr_inplace = gof.op.constructor(s2t.wrap_broadcast(SqrInplace))
# class Sqrt(_Elemwise):
# def impl(self, x): return numpy.sqrt(x)
# def grad(self, x, gz): return 0.5 * gz / sqrt(x)
# def c_foreach(self, (x_i, ), (z_i, )): return "%(z)s_i = sqrt(%(x)s_i);"
# sqrt = gof.op.constructor(Sqrt)
Sqrt = s2t.make_broadcast(scal.Sqrt)
SqrtInplace = s2t.make_broadcast(scal.Sqrt, {0:0})
sqrt = gof.op.constructor(s2t.wrap_broadcast(Sqrt))
sqrt_inplace = gof.op.constructor(s2t.wrap_broadcast(SqrtInplace))
# class Sum(_Elemwise):
# def impl(self, x):
# return numpy.sum(x)
# def grad(self, (x, ), (gz, )):
# return fill(x, gz),
# def propagate_broadcastable(self, *inputs):
# return [()]
# def c_init(self, (x, ), (sum, )):
# return "dtype_%(sum)s* %(sum)sp = ((dtype_%(sum)s*)PyArray_DATA(%(sum)s)); %(sum)sp[0] = 0;"
# def c_foreach(self, (x_i, ), (sum, )):
# return "%(sum)sp[0] += %(x)s_i;"
# sum0 = gof.op.constructor(Sum)
Sum = s2t.Sum
sum = gof.op.constructor(Sum)
# class Fill(_Elemwise):
# def impl(self, model, value):
# return (model * 0) + value #TODO: we can probably do better than this
# def grad(self, (model, value), (gz, )):
# return None, sum(gz)
# def c_init(self, (model, value), (z, )):
# return "dtype_%(value)s %(value)s0 = ((dtype_%(value)s*)PyArray_DATA(%(value)s))[0];"
# def c_foreach(self, (model_i, value), (z_i, )):
# return "%(z)s_i = %(value)s0;"
# fill = gof.op.constructor(Fill)
def broadcast_package(scalar_opclass, name, inplace_versions = True):
    """Build the broadcast Op class(es) and constructor(s) for a scalar op.

    Returns (Class, constructor, InplaceClass, inplace_constructor) when
    inplace_versions is true, otherwise just (Class, constructor).

    NOTE(review): unlike the hand-written quartets in this file, this helper
    also passes `name` through to s2t.make_broadcast.
    """
    C = s2t.make_broadcast(scalar_opclass, name = name)
    c = gof.op.constructor(s2t.wrap_broadcast(C))
    if inplace_versions:
        # BUGFIX: the inplace class must be built with the {0: 0} inplace
        # pattern, as every other *Inplace construction in this file does
        # (e.g. AddInplace = s2t.make_broadcast(scal.Add, {0:0})); without it
        # the "Inplace" variant is not actually in-place.
        CInplace = s2t.make_broadcast(scalar_opclass, {0: 0}, name = name + "Inplace")
        c_inplace = gof.op.constructor(s2t.wrap_broadcast(CInplace))
        return C, c, CInplace, c_inplace
    else:
        return C, c
class Neg(_Elemwise): # Fill = s2t.make_broadcast(scal.Second)
def impl(self, x): # FillInplace = s2t.make_broadcast(scal.Second, {0:0})
return -x # fill = gof.op.constructor(s2t.wrap_broadcast(Fill))
def grad(self, x, gz): # fill_inplace = gof.op.constructor(s2t.wrap_broadcast(FillInplace))
return -gz
def c_foreach(self, (x_i, ), (z_i, )): Fill, fill, FillInplace, fill_inplace = broadcast_package(scal.Second, 'Fill')
return "%(z)s_i = -%(x)s_i;"
#Constructor not necessary because unary '-' does this
class Log(_Elemwise):
def impl(self, x): return numpy.log(x)
def grad(self, x, gz): return gz / x
def c_foreach(self, (x_i, ), (z_i, )): return "z_i = log(x_i);"
log = gof.op.constructor(Log)
class Log2(_Elemwise):
def impl(self, x): return numpy.log2(x)
def grad(self, x, gz): return gz / (x * numpy.log(2.0))
def c_foreach(self, (x_i, ), (z_i, )): return "%(z)s_i = log2(%(x)s_i);"
log2 = gof.op.constructor(Log2)
class Sgn(_Elemwise):
def impl(self, x):
return numpy.abs(x) / x
def grad(self, x, gz):
return [None]
def c_foreach(self, (x_i, ), (z_i, )):
return "%(z)s_i = %(x)s_i/abs(%(x)s_i);" # TODO: C use copysign
sgn = gof.op.constructor(Sgn)
class Sqr(_Elemwise):
def impl(self, x): return x * x
def grad(self, x, gz): return 2.0 * x * gz
def c_foreach(self, (x_i, ), (z_i, )): return "%(z)s_i = %(x)s_i * %(x)s_i;"
sqr = gof.op.constructor(Sqr)
class Sqrt(_Elemwise):
def impl(self, x): return numpy.sqrt(x)
def grad(self, x, gz): return 0.5 * gz / sqrt(x)
def c_foreach(self, (x_i, ), (z_i, )): return "%(z)s_i = sqrt(%(x)s_i);"
sqrt = gof.op.constructor(Sqrt)
class Sum(_Elemwise):
def impl(self, x):
return numpy.sum(x)
def grad(self, x, gz):
return fill(x, gz)
def propagate_broadcastable(self, *inputs):
return [()]
def c_init(self, (x, ), (sum, )):
return "dtype_%(sum)s* %(sum)sp = ((dtype_%(sum)s*)PyArray_DATA(%(sum)s)); %(sum)sp[0] = 0;"
def c_foreach(self, (x_i, ), (sum, )):
return "%(sum)sp[0] += %(x)s_i;"
sum = gof.op.constructor(Sum)
class Fill(_Elemwise):
def impl(self, model, value):
return (model * 0) + value #TODO: we can probably do better than this
def grad(self, (model, value), gz):
return None, sum(gz)
def c_init(self, (model, value), (z, )):
return "dtype_%(value)s %(value)s0 = ((dtype_%(value)s*)PyArray_DATA(%(value)s))[0];"
def c_foreach(self, (model_i, value), (z_i, )):
return "%(z)s_i = %(value)s0;"
fill = gof.op.constructor(Fill)
def ones_like(model): def ones_like(model):
return fill(model, 1.0) return fill(model, 1.0)
def zeros_like(model): def zeros_like(model):
return fill(model, 0.0) return fill(model, 0.0)
class TensorCopy(_Elemwise): # class TensorCopy(_Elemwise):
def impl(self, x): # def impl(self, x):
return numpy.array(x) # return numpy.array(x)
def grad(self, x, gz): # def grad(self, x, gz):
return gz # return gz
def c_foreach(self, (x_i, ), (z_i, )): # def c_foreach(self, (x_i, ), (z_i, )):
return "%(z)s_i = %(x)s_i;" # return "%(z)s_i = %(x)s_i;"
TensorCopy = s2t.make_broadcast(scal.Identity)
tensor_copy = gof.op.constructor(TensorCopy) tensor_copy = gof.op.constructor(TensorCopy)
########################## ##########################
...@@ -451,171 +526,198 @@ subtensor = gof.op.constructor(Subtensor) ...@@ -451,171 +526,198 @@ subtensor = gof.op.constructor(Subtensor)
# Arithmetic : Add # Arithmetic : Add
########################## ##########################
# Elemwise # # # Elemwise #
class AddElemwise(_Elemwise): # class AddElemwise(_Elemwise):
def impl(self, x, y): # def impl(self, x, y):
try: # try:
_assert_same_shapes(x, y) # _assert_same_shapes(x, y)
except Exception, e: # except Exception, e:
print '------ ERROR HERE' # print '------ ERROR HERE'
raise # raise
return x + y # return x + y
def grad(self, (x, y), gz): # def grad(self, (x, y), gz):
return gz, gz # return gz, gz
def c_foreach(self, (x_i, y_i), (z_i, )): # def c_foreach(self, (x_i, y_i), (z_i, )):
return "%(z)s_i = %(x)s_i + %(y)s_i;" # return "%(z)s_i = %(x)s_i + %(y)s_i;"
add_elemwise = gof.op.constructor(AddElemwise) # add_elemwise = gof.op.constructor(AddElemwise)
class AddElemwiseInplace(AddElemwise.inplace_version()): # class AddElemwiseInplace(AddElemwise.inplace_version()):
def impl(self, x, y): # def impl(self, x, y):
_assert_same_shapes(x, y) # _assert_same_shapes(x, y)
x += y # x += y
return x # return x
add_elemwise_inplace = gof.op.constructor(AddElemwiseInplace) # add_elemwise_inplace = gof.op.constructor(AddElemwiseInplace)
# Scalar # # # Scalar #
class AddScalar(TensorScalarOp): # class AddScalar(TensorScalarOp):
def impl(self, x, a): # def impl(self, x, a):
_assert_tensor_scalar(x, a) # _assert_tensor_scalar(x, a)
return x + a # return x + a
def grad(self, (x, a), gz): # def grad(self, (x, a), gz):
return gz, sum(gz) # return gz, sum(gz)
c_expr = "x_i + a" # c_expr = "x_i + a"
add_scalar = gof.op.constructor(AddScalar) # add_scalar = gof.op.constructor(AddScalar)
class AddScalarInplace(AddScalar.inplace_version()): # class AddScalarInplace(AddScalar.inplace_version()):
def impl(self, x, a): # def impl(self, x, a):
_assert_tensor_scalar(x, a) # _assert_tensor_scalar(x, a)
x += a # x += a
return x # return x
add_scalar_inplace = gof.op.constructor(AddScalarInplace) # add_scalar_inplace = gof.op.constructor(AddScalarInplace)
add = _scalar_switch(add_elemwise, add_scalar, add_scalar) # add = _scalar_switch(add_elemwise, add_scalar, add_scalar)
add_inplace = _scalar_switch(add_elemwise_inplace, add_scalar_inplace) # add_inplace = _scalar_switch(add_elemwise_inplace, add_scalar_inplace)
Add = s2t.make_broadcast(scal.Add)
AddInplace = s2t.make_broadcast(scal.Add, {0:0})
add = gof.op.constructor(s2t.wrap_broadcast(Add))
add_inplace = gof.op.constructor(s2t.wrap_broadcast(AddInplace))
########################## ##########################
# Arithmetic : Sub # Arithmetic : Sub
########################## ##########################
# Elemwise # # # Elemwise #
class SubElemwise(_Elemwise): # class SubElemwise(_Elemwise):
def impl(self, x, y): # def impl(self, x, y):
_assert_same_shapes(x, y) # _assert_same_shapes(x, y)
return x - y # return x - y
def grad(self, (x, y), gz): # def grad(self, (x, y), gz):
return gz, -gz # return gz, -gz
def c_foreach(self, (x_i, y_i), (z_i, )): # def c_foreach(self, (x_i, y_i), (z_i, )):
return "%(z)s_i = %(x)s_i - %(y)s_i;" # return "%(z)s_i = %(x)s_i - %(y)s_i;"
sub_elemwise = gof.op.constructor(SubElemwise) # sub_elemwise = gof.op.constructor(SubElemwise)
class SubElemwiseInplace(SubElemwise.inplace_version()): # class SubElemwiseInplace(SubElemwise.inplace_version()):
def impl(self, x, y): # def impl(self, x, y):
_assert_same_shapes(x, y) # _assert_same_shapes(x, y)
x -= y # x -= y
return x # return x
sub_elemwise_inplace = gof.op.constructor(SubElemwiseInplace) # sub_elemwise_inplace = gof.op.constructor(SubElemwiseInplace)
# # Scalar #
# def sub_scalar_r(x, a):
# return add_scalar(x, -a)
# Scalar # # def sub_scalar_l(x, a):
def sub_scalar_r(x, a): # return add_scalar(-x, a)
return add_scalar(x, -a)
def sub_scalar_l(x, a): # def sub_scalar_rinplace(x, a):
return add_scalar(-x, a) # return add_scalar_inplace(x, -a)
def sub_scalar_rinplace(x, a): # sub = _scalar_switch(sub_elemwise, sub_scalar_r, sub_scalar_l)
return add_scalar_inplace(x, -a) # sub_inplace = _scalar_switch(sub_elemwise_inplace, sub_scalar_rinplace)
Sub = s2t.make_broadcast(scal.Sub)
SubInplace = s2t.make_broadcast(scal.Sub, {0:0})
sub = gof.op.constructor(s2t.wrap_broadcast(Sub))
sub_inplace = gof.op.constructor(s2t.wrap_broadcast(SubInplace))
sub = _scalar_switch(sub_elemwise, sub_scalar_r, sub_scalar_l)
sub_inplace = _scalar_switch(sub_elemwise_inplace, sub_scalar_rinplace)
########################## ##########################
# Arithmetic : Mul # Arithmetic : Mul
########################## ##########################
# Elemwise # # # Elemwise #
class MulElemwise(_Elemwise): # class MulElemwise(_Elemwise):
def impl(self, x, y): # def impl(self, x, y):
_assert_same_shapes(x, y) # _assert_same_shapes(x, y)
return x * y # return x * y
def grad(self, (x, y), gz): # def grad(self, (x, y), gz):
return mul(y, gz), mul(x, gz) # return mul(y, gz), mul(x, gz)
def c_foreach(self, (x_i, y_i), (z_i, )): # def c_foreach(self, (x_i, y_i), (z_i, )):
return "%(z)s_i = %(x)s_i * %(y)s_i;" # return "%(z)s_i = %(x)s_i * %(y)s_i;"
mul_elemwise = gof.op.constructor(MulElemwise) # mul_elemwise = gof.op.constructor(MulElemwise)
class MulElemwiseInplace(MulElemwise.inplace_version()): # class MulElemwiseInplace(MulElemwise.inplace_version()):
def impl(self, x, y): # def impl(self, x, y):
_assert_same_shapes(x, y) # _assert_same_shapes(x, y)
x *= y # x *= y
return x # return x
mul_elemwise_inplace = gof.op.constructor(MulElemwiseInplace) # mul_elemwise_inplace = gof.op.constructor(MulElemwiseInplace)
# Scalar # # # Scalar #
class Scale(TensorScalarOp): # class Scale(TensorScalarOp):
def impl(self, x, a): # def impl(self, x, a):
_assert_tensor_scalar(x, a) # _assert_tensor_scalar(x, a)
return x * a # return x * a
def grad(self, (x, a), gz): # def grad(self, (x, a), gz):
return scale(a, gz), sum(mul_elemwise(x, gz)) # return scale(a, gz), sum(mul_elemwise(x, gz))
c_expr = "%(x)s_i * _%(a)s" # c_expr = "%(x)s_i * _%(a)s"
scale = gof.op.constructor(Scale) # scale = gof.op.constructor(Scale)
class ScaleInplace(Scale.inplace_version()): # class ScaleInplace(Scale.inplace_version()):
def impl(self, x, a): # def impl(self, x, a):
_assert_tensor_scalar(x, a) # _assert_tensor_scalar(x, a)
x *= a # x *= a
return x # return x
scale_inplace = gof.op.constructor(ScaleInplace) # scale_inplace = gof.op.constructor(ScaleInplace)
mul = _scalar_switch(mul_elemwise, scale, scale) # mul = _scalar_switch(mul_elemwise, scale, scale)
mul_inplace = _scalar_switch(mul_elemwise_inplace, scale_inplace) # mul_inplace = _scalar_switch(mul_elemwise_inplace, scale_inplace)
Mul = s2t.make_broadcast(scal.Mul)
MulInplace = s2t.make_broadcast(scal.Mul, {0:0})
mul = gof.op.constructor(s2t.wrap_broadcast(Mul))
mul_inplace = gof.op.constructor(s2t.wrap_broadcast(MulInplace))
########################## ##########################
# Arithmetic : Div # Arithmetic : Div
########################## ##########################
# Elemwise # # # Elemwise #
class DivElemwise(_Elemwise): # class DivElemwise(_Elemwise):
def impl(self, x, y): # def impl(self, x, y):
_assert_same_shapes(x, y) # _assert_same_shapes(x, y)
return x / y # return x / y
def grad(self, (x, y), gz): # def grad(self, (x, y), gz):
return div(gz, y), -div(mul(x, gz), (y*y)) # return div(gz, y), -div(mul(x, gz), (y*y))
def c_foreach(self, (x_i, y_i), (z_i, )): # def c_foreach(self, (x_i, y_i), (z_i, )):
return "%(z)s_i = %(x)s_i / %(y)s_i;" # return "%(z)s_i = %(x)s_i / %(y)s_i;"
div_elemwise = gof.op.constructor(DivElemwise) # div_elemwise = gof.op.constructor(DivElemwise)
class DivElemwiseInplace(DivElemwise.inplace_version()): # class DivElemwiseInplace(DivElemwise.inplace_version()):
def impl(self, x, y): # def impl(self, x, y):
_assert_same_shapes(x, y) # _assert_same_shapes(x, y)
x /= y # x /= y
return x # return x
div_elemwise_inplace = gof.op.constructor(DivElemwiseInplace) # div_elemwise_inplace = gof.op.constructor(DivElemwiseInplace)
class InvElemwise(_Elemwise): # class InvElemwise(_Elemwise):
def impl(self, x): # def impl(self, x):
return 1.0/x # return 1.0/x
def grad(self, x, gz): # def grad(self, x, gz):
ix = inv(x) # ix = inv(x)
return -gz * (ix * ix) # return -gz * (ix * ix)
def c_foreach(self, (x_i, ), (z_i, )): # def c_foreach(self, (x_i, ), (z_i, )):
return "%(z)s_i = 1.0 / %(x)s_i;" #TODO: cast 1.0 to the dtype of x # return "%(z)s_i = 1.0 / %(x)s_i;" #TODO: cast 1.0 to the dtype of x
inv_elemwise = gof.op.constructor(InvElemwise) # inv_elemwise = gof.op.constructor(InvElemwise)
# # Scalar #
# def div_scalar_r(x, a):
# return scale(x, inv_elemwise(a))
# Scalar # # def div_scalar_l(x, a):
def div_scalar_r(x, a): # return scale(inv_elemwise(x), a)
return scale(x, inv_elemwise(a))
def div_scalar_l(x, a): # def div_scalar_rinplace(x, a):
return scale(inv_elemwise(x), a) # return scale_inplace(x, inv_elemwise(a))
def div_scalar_rinplace(x, a): # div = _scalar_switch(div_elemwise, div_scalar_r, div_scalar_l)
return scale_inplace(x, inv_elemwise(a)) # div_inplace = _scalar_switch(div_elemwise_inplace, div_scalar_rinplace)
Div = s2t.make_broadcast(scal.Div)
DivInplace = s2t.make_broadcast(scal.Div, {0:0})
div = gof.op.constructor(s2t.wrap_broadcast(Div))
div_inplace = gof.op.constructor(s2t.wrap_broadcast(DivInplace))
div = _scalar_switch(div_elemwise, div_scalar_r, div_scalar_l)
div_inplace = _scalar_switch(div_elemwise_inplace, div_scalar_rinplace)
...@@ -624,59 +726,66 @@ div_inplace = _scalar_switch(div_elemwise_inplace, div_scalar_rinplace) ...@@ -624,59 +726,66 @@ div_inplace = _scalar_switch(div_elemwise_inplace, div_scalar_rinplace)
# Arithmetic : Pow # Arithmetic : Pow
########################## ##########################
# Elemwise # # # Elemwise #
class PowElemwise(_Elemwise): # class PowElemwise(_Elemwise):
def impl(self, x, y): # def impl(self, x, y):
_assert_same_shapes(x, y) # _assert_same_shapes(x, y)
return x ** y # return x ** y
def grad(self, (x, y), gz): # def grad(self, (x, y), gz):
gx = gz * y * (pow_elemwise(x, y-1.0)) # gx = gz * y * (pow_elemwise(x, y-1.0))
gy = gz * log(x) * pow_elemwise(x, y) # gy = gz * log(x) * pow_elemwise(x, y)
return gx, gy # return gx, gy
def c_foreach(self, (x_i, y_i), (z_i, )): # def c_foreach(self, (x_i, y_i), (z_i, )):
return "%(z)s_i = pow(%(x)s_i, %(y)s_i);" # return "%(z)s_i = pow(%(x)s_i, %(y)s_i);"
pow_elemwise = gof.op.constructor(PowElemwise) # pow_elemwise = gof.op.constructor(PowElemwise)
class PowElemwiseInplace(PowElemwise.inplace_version()): # class PowElemwiseInplace(PowElemwise.inplace_version()):
def impl(self, x, y): # def impl(self, x, y):
_assert_same_shapes(x, y) # _assert_same_shapes(x, y)
x **= y # x **= y
return x # return x
pow_elemwise_inplace = gof.op.constructor(PowElemwiseInplace) # pow_elemwise_inplace = gof.op.constructor(PowElemwiseInplace)
# Scalar # # # Scalar #
class PowScalarL(TensorScalarOp): # class PowScalarL(TensorScalarOp):
def impl(self, y, x): # def impl(self, y, x):
_assert_tensor_scalar(y, x) # _assert_tensor_scalar(y, x)
return x ** y # return x ** y
def grad(self, (y, x), gz): # def grad(self, (y, x), gz):
gx = sum(gz * y * x ** (y-1.0)) # gx = sum(gz * y * x ** (y-1.0))
gy = gz * log(x) * x ** y # gy = gz * log(x) * x ** y
return gy, gx # return gy, gx
c_expr = "pow(%(a)s, %(x)s_i)" # c_expr = "pow(%(a)s, %(x)s_i)"
pow_scalar_l = gof.op.constructor(PowScalarL) # pow_scalar_l = gof.op.constructor(PowScalarL)
class PowScalarR(TensorScalarOp): # class PowScalarR(TensorScalarOp):
def impl(self, x, a): # def impl(self, x, a):
_assert_tensor_scalar(x, a) # _assert_tensor_scalar(x, a)
return x ** a # return x ** a
def grad(self, (x, s), gz): # def grad(self, (x, s), gz):
gx = scale(mul_elemwise(gz,pow_scalar_r(x, add_scalar(s,-1.0))), s) # gx = scale(mul_elemwise(gz,pow_scalar_r(x, add_scalar(s,-1.0))), s)
gs = sum(mul_elemwise(mul_elemwise(gz, pow_scalar_r(x,s)), log(x))) # gs = sum(mul_elemwise(mul_elemwise(gz, pow_scalar_r(x,s)), log(x)))
return gx, gs # return gx, gs
c_expr = "pow(%(x)s_i, _%(a)s)" # c_expr = "pow(%(x)s_i, _%(a)s)"
pow_scalar_r = gof.op.constructor(PowScalarR) # pow_scalar_r = gof.op.constructor(PowScalarR)
class PowScalarRInplace(PowScalarR.inplace_version()): # class PowScalarRInplace(PowScalarR.inplace_version()):
def impl(self, x, a): # def impl(self, x, a):
_assert_tensor_scalar(x, a) # _assert_tensor_scalar(x, a)
x **= a # x **= a
return x # return x
pow_scalar_r_inplace = gof.op.constructor(PowScalarRInplace) # pow_scalar_r_inplace = gof.op.constructor(PowScalarRInplace)
pow = _scalar_switch(pow_elemwise, pow_scalar_r, pow_scalar_l) # pow = _scalar_switch(pow_elemwise, pow_scalar_r, pow_scalar_l)
pow_inplace = _scalar_switch(pow_elemwise_inplace, pow_scalar_r_inplace) # pow_inplace = _scalar_switch(pow_elemwise_inplace, pow_scalar_r_inplace)
Pow = s2t.make_broadcast(scal.Pow)
PowInplace = s2t.make_broadcast(scal.Pow, {0:0})
pow = gof.op.constructor(s2t.wrap_broadcast(Pow))
pow_inplace = gof.op.constructor(s2t.wrap_broadcast(PowInplace))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论