injected new elemwise in tensor.py

上级 761c7f9f
...@@ -22,19 +22,37 @@ def env(inputs, outputs, validate = True, features = []): ...@@ -22,19 +22,37 @@ def env(inputs, outputs, validate = True, features = []):
class _test_DimShuffle(unittest.TestCase): class _test_DimShuffle(unittest.TestCase):
def test_straightforward(self): def with_linker(self, linker):
x, y, z = inputs() for xsh, shuffle, zsh in [((2, 3), (1, 'x', 0), (3, 1, 2)),
e0 = DimShuffle(x, [1, 'x', 0]).out ((1, 2, 3), (1, 2), (2, 3)),
f = gof.PerformLinker(env([x], [e0])).make_function(inplace=True) ((1, 2, 1, 3), (1, 3), (2, 3)),
assert f(numpy.ones((2, 3))).shape == (3, 1, 2) ((2, 3, 4), (2, 1, 0), (4, 3, 2)),
((2, 3, 4), ('x', 2, 1, 0, 'x'), (1, 4, 3, 2, 1)),
((1, 4, 3, 2, 1), (3, 2, 1), (2, 3, 4)),
((1, 1, 4), (1, 2), (1, 4))]:
x = modes.build(Tensor('float64', [1 * (entry == 1) for entry in xsh], name = 'x'))
e = DimShuffle(x, shuffle).out
# print shuffle, e.owner.grad(e.owner.inputs, e.owner.outputs).owner.new_order
f = linker(env([x], [e])).make_function(inplace=False)
assert f(numpy.ones(xsh)).shape == zsh
def test_perform(self):
self.with_linker(gof.PerformLinker)
# def test_straightforward(self):
# x, y, z = inputs()
# e0 = DimShuffle(x, [1, 'x', 0]).out
# f = gof.PerformLinker(env([x], [e0])).make_function(inplace=True)
# assert f(numpy.ones((2, 3))).shape == (3, 1, 2)
class _test_Broadcast(unittest.TestCase): class _test_Broadcast(unittest.TestCase):
def with_linker(self, linker): def with_linker(self, linker):
for xsh, ysh in [((5, 5), (5, 5)), for xsh, ysh in [((3, 5), (3, 5)),
((5, 5), (1, 5)), ((3, 5), (1, 5)),
((5, 5), (5, 1)), ((3, 5), (3, 1)),
((1, 5), (5, 1)), ((1, 5), (5, 1)),
((1, 1), (1, 1)), ((1, 1), (1, 1)),
((2, 3, 4, 5), (2, 3, 4, 5)), ((2, 3, 4, 5), (2, 3, 4, 5)),
...@@ -53,6 +71,10 @@ class _test_Broadcast(unittest.TestCase): ...@@ -53,6 +71,10 @@ class _test_Broadcast(unittest.TestCase):
yv = numpy.asarray(numpy.random.rand(*ysh)) yv = numpy.asarray(numpy.random.rand(*ysh))
zv = xv + yv zv = xv + yv
# print "AAAAAAAAAAAAAAAAAA"
# print f(xv, yv)
# print zv
# print "BBBBBBBBBBBBBBBBBB"
self.failUnless((f(xv, yv) == zv).all()) self.failUnless((f(xv, yv) == zv).all())
def with_linker_inplace(self, linker): def with_linker_inplace(self, linker):
...@@ -105,7 +127,9 @@ class _test_CAReduce(unittest.TestCase): ...@@ -105,7 +127,9 @@ class _test_CAReduce(unittest.TestCase):
for xsh, tosum in [((5, 6), (0, 1)), for xsh, tosum in [((5, 6), (0, 1)),
((5, 6), (0, )), ((5, 6), (0, )),
((5, 6), (1, )), ((5, 6), (1, )),
((2, 3, 4, 5), (0, 1, 3))]: ((5, 6), ()),
((2, 3, 4, 5), (0, 1, 3)),
((), ())]:
x = modes.build(Tensor('float64', [1 * (entry == 1) for entry in xsh], name = 'x')) x = modes.build(Tensor('float64', [1 * (entry == 1) for entry in xsh], name = 'x'))
e = CAReduce(Add, [x], dimensions_to_reduce = tosum).out e = CAReduce(Add, [x], dimensions_to_reduce = tosum).out
f = linker(env([x], [e])).make_function(inplace = False) f = linker(env([x], [e])).make_function(inplace = False)
...@@ -113,7 +137,13 @@ class _test_CAReduce(unittest.TestCase): ...@@ -113,7 +137,13 @@ class _test_CAReduce(unittest.TestCase):
zv = xv zv = xv
for axis in reversed(sorted(tosum)): for axis in reversed(sorted(tosum)):
zv = numpy.add.reduce(zv, axis) zv = numpy.add.reduce(zv, axis)
self.failUnless((f(xv) - zv < 1e-10).all()) # print "AAAAAAAAAAAAAAAAAA"
# print xsh, tosum
# print f(xv)
# print zv
# print f(xv) - zv
# print "BBBBBBBBBBBBBBBBBB"
self.failUnless((numpy.abs(f(xv) - zv) < 1e-10).all())
def test_perform(self): def test_perform(self):
self.with_linker(gof.PerformLinker) self.with_linker(gof.PerformLinker)
...@@ -123,27 +153,27 @@ class _test_CAReduce(unittest.TestCase): ...@@ -123,27 +153,27 @@ class _test_CAReduce(unittest.TestCase):
if __name__ == '__main__': if __name__ == '__main__':
# unittest.main() unittest.main()
x = modes.build(Tensor('float64', [0, 0], name = 'x')) # x = modes.build(Tensor('float64', [0, 0], name = 'x'))
y = modes.build(Tensor('float64', [0, 0], name = 'y')) # y = modes.build(Tensor('float64', [0, 0], name = 'y'))
e = Broadcast(SquareDiff, (x, y), {0:0}).out # e = Broadcast(SquareDiff, (x, y), {0:0}).out
f = gof.CLinker(env([x, y], [e])).make_function(inplace = False) # f = gof.CLinker(env([x, y], [e])).make_function(inplace = False)
xv = numpy.random.rand(1000, 1000) # xv = numpy.random.rand(1000, 1000)
yv = numpy.random.rand(1000, 1000) # yv = numpy.random.rand(1000, 1000)
zv = numpy.random.rand(1000, 1000) # zv = numpy.random.rand(1000, 1000)
add = numpy.frompyfunc(lambda x, y: x + y, 2, 1) # add = numpy.frompyfunc(lambda x, y: x + y, 2, 1)
t0 = time.time() # t0 = time.time()
for i in xrange(100): # for i in xrange(100):
xv -= yv # xv -= yv
xv *= xv # xv *= xv
# xv += yv # # xv += yv
print time.time() - t0 # print time.time() - t0
t0 = time.time() # t0 = time.time()
for i in xrange(100): # for i in xrange(100):
f(xv, yv) # f(xv, yv)
print time.time() - t0 # print time.time() - t0
......
...@@ -7,7 +7,9 @@ from compile import Function, eval_outputs ...@@ -7,7 +7,9 @@ from compile import Function, eval_outputs
import gradient import gradient
import gof, gof.graph import gof, gof.graph
from gof.python25 import any from gof.python25 import any
import gof
from elemwise2 import DimShuffle
def _numpy_checker(x, y): def _numpy_checker(x, y):
""" """
...@@ -58,6 +60,15 @@ def verify_grad(testcase, op_cls, pt, n_tests=1, rng=numpy.random, eps=0.0000001 ...@@ -58,6 +60,15 @@ def verify_grad(testcase, op_cls, pt, n_tests=1, rng=numpy.random, eps=0.0000001
if not isinstance(analytic_grad, (list, tuple)): if not isinstance(analytic_grad, (list, tuple)):
analytic_grad = [analytic_grad] analytic_grad = [analytic_grad]
# if num_grad.max_err(analytic_grad) > 1.0e-4:
# print "aaaaaaaaaa"
# print gof.Env(tensor_pt, [cost])
# print gof.Env(tensor_pt, symbolic_grad)
# print analytic_grad
# print num_grad.gf
# print num_grad.max_err(analytic_grad)
# print "bbbbbbbbbb"
if num_grad.max_err(analytic_grad) > 1.0e-4: if num_grad.max_err(analytic_grad) > 1.0e-4:
raise Exception(verify_grad.E_grad) raise Exception(verify_grad.E_grad)
verify_grad.E_grad = 'gradient error exceeded tolerance' verify_grad.E_grad = 'gradient error exceeded tolerance'
...@@ -361,6 +372,15 @@ class T_add(unittest.TestCase): ...@@ -361,6 +372,15 @@ class T_add(unittest.TestCase):
f = Function([a,b], [fn(a, b)], linker_cls = gof.CLinker) f = Function([a,b], [fn(a, b)], linker_cls = gof.CLinker)
self.failUnless(numpy.all(fn(a.data, b.data) == f(a.data, b.data))) self.failUnless(numpy.all(fn(a.data, b.data) == f(a.data, b.data)))
def test_grad_scalar_l(self):
verify_grad(self, Add, [numpy.asarray([3.0]), numpy.random.rand(3)])
def test_grad_scalar_r(self):
verify_grad(self, Add, [numpy.random.rand(3), numpy.asarray([3.0])])
def test_grad_row(self):
verify_grad(self, Add, [numpy.random.rand(3, 5), numpy.random.rand(1, 5)])
def test_grad_col(self):
verify_grad(self, Add, [numpy.random.rand(3, 5), numpy.random.rand(3, 1)])
class T_abs(unittest.TestCase): class T_abs(unittest.TestCase):
def test_impl(self): def test_impl(self):
...@@ -381,8 +401,8 @@ class T_abs(unittest.TestCase): ...@@ -381,8 +401,8 @@ class T_abs(unittest.TestCase):
class AbsBadGrad(tensor._Elemwise): class AbsBadGrad(tensor._Elemwise):
def impl(self, x): def impl(self, x):
return numpy.abs(x) return numpy.abs(x)
def grad(self, x, gz): def grad(self, (x, ), (gz, )):
return scale(gz * sgn(x),0.9) return mul(gz * sgn(x),0.9),
def c_foreach(self, (x_i, ), (z_i, )): def c_foreach(self, (x_i, ), (z_i, )):
return "z_i = abs(x_i);" return "z_i = abs(x_i);"
...@@ -401,7 +421,7 @@ class T_fill(unittest.TestCase): ...@@ -401,7 +421,7 @@ class T_fill(unittest.TestCase):
o = t.owner o = t.owner
self.failUnless(o.inputs[0].broadcastable == (0,)) self.failUnless(o.inputs[0].broadcastable == (0,))
# self.failUnless(o.inputs[0].dtype[0:3] == 'int') # self.failUnless(o.inputs[0].dtype[0:3] == 'int')
self.failUnless(o.inputs[1].broadcastable == ()) self.failUnless(o.inputs[1].broadcastable == (1,))
# self.failUnless(o.inputs[1].dtype[0:3] == 'flo') # self.failUnless(o.inputs[1].dtype[0:3] == 'flo')
self.failUnless(o.outputs[0].broadcastable == (0,)) self.failUnless(o.outputs[0].broadcastable == (0,))
# self.failUnless(o.outputs[0].dtype[0:3] == 'flo') # self.failUnless(o.outputs[0].dtype[0:3] == 'flo')
...@@ -432,47 +452,70 @@ class T_mul(unittest.TestCase): ...@@ -432,47 +452,70 @@ class T_mul(unittest.TestCase):
def test_elemwise(self): def test_elemwise(self):
a = astensor(0.0) a = astensor(0.0)
b = astensor(0.0) b = astensor(0.0)
check_eq2_both(self, [a,b], mul_elemwise(a,b), [3.0, 4.0], 12.0) check_eq2_both(self, [a,b], mul(a,b), [3.0, 4.0], 12.0)
check_eq2_both(self, [a,b], mul_elemwise(b,a), [-1.0,2.0], -2.0) check_eq2_both(self, [a,b], mul(b,a), [-1.0,2.0], -2.0)
self.failUnless(isinstance(mul(a,b).owner, Scale)) #self.failUnless(isinstance(mul(a,b).owner, Scale))
a = astensor(numpy.ones(2)) a = astensor(numpy.ones(2))
b = astensor(numpy.ones(2)) b = astensor(numpy.ones(2))
aa = numpy.asarray([-0.5, 4.0]) aa = numpy.asarray([-0.5, 4.0])
bb = numpy.asarray([-0.5, 2.0]) bb = numpy.asarray([-0.5, 2.0])
check_eq2_both(self, [a,b], mul_elemwise(a,b), [aa,bb], numpy.asarray([0.25, 8.0])) check_eq2_both(self, [a,b], mul(a,b), [aa,bb], numpy.asarray([0.25, 8.0]))
check_eq2_both(self, [a,b], mul_elemwise(a,b), [bb,aa], numpy.asarray([0.25, 8.0])) check_eq2_both(self, [a,b], mul(a,b), [bb,aa], numpy.asarray([0.25, 8.0]))
self.failUnless(isinstance(mul(a,b).owner, MulElemwise)) #self.failUnless(isinstance(mul(a,b).owner, MulElemwise))
def test_scalar(self): def test_scalar(self):
r = numpy.random.rand(2,3) r = numpy.random.rand(2,3)
a = astensor(r) a = astensor(r)
b = astensor(2.0) b = astensor(2.0)
check_eq2_both(self, [a,b], scale(a,b), [r, 2.0], r*2.0) check_eq2_both(self, [a,b], mul(a,b), [r, 2.0], r*2.0)
check_eq2_both(self, [a,b], scale(a,b), [r, 4.0], r*4.0) check_eq2_both(self, [a,b], mul(a,b), [r, 4.0], r*4.0)
self.failUnless(b.data == 2.0) self.failUnless(b.data == 2.0)
def test_operator(self): def test_rowcol(self):
a = astensor([1,1]) r1 = numpy.random.rand(3,5)
aa = astensor([1,1]) r2 = numpy.random.rand(1,5)
b = astensor(4) r3 = numpy.random.rand(3,1)
self.failUnless(isinstance((a*b).owner, Scale)) a1, a2, a3 = astensor(r1), astensor(r2), astensor(r3)
self.failUnless(isinstance((b*a).owner, Scale)) check_eq2_both(self, [a1,a2], mul(a1,a2), [r1, r2], r1*r2)
self.failUnless(isinstance((a*aa).owner, MulElemwise)) check_eq2_both(self, [a1,a3], mul(a1,a3), [r1, r3], r1*r3)
self.failUnless(isinstance((aa*a).owner, MulElemwise))
def test_grad_elemwise(self):
verify_grad(self, Mul, [numpy.random.rand(3,4), numpy.random.rand(3,4)])
def test_grad_scalar_l(self):
verify_grad(self, Mul, [numpy.asarray([3.0]), numpy.random.rand(3)])
def test_grad_scalar_r(self):
verify_grad(self, Mul, [numpy.random.rand(3), numpy.asarray([3.0])])
def test_grad_row(self):
verify_grad(self, Mul, [numpy.random.rand(3, 5), numpy.random.rand(1, 5)])
def test_grad_row2(self):
op = lambda x, y: Mul(x, DimShuffle(y, ['x', 0]).out)
verify_grad(self, op, [numpy.random.rand(3, 5), numpy.random.rand(5)])
def test_grad_col(self):
verify_grad(self, Mul, [numpy.random.rand(3, 5), numpy.random.rand(3, 1)])
# def test_operator(self):
# a = astensor([1,1])
# aa = astensor([1,1])
# b = astensor(4)
# self.failUnless(isinstance((a*b).owner, Scale))
# self.failUnless(isinstance((b*a).owner, Scale))
# self.failUnless(isinstance((a*aa).owner, MulElemwise))
# self.failUnless(isinstance((aa*a).owner, MulElemwise))
def test_wrong_shapes(self): def test_wrong_shapes(self):
a = astensor(numpy.ones(3)) a = astensor(numpy.ones(3))
b = astensor(numpy.ones(4)) b = astensor(numpy.ones(4))
try: try:
check_eq2(self, [a,b], MulElemwise(a,b).out, check_eq2(self, [a,b], Mul(a,b).out,
[numpy.ones(3), numpy.ones(4)], 1.0) [numpy.ones(3), numpy.ones(4)], 1.0)
self.fail() self.fail()
except ValueError, e: except ValueError, e:
self.failUnless(e[0] is tensor._assert_same_shapes.E_shape) self.failUnless('shape mismatch' in str(e))
try: try:
check_eq2_c(self, [a,b], MulElemwise(a,b).out, check_eq2_c(self, [a,b], Mul(a,b).out,
[numpy.ones(3), numpy.ones(4)], 1.0) [numpy.ones(3), numpy.ones(4)], 1.0)
self.fail() self.fail()
except ValueError, e: except ValueError, e:
...@@ -482,14 +525,14 @@ class T_div(unittest.TestCase): ...@@ -482,14 +525,14 @@ class T_div(unittest.TestCase):
def setUp(self): def setUp(self):
numpy.random.seed(9999) numpy.random.seed(9999)
def test_grad_e(self): def test_grad_e(self):
verify_grad(self, DivElemwise, [numpy.ones(()), numpy.ones(())]) verify_grad(self, Div, [numpy.random.rand(3), numpy.ones(3)])
verify_grad(self, DivElemwise, [numpy.random.rand(3), numpy.ones(3)]) verify_grad(self, Div, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1])
verify_grad(self, DivElemwise, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1]) verify_grad(self, Div, [numpy.ones(()), numpy.ones(())])
def test_grad_sl(self): def test_grad_sl(self):
verify_grad(self, DivElemwise, [numpy.ones(()), numpy.ones(())]) verify_grad(self, Div, [numpy.ones((3, 5)), numpy.ones((1, 1))])
verify_grad(self, DivElemwise, [numpy.random.rand(3), numpy.ones(3)]) verify_grad(self, Div, [numpy.random.rand(3), numpy.ones((1, ))])
verify_grad(self, DivElemwise, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1]) verify_grad(self, Div, [numpy.random.rand(3,5), numpy.random.rand(1,1)])
class T_log2(unittest.TestCase): class T_log2(unittest.TestCase):
def test0(self): def test0(self):
...@@ -509,12 +552,16 @@ class T_pow(unittest.TestCase): ...@@ -509,12 +552,16 @@ class T_pow(unittest.TestCase):
def setUp(self): def setUp(self):
numpy.random.seed(9999) numpy.random.seed(9999)
def test_elemwise(self): def test_elemwise(self):
verify_grad(self, DivElemwise, [numpy.random.rand(3,4), numpy.random.rand(3,4)+0.1]) verify_grad(self, Div, [numpy.random.rand(3,4), numpy.random.rand(3,4)+0.1])
verify_grad(self, PowElemwise, [numpy.random.rand(3,4), numpy.random.rand(3,4)]) verify_grad(self, Pow, [numpy.random.rand(3,4), numpy.random.rand(3,4)])
def test_scalar_l(self): def test_scalar_l(self):
verify_grad(self, PowScalarL, [numpy.random.rand(3), numpy.asarray(3.0)]) verify_grad(self, Pow, [numpy.asarray([3.0]), numpy.random.rand(3)])
def test_scalar_r(self): def test_scalar_r(self):
verify_grad(self, PowScalarR, [numpy.random.rand(3), numpy.asarray(3.0)]) verify_grad(self, Pow, [numpy.random.rand(3), numpy.asarray([3.0])])
def test_row(self):
verify_grad(self, Pow, [numpy.random.rand(3, 5), numpy.random.rand(1, 5)])
def test_col(self):
verify_grad(self, Pow, [numpy.random.rand(3, 5), numpy.random.rand(3, 1)])
class _testCase_matinv(unittest.TestCase): class _testCase_matinv(unittest.TestCase):
......
...@@ -94,7 +94,7 @@ class BaseTensor(ResultBase): ...@@ -94,7 +94,7 @@ class BaseTensor(ResultBase):
'complex128': (complex, 'theano_complex128', 'NPY_COMPLEX128'), 'complex128': (complex, 'theano_complex128', 'NPY_COMPLEX128'),
'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64')}[self.dtype] 'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64')}[self.dtype]
except KeyError: except KeyError:
raise TypeError("Unsupported dtype for BaseTensor: %s" % self.dtype) raise TypeError("Unsupported dtype for %s: %s" % (self.__class__.__name__, self.dtype))
# #
# Hash for constant folding # Hash for constant folding
......
...@@ -3,12 +3,16 @@ import elemwise_cgen as cgen ...@@ -3,12 +3,16 @@ import elemwise_cgen as cgen
import numpy import numpy
from gof import Op, Viewer, Destroyer from gof import Op, Viewer, Destroyer
from tensor import Tensor from base_tensor import BaseTensor as Tensor
from scalar import upcast, Scalar from scalar import upcast, Scalar
import scalar_ops import scalar_ops
import gof import gof
def astensor(data):
assert isinstance(data, Tensor)
return data
################## ##################
### DimShuffle ### ### DimShuffle ###
...@@ -18,6 +22,8 @@ class DimShuffle(Op, Viewer): ...@@ -18,6 +22,8 @@ class DimShuffle(Op, Viewer):
def __init__(self, input, new_order, inplace = True): def __init__(self, input, new_order, inplace = True):
input = astensor(input)
ib = input.broadcastable ib = input.broadcastable
ob = [] ob = []
for value in new_order: for value in new_order:
...@@ -36,12 +42,22 @@ class DimShuffle(Op, Viewer): ...@@ -36,12 +42,22 @@ class DimShuffle(Op, Viewer):
self.inplace = inplace self.inplace = inplace
self.numorder = [x for x in new_order if type(x) == int] self.drop = []
self.is_transposition = sorted(new_order) == range(len(ib)) self.augment = []
self.dup_dims = len(set(self.numorder)) != len(self.numorder) i2j = {}
self.all_dims = len(set(self.numorder)) == len(ib) j = 0
if self.dup_dims or not self.all_dims: for i, b in enumerate(ib):
raise NotImplementedError("You must provide a permutation of *all* the input dimensions with *no duplicates*.") if i not in new_order:
if b == 1:
self.drop.append(i)
else:
raise NotImplementedError("You cannot drop a non-broadcastable dimension.")
else:
i2j[i] = j
j += 1
self.shuffle = [i2j[x] for x in new_order if x != 'x']
self.augment = [i for i, x in enumerate(new_order) if x == 'x']
def clone_with_new_inputs(self, *new_inputs): def clone_with_new_inputs(self, *new_inputs):
return DimShuffle(new_inputs[0], self.new_order, self.inplace) return DimShuffle(new_inputs[0], self.new_order, self.inplace)
...@@ -53,19 +69,31 @@ class DimShuffle(Op, Viewer): ...@@ -53,19 +69,31 @@ class DimShuffle(Op, Viewer):
return {} return {}
def perform(self): def perform(self):
res = self.inputs[0].data.transpose(self.numorder) res = self.inputs[0].data
shape = list(res.shape) shape = list(res.shape)
new_shape = [] for drop in reversed(self.drop):
for entry in self.new_order: shape.pop(drop)
if entry == 'x': res = res.reshape(shape)
new_shape.append(1)
else: res = res.transpose(self.shuffle)
new_shape.append(shape.pop(0))
res = res.reshape(new_shape) shape = list(res.shape)
for augm in self.augment:
shape.insert(augm, 1)
res = res.reshape(shape)
if not self.inplace: if not self.inplace:
res = numpy.copy(res) res = numpy.copy(res)
self.outputs[0].data = res self.outputs[0].data = res
def grad(self, (x, ), (gz, )):
grad_order = ['x'] * len(self.inputs[0].broadcastable)
for i, x in enumerate(self.new_order):
if x != 'x':
grad_order[x] = i
return DimShuffle(gz, grad_order).out,
def __str__(self): def __str__(self):
return "%s(%s, %s)" % (self.__class__.__name__, str(self.inputs[0]), self.new_order) return "%s(%s, %s)" % (self.__class__.__name__, str(self.inputs[0]), self.new_order)
...@@ -90,6 +118,9 @@ class Transpose(DimShuffle): ...@@ -90,6 +118,9 @@ class Transpose(DimShuffle):
class Broadcast(Op, Destroyer): class Broadcast(Op, Destroyer):
def __init__(self, scalar_opclass, inputs, inplace_pattern = {}): def __init__(self, scalar_opclass, inputs, inplace_pattern = {}):
inputs = map(astensor, inputs)
try: try:
assert len(set([len(input.broadcastable) for input in inputs])) == 1 assert len(set([len(input.broadcastable) for input in inputs])) == 1
except (AssertionError, AttributeError): except (AssertionError, AttributeError):
...@@ -141,15 +172,29 @@ class Broadcast(Op, Destroyer): ...@@ -141,15 +172,29 @@ class Broadcast(Op, Destroyer):
if r in scalar_ograds: if r in scalar_ograds:
return ograds[scalar_ograds.index(r)] return ograds[scalar_ograds.index(r)]
op = r.owner op = r.owner
if op is None:
b = [1] * len(inputs[0].broadcastable)
res = astensor(numpy.asarray(r.data).reshape(b),
broadcastable = b)
return res
op_class = op.__class__ op_class = op.__class__
bcasted = Broadcast(op_class, [transform(input) for input in op.inputs], {}) bcasted = Broadcast(op_class, [transform(input) for input in op.inputs], {}).out
return bcasted return bcasted
ret = [] ret = []
for scalar_igrad, input in zip(scalar_igrads, inputs): for scalar_igrad, input in zip(scalar_igrads, inputs):
r = transform(scalar_igrad) r = transform(scalar_igrad)
to_sum = [i for i, bcast in enumerate(input.broadcastable) if bcast] to_sum = [i for i, bcast in enumerate(input.broadcastable) if bcast]
if to_sum: if to_sum:
shuffle = []
j = 0
for bcast in input.broadcastable:
if bcast == 1:
shuffle.append('x')
else:
shuffle.append(j)
j += 1
sr = Sum(r, axis = to_sum).out sr = Sum(r, axis = to_sum).out
sr = DimShuffle(sr, shuffle).out
ret.append(sr) ret.append(sr)
else: else:
ret.append(r) ret.append(r)
...@@ -269,16 +314,19 @@ def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None): ...@@ -269,16 +314,19 @@ def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None):
New.__name__ = "Tensor" + scalar_opclass.__name__ New.__name__ = "Tensor" + scalar_opclass.__name__
return New return New
def broadcast(op): def wrap_broadcast(op):
def instantiate(*inputs): def instantiate(*inputs):
inputs = map(astensor, inputs)
target_length = max([len(input.broadcastable) for input in inputs]) target_length = max([len(input.broadcastable) for input in inputs])
args = [] args = []
for input in inputs: for input in inputs:
difference = target_length - len(input.broadcastable) length = len(input.broadcastable)
difference = target_length - length
if not difference: if not difference:
args.append(input) args.append(input)
else: else:
args.append(DimShuffle(input, ['x']*difference + range(length))) args.append(DimShuffle(input, ['x']*difference + range(length)).out)
return op(*args) return op(*args)
return instantiate return instantiate
...@@ -319,6 +367,8 @@ class CAReduce(Op): ...@@ -319,6 +367,8 @@ class CAReduce(Op):
""" """
def __init__(self, scalar_opclass, inputs, dimensions_to_reduce = None): def __init__(self, scalar_opclass, inputs, dimensions_to_reduce = None):
inputs = map(astensor, inputs)
if scalar_opclass.nin != 2 or scalar_opclass.nout != 1: if scalar_opclass.nin != 2 or scalar_opclass.nout != 1:
raise NotImplementedError("CAReduce only supports binary functions with a single output.") raise NotImplementedError("CAReduce only supports binary functions with a single output.")
if len(inputs) != 1: if len(inputs) != 1:
...@@ -346,9 +396,13 @@ class CAReduce(Op): ...@@ -346,9 +396,13 @@ class CAReduce(Op):
def perform(self):
    """Reduce the input over `self.dimensions_to_reduce` and store the result.

    Reads `self.inputs[0].data`, applies `self.ufunc.reduce` once per axis
    listed in `self.dimensions_to_reduce`, and writes the result to
    `self.outputs[0].data`. When there is no axis to reduce, the output is
    a copy of the input data.
    """
    result = self.inputs[0].data
    # Build a real list: a `reversed(...)` iterator is always truthy, so
    # testing it directly would never reach the no-reduction branch below.
    to_reduce = sorted(self.dimensions_to_reduce, reverse=True)
    if to_reduce:
        # Highest axis first, so removing an axis never renumbers the
        # axes that are still waiting to be reduced.
        for dimension in to_reduce:
            result = self.ufunc.reduce(result, dimension)
        self.outputs[0].data = result
    else:
        # Nothing to reduce: return a copy so the output does not share
        # the input's buffer.
        self.outputs[0].data = numpy.copy(result)
def _c_all(self, inames, onames, sub): def _c_all(self, inames, onames, sub):
...@@ -363,6 +417,9 @@ class CAReduce(Op): ...@@ -363,6 +417,9 @@ class CAReduce(Op):
tosum = self.dimensions_to_reduce tosum = self.dimensions_to_reduce
if tosum == ():
return Broadcast(scalar_ops.Identity, (input, ))._c_all(inames, onames, sub)
order1 = [i for i in xrange(len(input.broadcastable)) if i not in tosum] order1 = [i for i in xrange(len(input.broadcastable)) if i not in tosum]
order = order1 + list(tosum) order = order1 + list(tosum)
...@@ -459,7 +516,19 @@ def make_reduce(scalar_opclass, name = None): ...@@ -459,7 +516,19 @@ def make_reduce(scalar_opclass, name = None):
New.__name__ = "Reduce" + scalar_opclass.__name__ New.__name__ = "Reduce" + scalar_opclass.__name__
return New return New
Sum = make_reduce(scalar_ops.Add, name = 'Sum') class Sum(make_reduce(scalar_ops.Add)):
def grad(self, (x, ), (gz, )):
if self.dimensions_to_reduce == ():
return gz,
new_dims = []
i = 0
for j, _ in enumerate(x.broadcastable):
if j in self.dimensions_to_reduce:
new_dims.append('x')
else:
new_dims.append(i)
i += 1
return Broadcast(scalar_ops.Second, (x, DimShuffle(gz, new_dims).out)).out,
def reduce(op): def reduce(op):
......
...@@ -832,8 +832,14 @@ class DualLinker(Linker): ...@@ -832,8 +832,14 @@ class DualLinker(Linker):
op_order_1 = env1.toposort() op_order_1 = env1.toposort()
op_order_2 = [equiv[op.outputs[0]].owner for op in op_order_1] # we need to have the exact same order so we can compare each step op_order_2 = [equiv[op.outputs[0]].owner for op in op_order_1] # we need to have the exact same order so we can compare each step
def c_make_thunk(op):
    """Return the first element of `CLinker(op).make_thunk(True)`.

    Falls back to `op.perform` when building the thunk raises
    AbstractFunctionError.
    """
    try:
        thunk = CLinker(op).make_thunk(True)[0]
    except AbstractFunctionError:
        thunk = op.perform
    return thunk
thunks1 = [op.perform for op in op_order_1] thunks1 = [op.perform for op in op_order_1]
thunks2 = [CLinker(op).make_thunk(True)[0] for op in op_order_2] thunks2 = [c_make_thunk(op) for op in op_order_2]
def f(): def f():
for input1, input2 in zip(env1.inputs, env2.inputs): for input1, input2 in zip(env1.inputs, env2.inputs):
......
...@@ -76,14 +76,17 @@ def grad_sources_inputs(sources, graph_inputs): ...@@ -76,14 +76,17 @@ def grad_sources_inputs(sources, graph_inputs):
#if all output gradients are None, continue #if all output gradients are None, continue
if all(map(lambda x:x is None, g_outputs)): continue if all(map(lambda x:x is None, g_outputs)): continue
output_arg = _unpack_result(g_outputs) # output_arg = _unpack_result(g_outputs)
input_arg = _unpack_result(op.inputs) # input_arg = _unpack_result(op.inputs)
output_arg = g_outputs
input_arg = op.inputs
op_grad = op.grad(input_arg, output_arg) op_grad = op.grad(input_arg, output_arg)
if op_grad is None: if op_grad is None:
raise ValueError(_msg_retNone, op.__class__) raise ValueError(_msg_retNone, op.__class__)
if isinstance(op_grad, float): if isinstance(op_grad, float):
raise TypeError('wtf!!!!!!!!', op) raise TypeError('wtf!!!!!!!!', op)
g_inputs = _pack_result(op_grad) g_inputs = op_grad #_pack_result(op_grad)
assert isinstance(g_inputs, (list, tuple)) assert isinstance(g_inputs, (list, tuple))
if len(g_inputs) != len(op.inputs): if len(g_inputs) != len(op.inputs):
raise ValueError(_msg_badlen, raise ValueError(_msg_badlen,
...@@ -123,6 +126,10 @@ class numeric_grad: ...@@ -123,6 +126,10 @@ class numeric_grad:
""" """
gf = [numpy.ndarray(x.shape) for x in pt] gf = [numpy.ndarray(x.shape) for x in pt]
f_pt = f(*pt) f_pt = f(*pt)
# Copy the value(s) f returned at pt. The test must look at the result
# `f_pt`, not at the callable `f` (which is never a list or tuple), so
# that a list/tuple result is copied element by element.
# NOTE(review): presumably the copy guards against f reusing its output
# buffer on later calls -- confirm.
if isinstance(f_pt, (list, tuple)):
    f_pt = [numpy.copy(x) for x in f_pt]
else:
    f_pt = numpy.copy(f_pt)
for idx in xrange(len(gf)): for idx in xrange(len(gf)):
if len(pt[idx].shape) == 0: if len(pt[idx].shape) == 0:
......
...@@ -12,6 +12,10 @@ def as_scalar(x, name = None): ...@@ -12,6 +12,10 @@ def as_scalar(x, name = None):
s = Scalar('float64', name = name) s = Scalar('float64', name = name)
s.data = x s.data = x
return s return s
if isinstance(x, int):
s = Scalar('int32', name = name)
s.data = x
return s
if isinstance(x, Scalar): if isinstance(x, Scalar):
return x return x
...@@ -45,7 +49,8 @@ class Scalar(ResultBase): ...@@ -45,7 +49,8 @@ class Scalar(ResultBase):
# and self.data == other.data # and self.data == other.data
def dtype_specs(self): def dtype_specs(self):
return {'float64': (float, 'double', 'PyFloat_Check', 'PyFloat_AsDouble', 'PyFloat_FromDouble')}[self.dtype] return {'float64': (float, 'npy_float64', 'PyFloat_Check', 'PyFloat_AsDouble', 'PyFloat_FromDouble'),
'int32': (int, 'npy_int32', 'PyInt_Check', 'PyInt_AsLong', 'PyInt_FromLong')}[self.dtype]
def c_declare(self, name, sub): def c_declare(self, name, sub):
return """ return """
......
...@@ -18,7 +18,7 @@ class Sub(BinaryScalarOp): ...@@ -18,7 +18,7 @@ class Sub(BinaryScalarOp):
def c_code(self, (x, y), (z, ), sub): def c_code(self, (x, y), (z, ), sub):
return "%(z)s = %(x)s - %(y)s;" % locals() return "%(z)s = %(x)s - %(y)s;" % locals()
def grad(self, (x, y), (gz, )): def grad(self, (x, y), (gz, )):
return gz, -gz return gz, neg(gz)
class Mul(BinaryScalarOp): class Mul(BinaryScalarOp):
def impl(self, x, y): def impl(self, x, y):
...@@ -34,62 +34,119 @@ class Div(BinaryScalarOp): ...@@ -34,62 +34,119 @@ class Div(BinaryScalarOp):
def c_code(self, (x, y), (z, ), sub): def c_code(self, (x, y), (z, ), sub):
return "%(z)s = %(x)s / %(y)s;" % locals() return "%(z)s = %(x)s / %(y)s;" % locals()
def grad(self, (x, y), (gz, )): def grad(self, (x, y), (gz, )):
return div(gz, y), -div(mul(x, gz), y*y) return div(gz, y), neg(div(mul(x, gz), mul(y, y)))
class Pow(BinaryScalarOp): class Pow(BinaryScalarOp):
def impl(self, x, y): def impl(self, x, y):
return x ** y return x ** y
def c_code(self, (x, y), (z, ), sub): def c_code(self, (x, y), (z, ), sub):
return "%(z)s = pow(%(x)s, %(y)s);" % locals() return "%(z)s = pow(%(x)s, %(y)s);" % locals()
def grad(self, (x, y), (gz, )):
return mul(gz, mul(y, pow(x, sub(y, as_scalar(1))))), mul(gz, mul(log(x), pow(x, y)))
class First(BinaryScalarOp): class First(BinaryScalarOp):
def impl(self, x, y): def impl(self, x, y):
return x return x
def c_code(self, (x, y), (z, ), sub): def c_code(self, (x, y), (z, ), sub):
return "%(z)s = %(x)s;" % locals() return "%(z)s = %(x)s;" % locals()
def grad(self, (x, y), (gz, )):
return gz, None
class Second(BinaryScalarOp): class Second(BinaryScalarOp):
def impl(self, x, y): def impl(self, x, y):
return y return y
def c_code(self, (x, y), (z, ), sub): def c_code(self, (x, y), (z, ), sub):
return "%(z)s = %(y)s;" % locals() return "%(z)s = %(y)s;" % locals()
def grad(self, (x, y), (gz, )):
return None, gz
class SquareDiff(BinaryScalarOp): # class SquareDiff(BinaryScalarOp):
def impl(self, x, y): # def impl(self, x, y):
diff = (x - y) # diff = (x - y)
return diff * diff # return diff * diff
def c_code(self, (x, y), (z, ), sub): # def c_code(self, (x, y), (z, ), sub):
return "%(z)s = %(x)s - %(y)s; %(z)s *= %(z)s;" % locals() # return "%(z)s = %(x)s - %(y)s; %(z)s *= %(z)s;" % locals()
class Identity(UnaryScalarOp):
def impl(self, x):
return x
def c_code(self, (x, ), (z, ), sub):
return "%(z)s = %(x)s;" % locals()
def grad(self, (x, y), (gz, )):
return gz,
class Neg(UnaryScalarOp): class Neg(UnaryScalarOp):
def impl(self, x): def impl(self, x):
return -x return -x
def grad(self, (x, ), (gz, )): def grad(self, (x, ), (gz, )):
return -gz return neg(gz),
def c_code(self, (x, ), (z, ), sub): def c_code(self, (x, ), (z, ), sub):
return "%(z)s = -%(x)s;" % locals() return "%(z)s = -%(x)s;" % locals()
class Abs(UnaryScalarOp):
def impl(self, x):
return numpy.abs(x)
def grad(self, (x, ), (gz, )):
return mul(gz, sgn(x)),
def c_code(self, (x, ), (z, ), sub):
return "%(z)s = abs(%(x)s);" % locals()
class Sgn(UnaryScalarOp):
def impl(self, x):
return numpy.abs(x) / x
def grad(self, (x, ), (gz, )):
return None,
def c_code(self, (x, ), (z, ), sub):
return "%(z)s = %(x)s/abs(%(x)s);" % locals() # TODO: C use copysign
class Inv(UnaryScalarOp): class Inv(UnaryScalarOp):
def impl(self, x): def impl(self, x):
return 1 / x return 1 / x
def grad(self, (x, ), (gz, )): def grad(self, (x, ), (gz, )):
return -gz / (x*x) return div(neg(gz), mul(x, x)),
def c_code(self, (x, ), (z, ), sub): def c_code(self, (x, ), (z, ), sub):
return "%(z)s = 1 / %(x)s;" % locals() return "%(z)s = 1 / %(x)s;" % locals()
class Log(UnaryScalarOp): class Log(UnaryScalarOp):
def impl(self, x): def impl(self, x):
return math.log(x) return math.log(x)
def grad(self, (x, ), (gz, )):
return div(gz, x),
def c_code(self, (x, ), (z, ), sub): def c_code(self, (x, ), (z, ), sub):
return "%(z)s = log(%(x)s);" % locals() return "%(z)s = log(%(x)s);" % locals()
class Log2(UnaryScalarOp):
def impl(self, x):
return numpy.log2(x)
def grad(self, (x, ), (gz, )):
return div(gz, mul(x, as_scalar(math.log(2.0)))),
def c_code(self, (x, ), (z, ), sub):
return "%(z)s = log2(%(x)s);" % locals()
class Exp(UnaryScalarOp): class Exp(UnaryScalarOp):
def impl(self, x): def impl(self, x):
return math.exp(x) return math.exp(x)
def grad(self, (x, ), (gz, )):
return mul(gz, exp(x)),
def c_code(self, (x, ), (z, ), sub): def c_code(self, (x, ), (z, ), sub):
return "%(z)s = exp(%(x)s);" % locals() return "%(z)s = exp(%(x)s);" % locals()
class Sqr(UnaryScalarOp):
def impl(self, x):
return x*x
def grad(self, (x, ), (gz, )):
return mul(gz, mul(x, as_scalar(2))),
def c_code(self, (x, ), (z, ), sub):
return "%(z)s = %(x)s * %(x)s;" % locals()
class Sqrt(UnaryScalarOp):
def impl(self, x):
return math.sqrt(x)
def grad(self, (x, ), (gz, )):
return div(mul(gz, as_scalar(0.5)), sqrt(x)),
def c_code(self, (x, ), (z, ), sub):
return "%(z)s = sqrt(%(x)s);" % locals()
# class Sigmoid(UnaryComposite): # class Sigmoid(UnaryComposite):
# def expand_impl(self, x): # def expand_impl(self, x):
......
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论