Commit 1ddd6c38 authored by Olivier Breuleux

doc, small fixes to tests, defined more gradients

Parent: 2d60d3e3
@@ -7,6 +7,7 @@ import gof
 from scalar import *
+import tensor
 from elemwise import *
@@ -40,13 +41,6 @@ class _test_DimShuffle(unittest.TestCase):
         self.with_linker(gof.PerformLinker)
-#     def test_straightforward(self):
-#         x, y, z = inputs()
-#         e0 = DimShuffle(x, [1, 'x', 0]).out
-#         f = gof.PerformLinker(env([x], [e0])).make_function(inplace=True)
-#         assert f(numpy.ones((2, 3))).shape == (3, 1, 2)
 class _test_Broadcast(unittest.TestCase):
     def with_linker(self, linker):
@@ -164,27 +158,42 @@ class _test_CAReduce(unittest.TestCase):
 if __name__ == '__main__':
     unittest.main()
-# x = modes.build(Tensor('int32', [0, 0], name = 'x'))
-# y = modes.build(Tensor('int32', [0, 0], name = 'y'))
-# # x = modes.build(Tensor('float64', [0, 0], name = 'x'))
-# # y = modes.build(Tensor('float64', [0, 0], name = 'y'))
-# e = Broadcast(Pow, (x, y)).out
+# # x = modes.build(Tensor('int32', [0, 0], name = 'x'))
+# # y = modes.build(Tensor('int32', [0, 0], name = 'y'))
+# from scalar import Scalar, composite
+# x = modes.build(Tensor('float64', [0, 0], name = 'x'))
+# y = modes.build(Tensor('float64', [0, 0], name = 'y'))
+# xs, ys = Scalar('float64'), Scalar('float64')
+# e = Broadcast(composite([xs, ys], [(xs * ys) + (xs / ys) * 7.0]), (x, y)).out
 # f = gof.CLinker(env([x, y], [e])).make_function(inplace = False)
-# # xv = numpy.random.rand(1000, 1000)
-# # yv = numpy.random.rand(1000, 1000)
-# # zv = numpy.random.rand(1000, 1000)
-# xv = numpy.random.randint(1, 5, (1000, 1000))
-# yv = numpy.random.randint(1, 5, (1000, 1000))
-# add = numpy.frompyfunc(lambda x, y: x + y, 2, 1)
+# size = 2000
+# xv = numpy.random.rand(size, size)
+# yv = numpy.random.rand(size, size)
+# zv = numpy.random.rand(size, size)
+# # xv = numpy.random.randint(1, 5, (1000, 1000))
+# # yv = numpy.random.randint(1, 5, (1000, 1000))
 # # t0 = time.time()
 # # for i in xrange(100):
 # #     xv / yv
 # # print time.time() - t0
+# # t0 = time.time()
+# # for i in xrange(10):
+# #     f(xv, yv)
+# # print time.time() - t0
+# # t0 = time.time()
+# # for i in xrange(10):
+# #     (xv * yv) + (xv / yv) * 7.0
+# # print time.time() - t0
+# from scipy import weave
+# import numpy
 # t0 = time.time()
-# for i in xrange(100):
-#     f(xv, yv)
+# for i in xrange(10):
+#     weave.blitz("zv = dot(xv, yv)", locals())
 # print time.time() - t0
 # speed ratios:
@@ -281,7 +281,7 @@ PowTester = make_broadcast_tester(op_class = Pow,
                                   row = (rand_ranged(1, 5, (2, 3)), rand_ranged(-3, 3, (1, 3))),
                                   column = (rand_ranged(1, 5, (2, 3)), rand_ranged(-3, 3, (2, 1))))
                                   )
-PowTester = make_broadcast_tester(op_class = PowInplace,
+PowInplaceTester = make_broadcast_tester(op_class = PowInplace,
                                          expected = lambda x, y: x ** y,
                                          good = dict(same_shapes = (rand_ranged(1, 5, (2, 3)), rand_ranged(-3, 3, (2, 3))),
                                                      scalar = (rand_ranged(1, 5, (2, 3)), rand_ranged(-3, 3, (1, 1))),
@@ -417,28 +417,36 @@ CosInplaceTester = make_broadcast_tester(op_class = CosInplace,
 TanTester = make_broadcast_tester(op_class = Tan,
                                   expected = numpy.tan,
                                   good = dict(normal = (rand_ranged(-3.14, 3.14, (2, 3)),),
+                                              shifted = (rand_ranged(3.15, 6.28, (2, 3)),)),
+                                  grad = dict(normal = (rand_ranged(-3.14, 3.14, (2, 3)),),
                                               shifted = (rand_ranged(3.15, 6.28, (2, 3)),)))
-TanInplaceTester = make_broadcast_tester(op_class = CosInplace,
-                                         expected = numpy.cos,
+TanInplaceTester = make_broadcast_tester(op_class = TanInplace,
+                                         expected = numpy.tan,
                                          good = dict(normal = (rand_ranged(-3.14, 3.14, (2, 3)),),
                                                      shifted = (rand_ranged(3.15, 6.28, (2, 3)),)),
+                                         grad = dict(normal = (rand_ranged(-3.14, 3.14, (2, 3)),),
+                                                     shifted = (rand_ranged(3.15, 6.28, (2, 3)),)),
                                          inplace = True)
 CoshTester = make_broadcast_tester(op_class = Cosh,
                                    expected = numpy.cosh,
-                                   good = _good_broadcast_unary_normal)
+                                   good = _good_broadcast_unary_normal,
+                                   grad = _grad_broadcast_unary_normal)
 CoshInplaceTester = make_broadcast_tester(op_class = CoshInplace,
                                           expected = numpy.cosh,
                                           good = _good_broadcast_unary_normal,
+                                          grad = _grad_broadcast_unary_normal,
                                           inplace = True)
 SinhTester = make_broadcast_tester(op_class = Sinh,
                                    expected = numpy.sinh,
-                                   good = _good_broadcast_unary_normal)
+                                   good = _good_broadcast_unary_normal,
+                                   grad = _grad_broadcast_unary_normal)
 SinhInplaceTester = make_broadcast_tester(op_class = SinhInplace,
                                           expected = numpy.sinh,
                                           good = _good_broadcast_unary_normal,
+                                          grad = _grad_broadcast_unary_normal,
                                           inplace = True)
 TanhTester = make_broadcast_tester(op_class = Tanh,
@@ -3,15 +3,18 @@ import elemwise_cgen as cgen
 import numpy
 from gof import Op, Viewer, Destroyer
-from base_tensor import BaseTensor as Tensor
+#from base_tensor import BaseTensor as Tensor
 import scalar
 from scalar import upcast, Scalar
 import gof
 from gof.python25 import all

 def astensor(data):
-    assert isinstance(data, Tensor)
-    return data
+    raise Exception("Circular dependencies prevent using this here. import tensor before elemwise")
+
+def Tensor(*inputs, **kwargs):
+    raise Exception("Circular dependencies prevent using this here. import tensor before elemwise")
 ##################
@@ -20,8 +23,35 @@ def astensor(data):
 class DimShuffle(Op, Viewer):
     """
-    @todo: DOCUMENTATION? --jpt
+    Usage: DimShuffle(input, new_order, inplace = True)
+
+    * input: a Tensor instance
+    * new_order: a list representing the relationship between the
+      input's dimensions and the output's dimensions. Each element
+      of the list can either be an index or 'x'.
+    * inplace: if True, the output will be a view of the input.
+      If False, the output will be a copy of the input.
+
+    If j = new_order[i] is an index, the output's ith dimension
+    will be the input's jth dimension.
+    If new_order[i] is 'x', the output's ith dimension will
+    be 1 and Broadcast operations will be allowed to broadcast
+    over that dimension.
+
+    If input.broadcastable[i] == False then i must be found in
+    new_order. Broadcastable dimensions, on the other hand, can
+    be discarded.
+
+    Examples:
+      # t<n> represents an n-d tensor
+      DimShuffle(t2, [0, 1]) -> identity
+      DimShuffle(t2, [1, 0]) -> swaps the first and second dimensions
+      DimShuffle(t1, ['x', 0]) -> make a row out of a 1d vector
+      DimShuffle(t1, [0, 'x']) -> make a column out of a 1d vector
+      DimShuffle(t3, [2, 0, 1]) -> like doing t3.transpose((2, 0, 1)) in numpy
+      DimShuffle(t2, [0, 'x', 1]) -> like doing t2.reshape((t2.shape[0], 1, t2.shape[1])) in numpy
+      DimShuffle(t2, [1, 'x', 0]) -> like doing t2.T.reshape((t2.shape[1], 1, t2.shape[0])) in numpy
     """
     def __init__(self, input, new_order, inplace = True):
         input = astensor(input)
@@ -44,21 +74,26 @@ class DimShuffle(Op, Viewer):
         self.inplace = inplace

+        # list of dimensions of the input to drop
         self.drop = []
-        self.augment = []
-        i2j = {}
+        # i2j maps i before dropping dimensions to j after dropping
+        # dimensions, so self.shuffle can be set properly later on
+        i2j = {}
         j = 0
         for i, b in enumerate(ib):
             if i not in new_order:
+                # we want to drop this dimension because it's not a value in new_order
                 if b == 1:
                     self.drop.append(i)
                 else:
+                    # we cannot drop non-broadcastable dimensions
                     raise NotImplementedError("You cannot drop a non-broadcastable dimension.")
             else:
                 i2j[i] = j
                 j += 1

+        # transposition of non-broadcastable dimensions
         self.shuffle = [i2j[x] for x in new_order if x != 'x']

+        # list of dimensions of the output that are broadcastable
+        # and were not in the original input
         self.augment = [i for i, x in enumerate(new_order) if x == 'x']

     def clone_with_new_inputs(self, *new_inputs):
@@ -77,19 +112,23 @@ class DimShuffle(Op, Viewer):
         return "DimShuffle{%s}" % "".join(str(x) for x in self.new_order)

     def perform(self):
+        # drop
         res = self.inputs[0].data
         shape = list(res.shape)
         for drop in reversed(self.drop):
             shape.pop(drop)
         res = res.reshape(shape)

+        # transpose
         res = res.transpose(self.shuffle)

+        # augment
         shape = list(res.shape)
         for augm in self.augment:
             shape.insert(augm, 1)
         res = res.reshape(shape)

+        # copy (if not inplace)
         if not self.inplace:
             res = numpy.copy(res)
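In NumPy terms, perform is three reshape/transpose steps in sequence. A minimal standalone sketch of the same drop/transpose/augment logic (the function and test values below are illustrative, not from this commit):

    import numpy

    def dimshuffle(arr, new_order):
        # drop: remove dimensions that new_order does not mention
        # (reshape fails unless the dropped dimension has size 1)
        kept = [i for i in range(arr.ndim) if i in new_order]
        arr = arr.reshape([arr.shape[i] for i in kept])
        # transpose: reorder the kept dimensions
        i2j = {i: j for j, i in enumerate(kept)}
        arr = arr.transpose([i2j[x] for x in new_order if x != 'x'])
        # augment: insert a size-1 dimension wherever new_order says 'x'
        shape = list(arr.shape)
        for i, x in enumerate(new_order):
            if x == 'x':
                shape.insert(i, 1)
        return arr.reshape(shape)

    assert dimshuffle(numpy.ones((2, 3)), [1, 'x', 0]).shape == (3, 1, 2)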
@@ -106,18 +145,6 @@ class DimShuffle(Op, Viewer):
         return "%s(%s, %s)" % (self.__class__.__name__, str(self.inputs[0]), self.new_order)

-class Transpose(DimShuffle):
-    def __init__(self, input):
-        DimShuffle.__init__(self, input, range(len(input.broadcastable)-1, -1, -1), False)
-
-    def clone_with_new_inputs(self, *new_inputs):
-        return Transpose(new_inputs[0])
-
-    def __str__(self):
-        return "%s(%s)" % (self.__class__.__name__, str(self.inputs[0]))

 #################
 ### Broadcast ###
@@ -125,7 +152,36 @@ class Transpose(DimShuffle):
 class Broadcast(Op, Destroyer):
     """
-    @todo: DOCUMENTATION? --jpt
+    Generalizes a scalar op to tensors.
+
+    Usage: Broadcast(scalar_opclass, inputs, inplace_pattern = {})
+
+    * scalar_opclass: a class that extends scalar.ScalarOp, operates
+      exclusively on scalars and can be instantiated from the list of
+      its inputs
+    * inputs: a list of Tensor instances
+    * inplace_pattern: a dictionary that maps the index of an output to
+      the index of an input, so that the output is calculated inplace
+      using the input's storage.
+
+    All the inputs must have the same number of dimensions. When the
+    Op is performed, each input's size along each dimension must be
+    the same as every other input's. As a special case, a size can
+    also be 1, but only if the input's broadcastable flag is True for
+    that dimension. In that case, the tensor is (virtually) replicated
+    along that dimension to match the size of the others.
+
+    The dtypes of the outputs mirror those of the scalar Op that is
+    being generalized to tensors. However, if the calculations for an
+    output are done inplace on an input, it will keep the same dtype
+    as the input (in a nutshell, int + float -> float, but int += float -> int).
+
+    Examples:
+      Broadcast(Add, (rand(10, 5), rand(10, 5)), {0: 0}) # this does input0 += input1
+      Broadcast(Add, (rand(10, 5), rand(10, 5)), {0: 1}) # this does input1 += input0
+      Broadcast(Mul, (rand(10, 5), rand(1, 5))) # the second input is replicated along the first dimension to match the first input
+      Broadcast(Div, (rand(10, 5), rand(10, 1))) # same but along the second dimension
+      Broadcast(Div, (rand(1, 5), rand(10, 1))) # the output has size (10, 5)
+      Broadcast(Log, (rand(3, 4, 5),))
     """
     def __init__(self, scalar_opclass, inputs, inplace_pattern = {}):
@@ -137,6 +193,7 @@ class Broadcast(Op, Destroyer):
         except (AssertionError, AttributeError):
             raise TypeError("All inputs to a Broadcast subclass must be Tensor instances and their broadcastable fields must all have the same length.", self.__class__)

+        # self.shadow is an instance of scalar_opclass used to get values
+        # for all the properties we need (dtypes, gradient, etc.)
         self.shadow = scalar_opclass(*[Scalar(dtype = t.dtype) for t in inputs])
         self.nin = self.shadow.nin
@@ -151,6 +208,9 @@ class Broadcast(Op, Destroyer):
         out_dtypes = [t.dtype for t in self.shadow.outputs]

         def get_dtype(i):
+            # If an operation is done inplace, the dtype of the output
+            # will be the same as the dtype of the input it overwrites,
+            # e.g. int + float -> float, but int += float -> int
             input_idx = inplace_pattern.get(i, None)
             if input_idx is not None:
                 return inputs[input_idx].dtype
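Modern NumPy enforces safe casting on in-place ufuncs, so reproducing the int += float -> int behaviour there takes an explicit unsafe cast; a sketch, not from this commit:

    import numpy

    a = numpy.ones(3, dtype='int32')
    b = numpy.full(3, 0.5, dtype='float64')
    print((a + b).dtype)                      # float64: out-of-place upcasts
    numpy.add(a, b, out=a, casting='unsafe')  # inplace: lands in a's int32 storage
    print(a.dtype, a)                         # int32 [1 1 1]: fractions truncated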
@@ -185,14 +245,18 @@ class Broadcast(Op, Destroyer):
         shadow = self.shadow
         scalar_ograds = [Scalar(dtype = ograd.dtype) for ograd in ograds]
         scalar_igrads = shadow.grad(shadow.inputs, scalar_ograds)
+        nd = len(inputs[0].broadcastable) # this is the same for everyone

         def transform(r):
+            # From a graph of ScalarOps, make a graph of Broadcast ops.
             if r in shadow.inputs:
                 return inputs[shadow.inputs.index(r)]
             if r in scalar_ograds:
                 return ograds[scalar_ograds.index(r)]
             op = r.owner
             if op is None:
-                b = [1] * len(inputs[0].broadcastable)
+                # the gradient contains a constant, translate it as an
+                # equivalent Tensor of size 1 and the proper number of dimensions
+                b = [1] * nd
                 res = astensor(numpy.asarray(r.data).reshape(b),
                                broadcastable = b)
                 return res
@@ -202,10 +266,16 @@ class Broadcast(Op, Destroyer):
         ret = []
         for scalar_igrad, input in zip(scalar_igrads, inputs):
             if scalar_igrad is None:
+                # undefined gradient
                 ret.append(None)
                 continue
             r = transform(scalar_igrad)

+            # list of all the dimensions that are broadcastable for that
+            # input, so we can sum over them
+            # todo: only count dimensions that were effectively broadcasted
             to_sum = [i for i, bcast in enumerate(input.broadcastable) if bcast]

             if to_sum:
                 shuffle = []
                 j = 0
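The reason for summing: when an input was broadcasted, every virtual replica contributes its own piece of the upstream gradient, and the single stored row (or column) must accumulate all of them. In NumPy terms (illustrative names):

    import numpy

    x = numpy.random.rand(10, 5)
    y = numpy.random.rand(1, 5)          # broadcasted along dimension 0 in z = x + y
    gz = numpy.random.rand(10, 5)        # upstream gradient on z
    gy = gz.sum(axis=0, keepdims=True)   # gradient wrt y: sum over the broadcasted dim
    assert gy.shape == y.shape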
@@ -229,6 +299,7 @@ class Broadcast(Op, Destroyer):
             odat = output.data
             shape = [max(values) for values in zip(*[input.data.shape for input in self.inputs])]
             if odat is not None:
+                # reuse storage if we can
                 odat.resize(shape, refcheck = 0)
             else:
                 odat = numpy.ndarray(shape, dtype = output.dtype)
@@ -325,7 +396,34 @@ class Broadcast(Op, Destroyer):

 def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None):
+    if name is None:
+        name = "Tensor" + scalar_opclass.__name__
+    scalar_name = scalar_opclass.__name__
+    previous_doc = Broadcast.__doc__
+    scalar_doc = scalar_opclass.__doc__
+    if scalar_doc:
+        scalar_doc = """
+        %(scalar_name)s documentation:
+        %(scalar_doc)s
+        """ % locals()
+    doc = """
+    Usage: %(name)s(*inputs)
+    Equivalent to: Broadcast(%(scalar_name)s, inputs, %(inplace_pattern)s)
+
+    Performs Scalar %(scalar_name)s on each element of the input tensors.
+    %(scalar_doc)s
+    Documentation for Broadcast:
+    ==================================================
+    %(previous_doc)s
+    ==================================================
+    """ % locals()
+
     class New(Broadcast):
+        __doc__ = doc
         def __init__(self, *inputs):
             Broadcast.__init__(self, scalar_opclass, inputs, inplace_pattern)
         def clone_with_new_inputs(self, *new_inputs):
@@ -333,10 +431,7 @@ def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None):
         @classmethod
         def desc(cls):
             return (Broadcast, scalar_opclass, tuple(inplace_pattern.items()))
-    if name is not None:
-        New.__name__ = name
-    else:
-        New.__name__ = "Tensor" + scalar_opclass.__name__
+    New.__name__ = name
     return New
 def wrap_broadcast(op):
@@ -353,6 +448,8 @@ def wrap_broadcast(op):
         else:
             args.append(DimShuffle(input, ['x']*difference + range(length)).out)
         return op(*args)
+    instantiate.__name__ = "instantiate{%s}" % op.__name__
+    instantiate.__doc__ = op.__doc__
     return instantiate
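The DimShuffle(input, ['x']*difference + range(length)) call pads inputs that have fewer dimensions with leading broadcastable dimensions; the NumPy analogue is prepending size-1 axes (illustrative):

    import numpy

    a = numpy.random.rand(10, 5)
    b = numpy.random.rand(5)              # 1-d: needs one leading 'x'
    b_padded = b.reshape((1,) + b.shape)  # like DimShuffle(b, ['x', 0])
    assert (a + b_padded).shape == (10, 5)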
@@ -363,26 +460,26 @@ def wrap_broadcast(op):

 class CAReduce(Op):
     """
-    CAReduce(scalar_op, inputs, dimensions_to_reduce = None, init = None, shortcut = False)
-
-    The number of inputs must be the difference between the number of
-    outputs of scalar_op and its number of inputs. L{CAReduce} holds
-    scalar states, the accumulators, in proportion to the number of
-    outputs of scalar_op and it updates them iteratively::
-
-      for x, y, ... in input0, input1, ...
-          scalar_state <- scalar_op(scalar_state, x, y, ...)}
-
-    The initial states are init if provided (they must be scalars),
-    else if there are as many states as inputs, a sample from each
-    input will be taken as initialization, else an error will be
-    raised.
-
-    If shortcut is True and the scalar op has a 'tbd' field, the
-    iteration will try to stop as soon as it encounters the value
-    specified for that field and will return it immediately, eg
-    multiply/and will return 0 at first sight of 0 and 'or' will
-    return 1 at first sight of 1.
+    Usage: CAReduce(scalar_opclass, inputs, dimensions_to_reduce = None)
+
+    * scalar_opclass: a binary scalar op with only one output.
+      It will be instantiated with one Scalar per input:
+      scalar_opclass(*[Scalar(t.dtype) for t in inputs])
+      It must be commutative and associative.
+    * inputs: a list of Tensor instances
+    * dimensions_to_reduce: the list of dimensions that we want to
+      reduce; if None, all dimensions are reduced
+
+    The output will have the same shape as the input minus the reduced
+    dimensions. It will contain the result of accumulating all values
+    over the reduced dimensions using the specified scalar op.
+
+    Examples:
+      CAReduce(Add, inputs) -> sum(inputs)
+      CAReduce(Mul, inputs) -> product(inputs)
+      CAReduce(Or, inputs) -> any(inputs) # not lazy
+      CAReduce(And, inputs) -> all(inputs) # not lazy
+      CAReduce(Xor, inputs) -> sum(inputs != 0) % 2

     In order to optimize memory usage patterns, L{CAReduce} makes zero
     guarantees on the order in which it iterates over the dimensions
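The accumulation the docstring describes matches a NumPy ufunc reduction over the chosen axes; an illustrative sketch for the Add case:

    import numpy

    x = numpy.random.rand(3, 4, 5)
    r = numpy.add.reduce(x, axis=(0, 2))   # analogue of CAReduce(Add, x, [0, 2])
    assert r.shape == (4,)
    assert numpy.allclose(r, x.sum(axis=(0, 2)))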
@@ -520,13 +617,31 @@ class CAReduce(Op):

 def make_reduce(scalar_opclass, name = None):
-    if getattr(scalar_opclass, 'commutative', True) \
-            and getattr(scalar_opclass, 'associative', True):
+    if getattr(scalar_opclass, 'commutative', False) \
+            and getattr(scalar_opclass, 'associative', False):
         reducer = CAReduce
     else:
         raise NotImplementedError("The scalar op class to reduce must be commutative and associative.")
+
+    scalar_name = scalar_opclass.__name__
+    if name is None:
+        name = "Reduce" + scalar_name
+    previous_doc = reducer.__doc__
+    doc = """
+    Usage: %(name)s(input, axis)
+    Equivalent to: CAReduce(%(scalar_name)s, input, axis)
+
+    Reduces the input over the specified axis.
+
+    Documentation for CAReduce:
+    ==================================================
+    %(previous_doc)s
+    ==================================================
+    """ % locals()
+
     class New(reducer):
+        __doc__ = doc
         def __init__(self, *inputs, **kwargs):
             reducer.__init__(self, scalar_opclass, inputs, kwargs.get('axis', None))
         def clone_with_new_inputs(self, *new_inputs):
@@ -540,13 +655,12 @@ def make_reduce(scalar_opclass, name = None):
             return "%s(%s, axis = %s)" % (self.__class__.__name__,
                                           str(input),
                                           self.dimensions_to_reduce)
-    if name is not None:
-        New.__name__ = name
-    else:
-        New.__name__ = "Reduce" + scalar_opclass.__name__
+    New.__name__ = name
     return New
-class Sum(make_reduce(scalar.Add)):
+_Sum = make_reduce(scalar.Add, '_Sum')
+
+class Sum(_Sum):
+    __doc__ = _Sum.__doc__
     def grad(self, (x, ), (gz, )):
         if self.dimensions_to_reduce == ():
             return gz,
@@ -16,7 +16,7 @@ __all__ = ['Op',
            ]

-def constructor(op_cls):
+def constructor(op_cls, name = None):
     """Make an Op look like a L{Result}-valued function."""
     def f(*args, **kwargs):
         op = op_cls(*args, **kwargs)
@@ -24,6 +24,17 @@ def constructor(op_cls):
             return op.outputs
         else:
             return op.outputs[0]
+    opname = op_cls.__name__
+    if name is None:
+        name = "constructor{%s}" % opname
+    f.__name__ = name
+    doc = op_cls.__doc__
+    f.__doc__ = """
+    Constructor for %(opname)s:
+    %(doc)s
+    """ % locals()
     return f
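Stripped of the gof specifics, the wrapper just instantiates the Op and hands back its outputs as if the class were a plain function; a self-contained sketch with a hypothetical Op:

    def constructor(op_cls, name=None):
        def f(*args, **kwargs):
            op = op_cls(*args, **kwargs)
            outs = op.outputs
            return outs if len(outs) > 1 else outs[0]
        f.__name__ = name or "constructor{%s}" % op_cls.__name__
        f.__doc__ = op_cls.__doc__
        return f

    class Double(object):
        """Doubles its input."""
        def __init__(self, x):
            self.outputs = [2 * x]

    double = constructor(Double)
    print(double(21))       # 42
    print(double.__name__)  # constructor{Double}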
 class Op(object):
@@ -246,6 +246,8 @@ class FloatUnaryScalarOp(UnaryScalarOp):

 class Add(ScalarOp):
     identity = 0
+    commutative = True
+    associative = True
     def impl(self, *inputs):
         return sum(inputs)
     def c_code(self, inputs, (z, ), sub):
@@ -258,6 +260,8 @@ class Add(ScalarOp):

 class Mul(ScalarOp):
     identity = 1
+    commutative = True
+    associative = True
     def impl(self, *inputs):
         return numpy.product(inputs)
     def c_code(self, inputs, (z, ), sub):
@@ -424,27 +428,37 @@ class Tan(FloatUnaryScalarOp):
     def impl(self, x):
         return math.tan(x)
     def grad(self, (x, ), (gz, )):
-        raise NotImplementedError()
+        return gz / (cos(x) ** 2),
     def c_code(self, (x, ), (z, ), sub):
         return "%(z)s = tan(%(x)s);" % locals()
 class Cosh(FloatUnaryScalarOp):
+    """
+    cosh(x) = (exp(x) + exp(-x)) / 2
+    """
     def impl(self, x):
         return math.cosh(x)
     def grad(self, (x, ), (gz, )):
-        raise NotImplementedError()
+        return gz * sinh(x),
     def c_code(self, (x, ), (z, ), sub):
         return "%(z)s = cosh(%(x)s);" % locals()

 class Sinh(FloatUnaryScalarOp):
+    """
+    sinh(x) = (exp(x) - exp(-x)) / 2
+    """
     def impl(self, x):
         return math.sinh(x)
     def grad(self, (x, ), (gz, )):
-        raise NotImplementedError()
+        return gz * cosh(x),
     def c_code(self, (x, ), (z, ), sub):
         return "%(z)s = sinh(%(x)s);" % locals()

 class Tanh(FloatUnaryScalarOp):
+    """
+    tanh(x) = sinh(x) / cosh(x)
+            = (exp(2*x) - 1) / (exp(2*x) + 1)
+    """
     def impl(self, x):
         return math.tanh(x)
     def grad(self, (x, ), (gz, )):
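Each of the newly defined gradients can be sanity-checked against a central finite difference; the closed forms below are the standard identities, and the check itself is illustrative, not from this commit:

    import math

    def check(f, dfdx, x, eps=1e-6):
        numeric = (f(x + eps) - f(x - eps)) / (2 * eps)
        assert abs(numeric - dfdx(x)) < 1e-4, (numeric, dfdx(x))

    check(math.tan,  lambda x: 1.0 / math.cos(x) ** 2, 0.3)   # Tan.grad
    check(math.cosh, math.sinh, 0.3)                          # Cosh.grad
    check(math.sinh, math.cosh, 0.3)                          # Sinh.grad
    check(math.tanh, lambda x: 1.0 - math.tanh(x) ** 2, 0.3)  # 1 - tanh^2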
@@ -218,15 +218,14 @@ TensorCopy, tensor_copy = broadcast(scal.Identity, 'TensorCopy', False)
 # View Operations
 ##########################

-class TransposeInplace(_Op, Viewer):
-    def view_map(self):
-        return {self.out: [self.inputs[0]]}
-    def propagate_broadcastable(self, x):
-        rval = list(x)
-        rval.reverse()
-        return [rval]
-    def impl(self, x):
-        return x.T #numpy's transpose
+class TransposeInplace(s2t.DimShuffle):
+
+    def __init__(self, input):
+        s2t.DimShuffle.__init__(self, input, range(len(input.broadcastable)-1, -1, -1), True)
+
+    def perform(self):
+        self.outputs[0].data = self.inputs[0].data.T
+
     def grad(self, (x,), (gz,)):
         return transpose(gz),
@@ -238,6 +237,7 @@ class TransposeInplace(_Op, Viewer):
         }
         %(z)s = transposed;
         """ % locals()
+transpose_inplace = gof.op.constructor(TransposeInplace)
 def transpose(x, **kwargs):
     return transpose_inplace(tensor_copy(x), **kwargs)