提交 f9be5a48 authored 作者: James Bergstra's avatar James Bergstra

Automated merge with ssh://p-omega1@lgcm/theano

......@@ -7,6 +7,8 @@ import gradient
from sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result
from sparse import _mtypes, _mtype_to_str
import random
class T_transpose(unittest.TestCase):
def setUp(self):
numpy.random.seed(44)
......
......@@ -566,6 +566,17 @@ def check_eq2_both(self, inputs, output, args_in, arg_out):
val = fn(*args_in)
self.failUnless( numpy.all(val == arg_out), (val, arg_out))
class T_Shape(unittest.TestCase):
    """Tests that the `shape` Op reports the dimensions of 1-d, 2-d and 3-d arrays."""

    def _check_shape(self, dims):
        # Build an all-ones array with the given dims and verify that
        # evaluating `shape` on it yields exactly those dims.
        s = shape(numpy.ones(dims))
        self.failUnless((eval_outputs([s]) == list(dims)).all())

    def test_basic0(self):
        self._check_shape((5, 3))

    def test_basic1(self):
        self._check_shape((2,))

    def test_basic2(self):
        self._check_shape((5, 3, 10))
class T_argmax(unittest.TestCase):
def setUp(self):
numpy.random.seed(123784)
......@@ -819,6 +830,21 @@ class T_subtensor(unittest.TestCase):
self.failUnless(numpy.all(tval == 0))
class T_Stack(unittest.TestCase):
    """Tests for horizontal_stack and vertical_stack on 2-d tensors."""

    def test_hstack(self):
        # A 2x3 block next to a 2x1 block gives a 2x4 result.
        left = astensor(numpy.array([[1, 2, 3], [4, 5, 6]]), broadcastable=[False, False])
        right = astensor(numpy.array([[7], [8]]), broadcastable=[False, False])
        expected = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]])
        result = eval_outputs([horizontal_stack(left, right)])
        self.failUnless((result == expected).all())

    def test_vstack(self):
        # A 2x3 block on top of a 1x3 block gives a 3x3 result.
        top = astensor(numpy.array([[1, 2, 3], [4, 5, 6]]), broadcastable=[False, False])
        bottom = astensor(numpy.array([[7, 8, 9]]), broadcastable=[False, False])
        expected = numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        result = eval_outputs([vertical_stack(top, bottom)])
        self.failUnless((result == expected).all())
class T_add(unittest.TestCase):
def test_complex_all_ops(self):
......
......@@ -25,37 +25,37 @@ class _test_inplace_opt(unittest.TestCase):
x, y, z = inputs()
e = x + y + z
g = Env([x, y], [e])
assert str(g) == "[Broadcast{Add}(Broadcast{Add}(x, y), z)]"
self.failUnless(str(g) == "[Broadcast{Add}(Broadcast{Add}(x, y), z)]")
inplace_optimizer.optimize(g)
assert str(g) == "[Broadcast{Add}{0: 0}(Broadcast{Add}{0: 0}(x, y), z)]"
self.failUnless(str(g) == "[Broadcast{Add}{0: 0}(Broadcast{Add}{0: 0}(x, y), z)]")
def test_multiple_uses(self):
x, y, z = inputs()
e0 = x + y
e1 = x * y
g = Env([x, y], [e0, e1])
assert str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}(x, y)]"
self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}(x, y)]")
inplace_optimizer.optimize(g)
assert str(g) == "[Broadcast{Add}{0: 0}(x, y), Broadcast{Mul}(x, y)]" \
or str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]"
self.failUnless(str(g) == "[Broadcast{Add}{0: 0}(x, y), Broadcast{Mul}(x, y)]" \
or str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]")
def test_user_inplace(self):
x, y, z = inputs()
e0 = x + y
e1 = tensor.mul_inplace(x, y)
g = Env([x, y], [e0, e1])
assert str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]"
self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]")
inplace_optimizer.optimize(g)
assert str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]"
self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]")
def test_inplace_on_second_argument(self):
x, y, z = inputs()
e0 = x + y
e1 = tensor.mul_inplace(x, z)
g = Env([x, y], [e0, e1])
assert str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, z)]"
self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, z)]")
inplace_optimizer.optimize(g)
assert str(g) == "[Broadcast{Add}{0: 1}(x, y), Broadcast{Mul}{0: 0}(x, z)]"
self.failUnless(str(g) == "[Broadcast{Add}{0: 1}(x, y), Broadcast{Mul}{0: 0}(x, z)]")
class _test_dimshuffle_lift(unittest.TestCase):
......@@ -64,23 +64,23 @@ class _test_dimshuffle_lift(unittest.TestCase):
x, y, z = inputs()
e = ds(ds(x, (1, 0)), (1, 0))
g = Env([x], [e])
assert str(g) == "[DimShuffle{10}(DimShuffle{10}(x))]"
self.failUnless(str(g) == "[InplaceDimShuffle{1,0}(InplaceDimShuffle{1,0}(x))]")
lift_dimshuffle.optimize(g)
assert str(g) == "[x]"
self.failUnless(str(g) == "[x]")
def test_merge2(self):
x, y, z = inputs()
e = ds(ds(x, (1, 'x', 0)), (2, 0, 'x', 1))
g = Env([x], [e])
self.failUnless(str(g) == "[DimShuffle{20x1}(DimShuffle{1x0}(x))]", str(g))
self.failUnless(str(g) == "[InplaceDimShuffle{2,0,x,1}(InplaceDimShuffle{1,x,0}(x))]", str(g))
lift_dimshuffle.optimize(g)
self.failUnless(str(g) == "[DimShuffle{01xx}(x)]", str(g))
self.failUnless(str(g) == "[InplaceDimShuffle{0,1,x,x}(x)]", str(g))
def test_elim3(self):
x, y, z = inputs()
e = ds(ds(ds(x, (0, 'x', 1)), (2, 0, 'x', 1)), (1, 0))
g = Env([x], [e])
self.failUnless(str(g) == "[DimShuffle{10}(DimShuffle{20x1}(DimShuffle{0x1}(x)))]", str(g))
self.failUnless(str(g) == "[InplaceDimShuffle{1,0}(InplaceDimShuffle{2,0,x,1}(InplaceDimShuffle{0,x,1}(x)))]", str(g))
lift_dimshuffle.optimize(g)
self.failUnless(str(g) == "[x]", str(g))
......@@ -88,9 +88,9 @@ class _test_dimshuffle_lift(unittest.TestCase):
x, y, z = inputs([0]*1, [0]*2, [0]*3)
e = x + y + z
g = Env([x, y, z], [e])
self.failUnless(str(g) == "[Broadcast{Add}(DimShuffle{x01}(Broadcast{Add}(DimShuffle{x0}(x), y)), z)]", str(g))
self.failUnless(str(g) == "[Broadcast{Add}(InplaceDimShuffle{x,0,1}(Broadcast{Add}(InplaceDimShuffle{x,0}(x), y)), z)]", str(g))
lift_dimshuffle.optimize(g)
self.failUnless(str(g) == "[Broadcast{Add}(Broadcast{Add}(DimShuffle{xx0}(x), DimShuffle{x01}(y)), z)]", str(g))
self.failUnless(str(g) == "[Broadcast{Add}(Broadcast{Add}(InplaceDimShuffle{x,x,0}(x), InplaceDimShuffle{x,0,1}(y)), z)]", str(g))
class _test_cliques(unittest.TestCase):
......@@ -103,10 +103,10 @@ class _test_cliques(unittest.TestCase):
e = x + y + d
g = Env([x, y, z], [e])
cliques = find_cliques(g)
assert len(cliques) == 2
self.failUnless(len(cliques) == 2)
(i1, o1), (i2, o2) = cliques
assert str(Env(i1, o1)) == "[Broadcast{Add}(Broadcast{Add}(x, y), d)]"
assert str(Env(i2, o2)) == "[Broadcast{Mul}(y, z)]"
self.failUnless(str(Env(i1, o1)) == "[Broadcast{Add}(Broadcast{Add}(x, y), d)]")
self.failUnless(str(Env(i2, o2)) == "[Broadcast{Mul}(y, z)]")
# print g
# for i, o in find_cliques(g):
# print "-->", Env(i, [o])
......@@ -116,8 +116,8 @@ class _test_cliques(unittest.TestCase):
e = x + y + z
g = Env([x, y, z], [e])
lift_dimshuffle.optimize(g)
assert len(find_cliques(g, through_broadcast = True)) == 1
assert len(find_cliques(g, through_broadcast = False)) == 2
self.failUnless(len(find_cliques(g, through_broadcast = True)) == 1)
self.failUnless(len(find_cliques(g, through_broadcast = False)) == 2)
# print g
# for i, o in find_cliques(g, True):
# print "-->", Env(i, [o])
......
......@@ -189,6 +189,24 @@ def eval_outputs(outputs,
return rval
def infer_reuse_pattern(env, outputs_to_disown):
    """
    Collect every result whose storage may alias one of the given outputs.

    Starting from each result in ``outputs_to_disown``, walk backwards
    through the ``destroy_map``/``view_map`` of the owning ops, gathering
    all results that are destroyed by, or viewed as, one of those outputs.
    The walk stops at results for which ``env.edge(r)`` is true (graph
    inputs/orphans are never followed further).

    @note: ``outputs_to_disown`` is mutated in place (collected results,
           including the starting outputs themselves, are appended to it)
           and the same list object is returned.
    """
    do_not_reuse = outputs_to_disown  # mutated in place; also the return value
    seen = set()
    def walk(r):
        # Stop at graph edges and at results we have already processed.
        if env.edge(r) or r in seen:
            return
        seen.add(r)
        do_not_reuse.append(r)
        op = r.owner
        dmap = op.destroy_map() if hasattr(op, 'destroy_map') else {}
        vmap = op.view_map() if hasattr(op, 'view_map') else {}
        # Recurse into every result this op destroys or views: their
        # storage may alias r's storage.
        # (The original used `reduce(cat, ...)`, which raises TypeError on
        # an empty map, and called the undefined name `accumulate` instead
        # of recursing via `walk`; both defects are fixed here.)
        for results in list(dmap.values()) + list(vmap.values()):
            for r2 in results:
                walk(r2)
    for output in outputs_to_disown:
        walk(output)
    return do_not_reuse
# StateFunction([x, y], [e], (w, w + lr * bla()))
......
......@@ -105,10 +105,13 @@ class DimShuffle(Op, Viewer):
return {}
def desc(self):
return (self.__class__, tuple(self.new_order))
return (self.__class__, tuple(self.new_order), self.inplace)
def strdesc(self):
return "DimShuffle{%s}" % "".join(str(x) for x in self.new_order)
if self.inplace:
return "InplaceDimShuffle{%s}" % ",".join(str(x) for x in self.new_order)
else:
return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order)
def perform(self):
# drop
......@@ -412,11 +415,14 @@ class Broadcast(Op, Destroyer):
def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None):
def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None, module_name = None):
scalar_name = scalar_opclass.__name__
if name is None:
name = "Tensor" + scalar_opclass.__name__
name = scalar_name
if module_name is None:
module_name = 'elemwise.make_broadcast(%s, %s, %s)' % (scalar_name, inplace_pattern, repr(name))
name = "New"
scalar_name = scalar_opclass.__name__
previous_doc = Broadcast.__doc__
scalar_doc = scalar_opclass.__doc__ or ""
......@@ -449,6 +455,7 @@ def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None):
def desc(cls):
return (Broadcast, scalar_opclass, tuple(inplace_pattern.items()))
New.__name__ = name
New.__module__ = module_name
return New
def wrap_broadcast(op):
......
......@@ -493,4 +493,3 @@ def view_roots(r):
return [r]
else:
return [r]
......@@ -115,8 +115,9 @@ class PerformLinker(Linker):
the L{Env} in the order given by L{Env.toposort}.
"""
def __init__(self, env):
def __init__(self, env, no_recycling = []):
self.env = env
self.no_recycling = no_recycling
def make_thunk(self, inplace = False, profiler = None):
if inplace:
......@@ -125,8 +126,14 @@ class PerformLinker(Linker):
env = self.env.clone(True)
order = env.toposort()
thunks = [op.perform for op in order]
no_recycling = self.no_recycling
if no_recycling is True:
no_recycling = list(env.results())
no_recycling = utils.difference(no_recycling, env.inputs)
if profiler is None:
def f():
for r in no_recycling:
r.data = None
try:
for thunk, op in zip(thunks, order):
thunk()
......@@ -134,6 +141,8 @@ class PerformLinker(Linker):
raise_with_op(op)
else:
def f():
for r in no_recycling:
r.data = None
def g():
for thunk, op in zip(thunks, order):
profiler.profile_op(thunk, op)
......
......@@ -320,3 +320,4 @@ def dot(x, y, grad_preserves_dense=True):
else:
assert y_is_sparse_result
return transpose(Dot(y.T, x.T, grad_preserves_dense).outputs[0])
......@@ -317,7 +317,7 @@ def astensor(data, broadcastable=None, name=None):
raise ValueError("Cannot rename an existing Tensor.")
return data
elif isinstance(data, Result):
raise TypeError("Cannot make a Tensor out of a non-Tensor result:", data)
raise TypeError("Cannot make a Tensor out of a result that is not an instance of Tensor: %s (%s)" % (data, data.__class__.__name__), data)
if data is None and broadcastable is None:
raise TypeError("Cannot make a Tensor out of None.")
......@@ -445,16 +445,38 @@ class _Op(Op):
# Unary Operations
##########################
def broadcast(scalar_opclass, name, inplace_versions = True):
C = s2t.make_broadcast(scalar_opclass, name = name)
def broadcast(scalar_opclass, name, module_name = None, inplace_versions = True):
C = s2t.make_broadcast(scalar_opclass, name = name, module_name = module_name) # this returns a class
C.__module__ = module_name
c = gof.op.constructor(s2t.wrap_broadcast(C))
if inplace_versions:
CInplace = s2t.make_broadcast(scalar_opclass, {0:0}, name = name+"Inplace")
CInplace.__module__ = module_name
c_inplace = gof.op.constructor(s2t.wrap_broadcast(CInplace))
return C, c, CInplace, c_inplace
else:
return C, c
def _broadcast(scalar_opclass, name, inplace_versions = True):
return broadcast(scalar_opclass, name, 'tensor', inplace_versions)
class Shape(Op):
    """
    L{Op} to return the shape of a matrix.

    The single output is a 1-d int64 vector with one entry per dimension
    of the input.

    @note: Non-differentiable (grad raises ValueError).
    """
    def __init__(self, x, **kwargs):
        Op.__init__(self, **kwargs)
        # Coerce the input to a Tensor so any array-like is accepted.
        x = astensor(x)
        self.inputs = [x]
        # One 1-d, non-broadcastable int64 output holding the shape entries.
        self.outputs = [Tensor("int64", [False])]
    def impl(self, x):
        # x.shape is a tuple of ints; return it as an int array.
        return numpy.asarray(x.shape)
    def grad(self, (x,), (gz,)):
        # Shape is not differentiable with respect to its input.
        raise ValueError
# Module-level constructor: shape(x) builds a Shape op and returns its output.
shape = gof.op.constructor(Shape)
class Argmax(Op):
"""Calculate the max and argmax over a given axis"""
nin=2 # tensor, axis
......@@ -487,32 +509,43 @@ def max(x, axis=None):
# but when Argmax.c_impl() is in place, it should be fine.
return argmax(x,axis)[0]
Abs, _abs, AbsInplace, abs_inplace = broadcast(scal.Abs, 'Abs')
Exp, exp, ExpInplace, exp_inplace = broadcast(scal.Exp, 'Exp')
Neg, neg, NegInplace, neg_inplace = broadcast(scal.Neg, 'Neg')
Log, log, LogInplace, log_inplace = broadcast(scal.Log, 'Log')
Log2, log2, Log2Inplace, log2_inplace = broadcast(scal.Log2, 'Log2')
Sgn, sgn, SgnInplace, sgn_inplace = broadcast(scal.Sgn, 'Sgn')
Sqr, sqr, SqrInplace, sqr_inplace = broadcast(scal.Sqr, 'Sqr')
Sqrt, sqrt, SqrtInplace, sqrt_inplace = broadcast(scal.Sqrt, 'Sqrt')
Cos, cos, CosInplace, cos_inplace = broadcast(scal.Cos, 'Cos')
Sin, sin, SinInplace, sin_inplace = broadcast(scal.Sin, 'Sin')
Tan, tan, TanInplace, tan_inplace = broadcast(scal.Tan, 'Tan')
Cosh, cosh, CoshInplace, cosh_inplace = broadcast(scal.Cosh, 'Cosh')
Sinh, sinh, SinhInplace, sinh_inplace = broadcast(scal.Sinh, 'Sinh')
Tanh, tanh, TanhInplace, tanh_inplace = broadcast(scal.Tanh, 'Tanh')
Sum = s2t.Sum
sum = gof.op.constructor(Sum)
Fill, fill, FillInplace, fill_inplace = broadcast(scal.Second, 'Fill')
Abs, _abs, AbsInplace, abs_inplace = _broadcast(scal.Abs, 'Abs')
Exp, exp, ExpInplace, exp_inplace = _broadcast(scal.Exp, 'Exp')
Neg, neg, NegInplace, neg_inplace = _broadcast(scal.Neg, 'Neg')
Log, log, LogInplace, log_inplace = _broadcast(scal.Log, 'Log')
Log2, log2, Log2Inplace, log2_inplace = _broadcast(scal.Log2, 'Log2')
Sgn, sgn, SgnInplace, sgn_inplace = _broadcast(scal.Sgn, 'Sgn')
Sqr, sqr, SqrInplace, sqr_inplace = _broadcast(scal.Sqr, 'Sqr')
Sqrt, sqrt, SqrtInplace, sqrt_inplace = _broadcast(scal.Sqrt, 'Sqrt')
Cos, cos, CosInplace, cos_inplace = _broadcast(scal.Cos, 'Cos')
Sin, sin, SinInplace, sin_inplace = _broadcast(scal.Sin, 'Sin')
Tan, tan, TanInplace, tan_inplace = _broadcast(scal.Tan, 'Tan')
Cosh, cosh, CoshInplace, cosh_inplace = _broadcast(scal.Cosh, 'Cosh')
Sinh, sinh, SinhInplace, sinh_inplace = _broadcast(scal.Sinh, 'Sinh')
Tanh, tanh, TanhInplace, tanh_inplace = _broadcast(scal.Tanh, 'Tanh')
Fill, fill, FillInplace, fill_inplace = _broadcast(scal.Second, 'Fill')
def ones_like(model):
    """Return `fill(model, 1.0)`: the constant 1.0 broadcast against `model`."""
    return fill(model, 1.0)
def zeros_like(model):
    """Return `fill(model, 0.0)`: the constant 0.0 broadcast against `model`."""
    return fill(model, 0.0)
TensorCopy, tensor_copy = broadcast(scal.Identity, 'TensorCopy', False)
TensorCopy, tensor_copy = _broadcast(scal.Identity, 'TensorCopy', inplace_versions = False)
Sum = s2t.Sum
sum = gof.op.constructor(Sum)
##########################
# Arithmetics
##########################
Add, add, AddInplace, add_inplace = _broadcast(scal.Add, 'Add')
Sub, sub, SubInplace, sub_inplace = _broadcast(scal.Sub, 'Sub')
Mul, mul, MulInplace, mul_inplace = _broadcast(scal.Mul, 'Mul')
Div, div, DivInplace, div_inplace = _broadcast(scal.Div, 'Div')
Pow, pow, PowInplace, pow_inplace = _broadcast(scal.Pow, 'Pow')
##########################
......@@ -606,15 +639,59 @@ class Subtensor(Op, Viewer):
subtensor = gof.op.constructor(Subtensor)
##########################
# Arithmetics
##########################
class VerticalStack(Op):
"""
Vertically stack two L{Tensor}s.
Stack two L{Tensor}s along the first axis (row wise). These
L{Tensor}s must have the same shape along all dimensions but the
first.
@attention: Because we use vstack as the implementation, if the
inputs have 1-dimension, the output will have 2-dimensions.
"""
def __init__(self, x, y, **kwargs):
Op.__init__(self, **kwargs)
x = astensor(x)
y = astensor(y)
assert x.dtype == y.dtype
if x.broadcastable[1:] != y.broadcastable[1:]:
raise NotImplementedError
self.inputs = [x, y]
bcastable = (False, ) + x.broadcastable[1:]
self.outputs = [Tensor(x.dtype, bcastable)]
def impl(self, x, y):
assert x.ndim == y.ndim
# Make sure every dimension (save the first) is the same
for i in range(x.ndim): assert i == 0 or x.shape[i] == y.shape[i]
Add, add, AddInplace, add_inplace = broadcast(scal.Add, 'Add')
Sub, sub, SubInplace, sub_inplace = broadcast(scal.Sub, 'Sub')
Mul, mul, MulInplace, mul_inplace = broadcast(scal.Mul, 'Mul')
Div, div, DivInplace, div_inplace = broadcast(scal.Div, 'Div')
Pow, pow, PowInplace, pow_inplace = broadcast(scal.Pow, 'Pow')
return numpy.vstack([x, y])
def grad(self, (x, y), (gz,)):
"""
@todo: Make VSplit (or this grad implementation) its own L{Op},
that way we can do more sanity-checking::
assert x.ndim == y.ndim
# Make sure every dimension (save the first) is the same
for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i]
etc...
"""
xs = shape(x)
ys = shape(y)
return gz[:xs[0]], gz[xs[0]:]
vertical_stack = gof.op.constructor(VerticalStack)
def horizontal_stack(x, y, **kwargs):
    """
    Stack two L{Tensor}s along the second axis (column wise).

    Both inputs must have exactly two dimensions and must agree in shape
    along every dimension except the second.

    @note: Unlike VerticalStack, this assumes the L{Tensor}s have two
           dimensions.
    """
    assert x.ndim == 2
    assert y.ndim == 2
    # Column-stacking is row-stacking of the transposes, transposed back.
    stacked = vertical_stack(x.T, y.T, **kwargs)
    return transpose(stacked)
#########################
......@@ -624,8 +701,7 @@ Pow, pow, PowInplace, pow_inplace = broadcast(scal.Pow, 'Pow')
class Dot(_Op):
nin=2
nout=1
@staticmethod
def broadcastable_rule(bx,by):
def propagate_broadcastable(self, bx, by):
if len(bx) == 0: # x is a scalar
rval = by
else:
......@@ -635,20 +711,11 @@ class Dot(_Op):
rval = bx[:-1]
else: #y is a scalar
rval = bx
return rval
def propagate_broadcastable(self, bx, by):
return [self.broadcastable_rule(bx,by)]
return [rval]
def impl(self, x, y):
return numpy.dot(x, y)
def grad(self, (x, y), (gz,)):
return dot(gz, y.T), dot(x.T, gz)
if 0:
def c_support_code(self):
return blas.cblas_header_text()
def c_libs(self):
return blas.ldflags()
def c_impl(self, (_x, _y), (_z, )):
return blas.gemm_code('', '1.0', '0.0')
dot = gof.op.constructor(Dot)
class Gemm(_Op):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论