提交 64a95964 authored 作者: Frederic Bastien's avatar Frederic Bastien

make T.max() return a CAReduce instance as this is faster.

Add tests for CAReduce{maximum,mul} and modify CAReduce to handle maximum correctly.
上级 31380660
......@@ -1406,11 +1406,17 @@ def max(x, axis=None):
Return maximum elements obtained by iterating over given axis
Default axis is the last one.
:note: we raise an error, as numpy does, when we reduce over a dimension with a shape of 0
"""
# In python (using MaxAndArgmax.perform()) this leads to an wasteful
# implementation that goes through the data twice instead of once
# but when Argmax.c_impl() is in place, it should be fine.
return max_and_argmax(x,axis)[0]
if isinstance(axis,int) or axis is None:
return CAReduce(scal.maximum,axis)(x)
#TODO: does CAReduce need axis to be constant?
try:
const = get_constant_value(axis)
return CAReduce(scal.maximum,list(const))(x)
except:
return max_and_argmax(x,axis)[0]
@constructor
def argmax(x, axis=None):
......
......@@ -782,6 +782,7 @@ class CAReduce(Op):
Examples:
CAReduce(add) -> sum
CAReduce(mul) -> product
CAReduce(maximum) -> max
CAReduce(_or) -> any # not lazy
CAReduce(_and) -> all # not lazy
......@@ -790,7 +791,7 @@ class CAReduce(Op):
iterates over the dimensions and the elements of the
array(s). Therefore, to ensure consistent variables, the scalar
operation represented by the reduction must be both commutative
and associative (eg add, multiply, binary or/and/xor - but not
and associative (eg add, multiply, maximum, binary or/and/xor - but not
subtract, divide or power).
"""
......@@ -927,10 +928,36 @@ class CAReduce(Op):
alloc += cgen.make_declare([range(nnested) + ['x'] * len(axis)], [odtype], dict(sub, lv0 = oname))
alloc += cgen.make_alloc([order1], odtype, sub)
alloc += cgen.make_checks([range(nnested) + ['x'] * len(axis)], [odtype], dict(sub, lv0 = oname))
if hasattr(self.scalar_op,'identity'):
identity = self.scalar_op.identity
elif self.scalar_op == scalar.maximum:
if input.type.dtype in ["float32","float64"]:
identity = "-__builtin_inf()"
else:
identity = "NPY_MIN_"+str(input.type.dtype).upper()
fail = sub["fail"]
pattern=[0]*len(node.inputs[0].broadcastable)
axis = self.axis
if axis == None: axis = range(len(pattern))
for i in axis:
pattern[i]=1
pattern_ = str(pattern)[1:-1]
decl +="""int tosum[]={%(pattern_)s};"""%locals()
alloc += """
for(int i=0;i<%(iname)s->nd;i++){
if(PyArray_DIMS(%(iname)s)[i]==0 && tosum[i]){
PyErr_Format(PyExc_ValueError, "Input of CAReduce{maximum} has zero-size on axis %%d",i);
%(fail)s;
}
}
"""%locals()
else:
raise Exception("The CAReduce.scalar_op must have an identity field.")
task0_decl = "%(dtype)s& %(name)s_i = *%(name)s_iter;\n%(name)s_i = %(identity)s;" % dict(dtype = odtype,
name = onames[0],
identity = self.scalar_op.identity)
identity = identity)
task1_decl = "%(dtype)s& %(name)s_i = *%(name)s_iter;\n" % dict(dtype = idtype, name = inames[0])
......
......@@ -154,7 +154,7 @@ class test_CAReduce(unittest.TestCase):
def setUp(self):
unittest_tools.seed_rng()
def with_linker(self, linker):
def with_linker(self, linker, scalar_op = add):
for xsh, tosum in [((5, 6), None),
((5, 6), (0, 1)),
((5, 6), (0, )),
......@@ -165,29 +165,70 @@ class test_CAReduce(unittest.TestCase):
((5, 0), (1, )),
((), ())]:
x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
e = CAReduce(add, axis = tosum)(x)
e = CAReduce(scalar_op, axis = tosum)(x)
if tosum is None: tosum = range(len(xsh))
f = copy(linker).accept(Env([x], [e])).make_function()
xv = numpy.asarray(numpy.random.rand(*xsh))
zv = xv
for axis in reversed(sorted(tosum)):
zv = numpy.add.reduce(zv, axis)
self.failUnless((numpy.abs(f(xv) - zv) < 1e-10).all())
numpy_raised = False
if scalar_op == add:
for axis in reversed(sorted(tosum)):
zv = numpy.add.reduce(zv, axis)
elif scalar_op == mul:
for axis in reversed(sorted(tosum)):
zv = numpy.multiply.reduce(zv, axis)
elif scalar_op == maximum:
try:
for axis in reversed(sorted(tosum)):
zv = numpy.maximum.reduce(zv, axis)
except ValueError:
numpy_raised=True
elif scalar_op == or_:
for axis in reversed(sorted(tosum)):
zv = numpy.any(zv, axis)
elif scalar_op == and_:
for axis in reversed(sorted(tosum)):
zv = numpy.all(zv, axis)
else:
raise Exception("Test for CAReduce with scalar_op %s not implemented"%str(scalar_op))
if scalar_op == maximum and numpy_raised:
try:
f(xv)
except ValueError:
pass
else:
self.fail()
else:
self.failUnless((numpy.abs(f(xv) - zv) < 1e-10).all())
#test CAReduce.infer_shape
#the Shape op doesn't implement c_code!
if isinstance(linker,gof.PerformLinker):
x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
e = CAReduce(add, axis = tosum)(x)
e = CAReduce(scalar_op, axis = tosum)(x)
if tosum is None: tosum = range(len(xsh))
f = copy(linker).accept(Env([x], [e.shape])).make_function()
assert all(f(xv)== zv.shape)
if not(scalar_op == maximum and ((xsh==() or numpy.prod(xsh)==0))):
assert all(f(xv)== zv.shape)
def test_perform(self):
self.with_linker(gof.PerformLinker())
self.with_linker(gof.PerformLinker(), add)
self.with_linker(gof.PerformLinker(), mul)
self.with_linker(gof.PerformLinker(), maximum)
#need other dtype than real
#self.with_linker(gof.PerformLinker(), or_)
#self.with_linker(gof.PerformLinker(), and_)
def test_c(self):
self.with_linker(gof.CLinker())
self.with_linker(gof.CLinker(), add)
self.with_linker(gof.CLinker(), mul)
self.with_linker(gof.CLinker(), maximum)
#need other dtype than real
#no c_code for or_, and_
#self.with_linker(gof.CLinker(), or_)
#self.with_linker(gof.CLinker(), and_)
if __name__ == '__main__':
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论