Unverified 提交 84936418 authored 作者: Kaustubh's avatar Kaustubh 提交者: GitHub

Refactor and simplify CAReduce, Max, and Min Ops (#297)

上级 584c0c15
......@@ -3024,11 +3024,9 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
"""
def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None):
if not hasattr(scalar_op, "identity"):
if scalar_op.identity is None:
raise ValueError("No identity on scalar op")
CAReduceDtype.__init__(
self, scalar_op, axis=axis, dtype=dtype, acc_dtype=acc_dtype
)
super().__init__(scalar_op, axis=axis, dtype=dtype, acc_dtype=acc_dtype)
def __str__(self):
ax = ""
......@@ -3038,7 +3036,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
def make_node(self, input):
ctx_name = infer_context_name(input)
res = CAReduceDtype.make_node(self, input)
res = super().make_node(input)
input = as_gpuarray_variable(input, ctx_name)
otype = GpuArrayType(
dtype=res.outputs[0].dtype,
......
......@@ -1259,19 +1259,19 @@ class UnaryScalarOp(ScalarOp):
class BinaryScalarOp(ScalarOp):
# One may define in subclasses the following fields:
# - `identity`: for an associative operation, identity corresponds to
# the neutral element. For instance, it will be 0 for addition, 1 for
# multiplication, True for "and", False for "or".
# - `commutative`: whether op(a, b) == op(b, a)
# - `associative`: whether op(op(a, b), c) == op(a, op(b, c))
commutative = None
associative = None
identity = None
"""
For an associative operation, the identity object corresponds to the neutral
element. For instance, it will be ``0`` for addition, ``1`` for multiplication,
``True`` for ``and``, ``False`` for ``or``.
"""
nin = 2
###############
# Comparisons
###############
class LogicalComparison(BinaryScalarOp):
def __init__(self, *args, **kwargs):
BinaryScalarOp.__init__(self, *args, **kwargs)
......@@ -1725,6 +1725,7 @@ class ScalarMaximum(BinaryScalarOp):
associative = True
nfunc_spec = ("maximum", 2, 1)
nfunc_variadic = "maximum"
identity = -np.inf
def impl(self, *inputs):
# The built-in max function don't support complex type
......@@ -1767,6 +1768,7 @@ class ScalarMinimum(BinaryScalarOp):
associative = True
nfunc_spec = ("minimum", 2, 1)
nfunc_variadic = "minimum"
identity = np.inf
def impl(self, *inputs):
# The built-in min function don't support complex type
......
......@@ -15,19 +15,10 @@ from aesara.misc.frozendict import frozendict
from aesara.misc.safe_asarray import _asarray
from aesara.printing import FunctionPrinter, pprint
from aesara.scalar import get_scalar_type
from aesara.scalar.basic import (
AND,
OR,
XOR,
Add,
Mul,
Scalar,
ScalarMaximum,
ScalarMinimum,
)
from aesara.scalar.basic import Scalar
from aesara.scalar.basic import bool as scalar_bool
from aesara.scalar.basic import identity as scalar_identity
from aesara.scalar.basic import scalar_maximum, scalar_minimum, transfer_type, upcast
from aesara.scalar.basic import transfer_type, upcast
from aesara.tensor import elemwise_cgen as cgen
from aesara.tensor.type import (
TensorType,
......@@ -1209,7 +1200,7 @@ second dimension
"""
% locals()
)
return decl, checks, alloc, loop
return decl, checks, alloc, loop, ""
def c_code(self, node, nodename, inames, onames, sub):
if (
......@@ -1310,47 +1301,26 @@ class CAReduce(COp):
raise NotImplementedError(
"CAReduce only supports binary functions with a single " "output."
)
self.scalar_op = scalar_op
if axis is None:
self.axis = axis
# There is a bug in numpy that results in isinstance(x,
# integer_types) returning False for numpy integers. See
# <http://projects.scipy.org/numpy/ticket/2235>.
elif isinstance(axis, (int, np.integer)):
self.axis = (axis,)
elif isinstance(axis, np.ndarray) and axis.ndim == 0:
self.axis = (int(axis),)
else:
self.axis = list({int(a) for a in axis})
self.axis.sort()
self.axis = tuple(self.axis)
self.axis = None
self.ufunc_is_vectorized = False
self.scalar_op = scalar_op
self.set_ufunc(scalar_op)
if axis is not None:
if isinstance(axis, (int, np.integer)) or (
isinstance(axis, np.ndarray) and not axis.shape
):
self.axis = (int(axis),)
else:
self.axis = tuple(axis)
def set_ufunc(self, scalar_op):
# TODO FIXME: Why would we ever do this, instead of allowing the `Op`
# itself to tell us which `ufunc` it should use?
if isinstance(scalar_op, Add):
self.ufunc = np.add
elif isinstance(scalar_op, Mul):
self.ufunc = np.multiply
elif isinstance(scalar_op, ScalarMaximum):
self.ufunc = np.maximum
elif isinstance(scalar_op, ScalarMinimum):
self.ufunc = np.minimum
elif isinstance(scalar_op, AND) and _numpy_ver >= [1, 12]:
# numpy.bitwise_and.identity was incorrect for versions before
# 1.12 (it was 1 instead of -1), so we skip it in that case.
# We will fall back to the "else:" case, which defines a
# ufunc without identity.
self.ufunc = np.bitwise_and
elif isinstance(scalar_op, OR):
self.ufunc = np.bitwise_or
elif isinstance(scalar_op, XOR):
self.ufunc = np.bitwise_xor
if hasattr(scalar_op, "nfunc_spec") and hasattr(np, scalar_op.nfunc_spec[0]):
self.ufunc = getattr(np, scalar_op.nfunc_spec[0])
else:
self.ufunc = np.frompyfunc(scalar_op.impl, 2, 1)
self.ufunc_is_vectorized = True
def _output_dtype(self, input_dtype):
return input_dtype
......@@ -1359,41 +1329,41 @@ class CAReduce(COp):
from aesara.tensor.basic import as_tensor_variable
input = as_tensor_variable(input)
inp_dims = input.type.ndim
inp_bdcast = input.type.broadcastable
inp_dtype = input.type.dtype
copy_op = False
if self.axis is not None:
for axis in self.axis:
if axis >= input.type.ndim or (
axis < 0 and abs(axis) > input.type.ndim
):
raise ValueError(
f"Not enough dimensions on {input} to reduce on axis {axis}"
)
input = as_tensor_variable(input)
axis = self.axis
if axis is None:
axis = list(range(len(input.type.broadcastable)))
if any(a < 0 for a in axis):
axis2 = []
for a in self.axis:
if a < 0:
axis2.append(a + input.type.ndim)
else:
axis2.append(a)
assert len(axis) == len(axis2)
axis = tuple(axis2)
# We can't call self.__class__() as there is a class that
# inherits from CAReduce that doesn't have the same signature
axis = list(range(len(inp_bdcast)))
axis = list(axis)
for i, a in enumerate(axis):
if a >= inp_dims or a < -inp_dims:
raise ValueError(
f"Not enough dimensions on {input} to reduce on axis {a}"
)
if a < 0:
copy_op = True
axis[i] = a + inp_dims
# We can't call self.__class__() as there is a class that
# inherits from CAReduce that doesn't have the same signature
if copy_op:
op = copy(self)
op.set_ufunc(op.scalar_op)
op.axis = axis
assert len(axis) == len(self.axis)
op.axis = tuple(axis)
else:
op = self
broadcastable = [
x for i, x in enumerate(input.type.broadcastable) if i not in axis
]
broadcastable = [x for i, x in enumerate(inp_bdcast) if i not in axis]
output = TensorType(
dtype=self._output_dtype(input.type.dtype), broadcastable=broadcastable
dtype=self._output_dtype(inp_dtype), broadcastable=broadcastable
)()
return Apply(op, [input], [output])
def __getstate__(self):
......@@ -1420,38 +1390,25 @@ class CAReduce(COp):
axis = self.axis
if axis is None:
axis = list(range(input.ndim))
variable = input
to_reduce = reversed(sorted(axis))
if hasattr(self, "acc_dtype") and self.acc_dtype is not None:
acc_dtype = self.acc_dtype
else:
acc_dtype = node.outputs[0].type.dtype
if to_reduce:
for dimension in to_reduce:
# If it's a zero-sized array, use scalar_op.identity
# if available
if variable.shape[dimension] == 0:
if hasattr(self.scalar_op, "identity"):
# Compute the shape of the output
v_shape = list(variable.shape)
del v_shape[dimension]
variable = np.empty(tuple(v_shape), dtype=acc_dtype)
variable.fill(self.scalar_op.identity)
else:
raise ValueError(
f"Input ({variable}) has zero-size on axis {dimension}, but "
f"self.scalar_op ({self.scalar_op}) has no attribute 'identity'"
)
else:
variable = self.ufunc.reduce(variable, dimension, dtype=acc_dtype)
variable = np.array(input, dtype=acc_dtype)
variable = np.asarray(variable)
if np.may_share_memory(variable, input):
# perhaps numpy is clever for reductions of size 1?
# We don't want this.
variable = variable.copy()
if axis:
# Reducing functions built using np.frompyfunc() do not
# support reduction along multiple axes. Hence loop through
# each, otherwise numpy's inbuilt reduction functions
# support reduction along multiple axes directly.
if self.ufunc_is_vectorized:
to_reduce = reversed(sorted(axis))
for dimension in to_reduce:
variable = self.ufunc.reduce(variable, dimension, dtype=acc_dtype)
else:
variable = self.ufunc.reduce(variable, axis=tuple(axis))
output[0] = _asarray(variable, dtype=node.outputs[0].type.dtype)
else:
# Force a copy
......@@ -1559,60 +1516,30 @@ class CAReduce(COp):
dict(sub, lv0=aname),
)
if hasattr(self.scalar_op, "identity"):
identity = self.scalar_op.identity
elif self.scalar_op in [scalar_maximum, scalar_minimum]:
if self.scalar_op == scalar_maximum:
scal_name = "maximum"
if input.type.dtype in ["float32", "float64"]:
identity = "-__builtin_inf()"
elif input.type.dtype.startswith("uint") or input.type.dtype == "bool":
# numpy does not define NPY_MIN_UINT* and NPY_MIN_BOOL
identity = "0"
else:
identity = "NPY_MIN_" + str(input.type.dtype).upper()
if self.scalar_op == scalar_minimum:
scal_name = "minimum"
if input.type.dtype in ["float32", "float64"]:
identity = "__builtin_inf()"
elif input.type.dtype == "bool":
# numpy does not define NPY_MAX_BOOL
identity = "1"
else:
identity = "NPY_MAX_" + str(input.type.dtype).upper()
fail = sub["fail"]
pattern = [0] * len(node.inputs[0].broadcastable)
axis = self.axis
if axis is None:
axis = list(range(len(pattern)))
for i in axis:
pattern[i] = 1
pattern_ = str(pattern)[1:-1]
decl += """int tosum[]={%(pattern_)s};""" % locals()
alloc += (
"""
for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
if(PyArray_DIMS(%(iname)s)[i]==0 && tosum[i]){
PyErr_Format(PyExc_ValueError,
"Input of CAReduce{%(scal_name)s} has zero-size on axis %%d",i);
%(fail)s;
}
}
"""
% locals()
)
else:
raise TypeError("The CAReduce.scalar_op must have an identity field.")
identity = self.scalar_op.identity
if np.isposinf(identity):
if input.type.dtype in ["float32", "float64"]:
identity = "__builtin_inf()"
elif input.type.dtype.startswith("uint") or input.type.dtype == "bool":
identity = "1"
else:
identity = "NPY_MAX_" + str(input.type.dtype).upper()
elif np.isneginf(identity):
if input.type.dtype in ["float32", "float64"]:
identity = "-__builtin_inf()"
elif input.type.dtype.startswith("uint") or input.type.dtype == "bool":
identity = "0"
else:
identity = "NPY_MIN_" + str(input.type.dtype).upper()
elif identity is None:
raise TypeError(f"The {self.scalar_op} does not define an identity.")
task0_decl = (
"%(dtype)s& %(name)s_i = *%(name)s_iter;\n"
"%(name)s_i = %(identity)s;"
% dict(dtype=adtype, name=aname, identity=identity)
f"{adtype}& {aname}_i = *{aname}_iter;\n" f"{aname}_i = {identity};"
)
task1_decl = "%(dtype)s& %(name)s_i = *%(name)s_iter;\n" % dict(
dtype=idtype, name=inames[0]
)
task1_decl = f"{idtype}& {inames[0]}_i = *{inames[0]}_iter;\n"
task1_code = self.scalar_op.c_code(
Apply(
......@@ -1631,15 +1558,12 @@ class CAReduce(COp):
[f"{aname}_i"],
sub,
)
code1 = (
"""
{
%(task1_decl)s
%(task1_code)s
}
code1 = f"""
{{
{task1_decl}
{task1_code}
}}
"""
% locals()
)
if node.inputs[0].type.ndim:
if len(axis) == 1:
......@@ -1662,11 +1586,9 @@ class CAReduce(COp):
end = ""
if adtype != odtype:
end = """
PyArray_CopyInto(%(oname)s, %(aname)s);
""" % dict(
oname=oname, aname=aname
)
end = f"""
PyArray_CopyInto({oname}, {aname});
"""
end += acc_type.c_cleanup(aname, sub)
return decl, checks, alloc, loop, end
......@@ -1681,7 +1603,7 @@ class CAReduce(COp):
def c_code_cache_version_apply(self, node):
# the version corresponding to the c code in this Op
version = [8]
version = [9]
# now we insert versions for the ops on which we depend...
scalar_node = Apply(
......
......@@ -585,14 +585,45 @@ def max_and_argmax(a, axis=None, keepdims=False):
return [out, argout]
class Max(CAReduce):
class NonZeroCAReduce(CAReduce):
def _c_all(self, node, name, inames, onames, sub):
decl, checks, alloc, loop, end = super()._c_all(node, name, inames, onames, sub)
# We add an additional check for zero-sized dimensions (This seems like
# something that could enabled in `elemwise_cgen.make_checks`.)
iname = inames[0]
axis = self.axis
if axis is None:
axis = list(range(len(node.inputs[0].type.broadcastable)))
pattern = [0] * len(node.inputs[0].broadcastable)
for i in axis:
pattern[i] = 1
pattern_ = str(pattern)[1:-1]
decl += f"""int tosum[]={{{pattern_}}};"""
alloc += f"""
for(int i=0;i<PyArray_NDIM({iname});i++){{
if(PyArray_DIMS({iname})[i]==0 && tosum[i]){{
PyErr_Format(PyExc_ValueError,
"Input of CAReduce{{{node.op.scalar_op}}} has zero-size on axis %%d",i);
{sub["fail"]};
}}
}}
"""
return decl, checks, alloc, loop, end
class Max(NonZeroCAReduce):
nfunc_spec = ("max", 1, 1)
def __init__(self, axis):
super().__init__(aes.scalar_maximum, axis)
class Min(CAReduce):
class Min(NonZeroCAReduce):
nfunc_spec = ("min", 1, 1)
def __init__(self, axis):
......
......@@ -60,6 +60,7 @@ from aesara.tensor.math import (
All,
Any,
Dot,
NonZeroCAReduce,
Prod,
ProdWithoutZeros,
Sum,
......@@ -1534,14 +1535,18 @@ def local_op_of_op(fgraph, node):
return [combined(node_inps.owner.inputs[0])]
ALL_REDUCE = [
CAReduce,
All,
Any,
Sum,
Prod,
ProdWithoutZeros,
] + CAReduce.__subclasses__()
ALL_REDUCE = (
[
CAReduce,
All,
Any,
Sum,
Prod,
ProdWithoutZeros,
]
+ CAReduce.__subclasses__()
+ NonZeroCAReduce.__subclasses__()
)
@register_canonicalize
......
......@@ -372,7 +372,7 @@ class TestCAReduce(unittest_tools.InferShapeTester):
zv = xv
if pre_scalar_op is not None:
zv = Elemwise(scalar_op=pre_scalar_op)(x).eval({x: xv})
numpy_raised = False
if len(tosum) > 1 and any([a < 0 for a in tosum]):
# In that case, we need to use the good order of axis
# in the reduction.
......@@ -404,17 +404,19 @@ class TestCAReduce(unittest_tools.InferShapeTester):
for axis in reversed(sorted(tosum)):
zv = np.multiply.reduce(zv, axis)
elif scalar_op == aes.scalar_maximum:
try:
for axis in reversed(sorted(tosum)):
zv = np.maximum.reduce(zv, axis)
except ValueError:
numpy_raised = True
# There is no identity value for the maximum function
# So we can't support shape of dimensions 0.
if np.prod(zv.shape) == 0:
continue
for axis in reversed(sorted(tosum)):
zv = np.maximum.reduce(zv, axis)
elif scalar_op == aes.scalar_minimum:
try:
for axis in reversed(sorted(tosum)):
zv = np.minimum.reduce(zv, axis)
except ValueError:
numpy_raised = True
# There is no identity value for the minimum function
# So we can't support shape of dimensions 0.
if np.prod(zv.shape) == 0:
continue
for axis in reversed(sorted(tosum)):
zv = np.minimum.reduce(zv, axis)
elif scalar_op == aes.or_:
for axis in reversed(sorted(tosum)):
zv = np.bitwise_or.reduce(zv, axis)
......@@ -432,24 +434,21 @@ class TestCAReduce(unittest_tools.InferShapeTester):
raise Exception(
f"Test for CAReduce with scalar_op {scalar_op} not implemented"
)
if scalar_op in [aes.scalar_maximum, aes.scalar_minimum] and numpy_raised:
with pytest.raises(ValueError):
f(xv)
if test_nan:
try:
assert self.type.values_eq(f(xv), zv), (f(xv), zv)
except NotImplementedError:
# GpuCAReduce don't implement all cases when size is 0
assert xv.size == 0
else:
if test_nan:
try:
assert self.type.values_eq(f(xv), zv), (f(xv), zv)
except NotImplementedError:
# GpuCAReduce don't implement all cases when size is 0
assert xv.size == 0
else:
try:
f_xv = f(xv)
assert f_xv.shape == zv.shape, (f_xv, zv)
utt.assert_allclose(zv, f_xv)
except NotImplementedError:
# GpuCAReduce don't implement all cases when size is 0
assert xv.size == 0
try:
f_xv = f(xv)
assert f_xv.shape == zv.shape, (f_xv, zv)
utt.assert_allclose(zv, f_xv)
except NotImplementedError:
# GpuCAReduce don't implement all cases when size is 0
assert xv.size == 0
x = self.type(dtype, [(entry == 1) for entry in xsh])("x")
if tensor_op is None:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论