Unverified 提交 84936418 作者: Kaustubh 提交者: GitHub

Refactor and simplify CAReduce, Max, and Min Ops (#297)

上级 584c0c15
...@@ -3024,11 +3024,9 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -3024,11 +3024,9 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
""" """
def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None): def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None):
if not hasattr(scalar_op, "identity"): if scalar_op.identity is None:
raise ValueError("No identity on scalar op") raise ValueError("No identity on scalar op")
CAReduceDtype.__init__( super().__init__(scalar_op, axis=axis, dtype=dtype, acc_dtype=acc_dtype)
self, scalar_op, axis=axis, dtype=dtype, acc_dtype=acc_dtype
)
def __str__(self): def __str__(self):
ax = "" ax = ""
...@@ -3038,7 +3036,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -3038,7 +3036,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
def make_node(self, input): def make_node(self, input):
ctx_name = infer_context_name(input) ctx_name = infer_context_name(input)
res = CAReduceDtype.make_node(self, input) res = super().make_node(input)
input = as_gpuarray_variable(input, ctx_name) input = as_gpuarray_variable(input, ctx_name)
otype = GpuArrayType( otype = GpuArrayType(
dtype=res.outputs[0].dtype, dtype=res.outputs[0].dtype,
......
...@@ -1259,19 +1259,19 @@ class UnaryScalarOp(ScalarOp): ...@@ -1259,19 +1259,19 @@ class UnaryScalarOp(ScalarOp):
class BinaryScalarOp(ScalarOp): class BinaryScalarOp(ScalarOp):
# One may define in subclasses the following fields: # One may define in subclasses the following fields:
# - `identity`: for an associative operation, identity corresponds to
# the neutral element. For instance, it will be 0 for addition, 1 for
# multiplication, True for "and", False for "or".
# - `commutative`: whether op(a, b) == op(b, a) # - `commutative`: whether op(a, b) == op(b, a)
# - `associative`: whether op(op(a, b), c) == op(a, op(b, c)) # - `associative`: whether op(op(a, b), c) == op(a, op(b, c))
commutative = None
associative = None
identity = None
"""
For an associative operation, the identity object corresponds to the neutral
element. For instance, it will be ``0`` for addition, ``1`` for multiplication,
``True`` for ``and``, ``False`` for ``or``.
"""
nin = 2 nin = 2
###############
# Comparisons
###############
class LogicalComparison(BinaryScalarOp): class LogicalComparison(BinaryScalarOp):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
BinaryScalarOp.__init__(self, *args, **kwargs) BinaryScalarOp.__init__(self, *args, **kwargs)
...@@ -1725,6 +1725,7 @@ class ScalarMaximum(BinaryScalarOp): ...@@ -1725,6 +1725,7 @@ class ScalarMaximum(BinaryScalarOp):
associative = True associative = True
nfunc_spec = ("maximum", 2, 1) nfunc_spec = ("maximum", 2, 1)
nfunc_variadic = "maximum" nfunc_variadic = "maximum"
identity = -np.inf
def impl(self, *inputs): def impl(self, *inputs):
# The built-in max function don't support complex type # The built-in max function don't support complex type
...@@ -1767,6 +1768,7 @@ class ScalarMinimum(BinaryScalarOp): ...@@ -1767,6 +1768,7 @@ class ScalarMinimum(BinaryScalarOp):
associative = True associative = True
nfunc_spec = ("minimum", 2, 1) nfunc_spec = ("minimum", 2, 1)
nfunc_variadic = "minimum" nfunc_variadic = "minimum"
identity = np.inf
def impl(self, *inputs): def impl(self, *inputs):
# The built-in min function don't support complex type # The built-in min function don't support complex type
......
...@@ -15,19 +15,10 @@ from aesara.misc.frozendict import frozendict ...@@ -15,19 +15,10 @@ from aesara.misc.frozendict import frozendict
from aesara.misc.safe_asarray import _asarray from aesara.misc.safe_asarray import _asarray
from aesara.printing import FunctionPrinter, pprint from aesara.printing import FunctionPrinter, pprint
from aesara.scalar import get_scalar_type from aesara.scalar import get_scalar_type
from aesara.scalar.basic import ( from aesara.scalar.basic import Scalar
AND,
OR,
XOR,
Add,
Mul,
Scalar,
ScalarMaximum,
ScalarMinimum,
)
from aesara.scalar.basic import bool as scalar_bool from aesara.scalar.basic import bool as scalar_bool
from aesara.scalar.basic import identity as scalar_identity from aesara.scalar.basic import identity as scalar_identity
from aesara.scalar.basic import scalar_maximum, scalar_minimum, transfer_type, upcast from aesara.scalar.basic import transfer_type, upcast
from aesara.tensor import elemwise_cgen as cgen from aesara.tensor import elemwise_cgen as cgen
from aesara.tensor.type import ( from aesara.tensor.type import (
TensorType, TensorType,
...@@ -1209,7 +1200,7 @@ second dimension ...@@ -1209,7 +1200,7 @@ second dimension
""" """
% locals() % locals()
) )
return decl, checks, alloc, loop return decl, checks, alloc, loop, ""
def c_code(self, node, nodename, inames, onames, sub): def c_code(self, node, nodename, inames, onames, sub):
if ( if (
...@@ -1310,47 +1301,26 @@ class CAReduce(COp): ...@@ -1310,47 +1301,26 @@ class CAReduce(COp):
raise NotImplementedError( raise NotImplementedError(
"CAReduce only supports binary functions with a single " "output." "CAReduce only supports binary functions with a single " "output."
) )
self.scalar_op = scalar_op
if axis is None:
self.axis = axis
# There is a bug in numpy that results in isinstance(x,
# integer_types) returning False for numpy integers. See
# <http://projects.scipy.org/numpy/ticket/2235>.
elif isinstance(axis, (int, np.integer)):
self.axis = (axis,)
elif isinstance(axis, np.ndarray) and axis.ndim == 0:
self.axis = (int(axis),)
else:
self.axis = list({int(a) for a in axis})
self.axis.sort()
self.axis = tuple(self.axis)
self.axis = None
self.ufunc_is_vectorized = False
self.scalar_op = scalar_op
self.set_ufunc(scalar_op) self.set_ufunc(scalar_op)
if axis is not None:
if isinstance(axis, (int, np.integer)) or (
isinstance(axis, np.ndarray) and not axis.shape
):
self.axis = (int(axis),)
else:
self.axis = tuple(axis)
def set_ufunc(self, scalar_op): def set_ufunc(self, scalar_op):
# TODO FIXME: Why would we ever do this, instead of allowing the `Op` if hasattr(scalar_op, "nfunc_spec") and hasattr(np, scalar_op.nfunc_spec[0]):
# itself to tell us which `ufunc` it should use? self.ufunc = getattr(np, scalar_op.nfunc_spec[0])
if isinstance(scalar_op, Add):
self.ufunc = np.add
elif isinstance(scalar_op, Mul):
self.ufunc = np.multiply
elif isinstance(scalar_op, ScalarMaximum):
self.ufunc = np.maximum
elif isinstance(scalar_op, ScalarMinimum):
self.ufunc = np.minimum
elif isinstance(scalar_op, AND) and _numpy_ver >= [1, 12]:
# numpy.bitwise_and.identity was incorrect for versions before
# 1.12 (it was 1 instead of -1), so we skip it in that case.
# We will fall back to the "else:" case, which defines a
# ufunc without identity.
self.ufunc = np.bitwise_and
elif isinstance(scalar_op, OR):
self.ufunc = np.bitwise_or
elif isinstance(scalar_op, XOR):
self.ufunc = np.bitwise_xor
else: else:
self.ufunc = np.frompyfunc(scalar_op.impl, 2, 1) self.ufunc = np.frompyfunc(scalar_op.impl, 2, 1)
self.ufunc_is_vectorized = True
def _output_dtype(self, input_dtype): def _output_dtype(self, input_dtype):
return input_dtype return input_dtype
...@@ -1359,41 +1329,41 @@ class CAReduce(COp): ...@@ -1359,41 +1329,41 @@ class CAReduce(COp):
from aesara.tensor.basic import as_tensor_variable from aesara.tensor.basic import as_tensor_variable
input = as_tensor_variable(input) input = as_tensor_variable(input)
inp_dims = input.type.ndim
inp_bdcast = input.type.broadcastable
inp_dtype = input.type.dtype
copy_op = False
if self.axis is not None:
for axis in self.axis:
if axis >= input.type.ndim or (
axis < 0 and abs(axis) > input.type.ndim
):
raise ValueError(
f"Not enough dimensions on {input} to reduce on axis {axis}"
)
input = as_tensor_variable(input)
axis = self.axis axis = self.axis
if axis is None: if axis is None:
axis = list(range(len(input.type.broadcastable))) axis = list(range(len(inp_bdcast)))
if any(a < 0 for a in axis):
axis2 = [] axis = list(axis)
for a in self.axis: for i, a in enumerate(axis):
if a < 0: if a >= inp_dims or a < -inp_dims:
axis2.append(a + input.type.ndim) raise ValueError(
else: f"Not enough dimensions on {input} to reduce on axis {a}"
axis2.append(a) )
assert len(axis) == len(axis2) if a < 0:
axis = tuple(axis2) copy_op = True
# We can't call self.__class__() as there is a class that axis[i] = a + inp_dims
# inherits from CAReduce that doesn't have the same signature
# We can't call self.__class__() as there is a class that
# inherits from CAReduce that doesn't have the same signature
if copy_op:
op = copy(self) op = copy(self)
op.set_ufunc(op.scalar_op) op.set_ufunc(op.scalar_op)
op.axis = axis assert len(axis) == len(self.axis)
op.axis = tuple(axis)
else: else:
op = self op = self
broadcastable = [
x for i, x in enumerate(input.type.broadcastable) if i not in axis broadcastable = [x for i, x in enumerate(inp_bdcast) if i not in axis]
]
output = TensorType( output = TensorType(
dtype=self._output_dtype(input.type.dtype), broadcastable=broadcastable dtype=self._output_dtype(inp_dtype), broadcastable=broadcastable
)() )()
return Apply(op, [input], [output]) return Apply(op, [input], [output])
def __getstate__(self): def __getstate__(self):
...@@ -1420,38 +1390,25 @@ class CAReduce(COp): ...@@ -1420,38 +1390,25 @@ class CAReduce(COp):
axis = self.axis axis = self.axis
if axis is None: if axis is None:
axis = list(range(input.ndim)) axis = list(range(input.ndim))
variable = input
to_reduce = reversed(sorted(axis))
if hasattr(self, "acc_dtype") and self.acc_dtype is not None: if hasattr(self, "acc_dtype") and self.acc_dtype is not None:
acc_dtype = self.acc_dtype acc_dtype = self.acc_dtype
else: else:
acc_dtype = node.outputs[0].type.dtype acc_dtype = node.outputs[0].type.dtype
if to_reduce: variable = np.array(input, dtype=acc_dtype)
for dimension in to_reduce:
# If it's a zero-sized array, use scalar_op.identity
# if available
if variable.shape[dimension] == 0:
if hasattr(self.scalar_op, "identity"):
# Compute the shape of the output
v_shape = list(variable.shape)
del v_shape[dimension]
variable = np.empty(tuple(v_shape), dtype=acc_dtype)
variable.fill(self.scalar_op.identity)
else:
raise ValueError(
f"Input ({variable}) has zero-size on axis {dimension}, but "
f"self.scalar_op ({self.scalar_op}) has no attribute 'identity'"
)
else:
variable = self.ufunc.reduce(variable, dimension, dtype=acc_dtype)
variable = np.asarray(variable) if axis:
if np.may_share_memory(variable, input): # Reducing functions built using np.frompyfunc() do not
# perhaps numpy is clever for reductions of size 1? # support reduction along multiple axes. Hence loop through
# We don't want this. # each, otherwise numpy's inbuilt reduction functions
variable = variable.copy() # support reduction along multiple axes directly.
if self.ufunc_is_vectorized:
to_reduce = reversed(sorted(axis))
for dimension in to_reduce:
variable = self.ufunc.reduce(variable, dimension, dtype=acc_dtype)
else:
variable = self.ufunc.reduce(variable, axis=tuple(axis))
output[0] = _asarray(variable, dtype=node.outputs[0].type.dtype) output[0] = _asarray(variable, dtype=node.outputs[0].type.dtype)
else: else:
# Force a copy # Force a copy
...@@ -1559,60 +1516,30 @@ class CAReduce(COp): ...@@ -1559,60 +1516,30 @@ class CAReduce(COp):
dict(sub, lv0=aname), dict(sub, lv0=aname),
) )
if hasattr(self.scalar_op, "identity"): identity = self.scalar_op.identity
identity = self.scalar_op.identity
elif self.scalar_op in [scalar_maximum, scalar_minimum]: if np.isposinf(identity):
if self.scalar_op == scalar_maximum: if input.type.dtype in ["float32", "float64"]:
scal_name = "maximum" identity = "__builtin_inf()"
if input.type.dtype in ["float32", "float64"]: elif input.type.dtype.startswith("uint") or input.type.dtype == "bool":
identity = "-__builtin_inf()" identity = "1"
elif input.type.dtype.startswith("uint") or input.type.dtype == "bool": else:
# numpy does not define NPY_MIN_UINT* and NPY_MIN_BOOL identity = "NPY_MAX_" + str(input.type.dtype).upper()
identity = "0" elif np.isneginf(identity):
else: if input.type.dtype in ["float32", "float64"]:
identity = "NPY_MIN_" + str(input.type.dtype).upper() identity = "-__builtin_inf()"
if self.scalar_op == scalar_minimum: elif input.type.dtype.startswith("uint") or input.type.dtype == "bool":
scal_name = "minimum" identity = "0"
if input.type.dtype in ["float32", "float64"]: else:
identity = "__builtin_inf()" identity = "NPY_MIN_" + str(input.type.dtype).upper()
elif input.type.dtype == "bool": elif identity is None:
# numpy does not define NPY_MAX_BOOL raise TypeError(f"The {self.scalar_op} does not define an identity.")
identity = "1"
else:
identity = "NPY_MAX_" + str(input.type.dtype).upper()
fail = sub["fail"]
pattern = [0] * len(node.inputs[0].broadcastable)
axis = self.axis
if axis is None:
axis = list(range(len(pattern)))
for i in axis:
pattern[i] = 1
pattern_ = str(pattern)[1:-1]
decl += """int tosum[]={%(pattern_)s};""" % locals()
alloc += (
"""
for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
if(PyArray_DIMS(%(iname)s)[i]==0 && tosum[i]){
PyErr_Format(PyExc_ValueError,
"Input of CAReduce{%(scal_name)s} has zero-size on axis %%d",i);
%(fail)s;
}
}
"""
% locals()
)
else:
raise TypeError("The CAReduce.scalar_op must have an identity field.")
task0_decl = ( task0_decl = (
"%(dtype)s& %(name)s_i = *%(name)s_iter;\n" f"{adtype}& {aname}_i = *{aname}_iter;\n" f"{aname}_i = {identity};"
"%(name)s_i = %(identity)s;"
% dict(dtype=adtype, name=aname, identity=identity)
) )
task1_decl = "%(dtype)s& %(name)s_i = *%(name)s_iter;\n" % dict( task1_decl = f"{idtype}& {inames[0]}_i = *{inames[0]}_iter;\n"
dtype=idtype, name=inames[0]
)
task1_code = self.scalar_op.c_code( task1_code = self.scalar_op.c_code(
Apply( Apply(
...@@ -1631,15 +1558,12 @@ class CAReduce(COp): ...@@ -1631,15 +1558,12 @@ class CAReduce(COp):
[f"{aname}_i"], [f"{aname}_i"],
sub, sub,
) )
code1 = ( code1 = f"""
""" {{
{ {task1_decl}
%(task1_decl)s {task1_code}
%(task1_code)s }}
}
""" """
% locals()
)
if node.inputs[0].type.ndim: if node.inputs[0].type.ndim:
if len(axis) == 1: if len(axis) == 1:
...@@ -1662,11 +1586,9 @@ class CAReduce(COp): ...@@ -1662,11 +1586,9 @@ class CAReduce(COp):
end = "" end = ""
if adtype != odtype: if adtype != odtype:
end = """ end = f"""
PyArray_CopyInto(%(oname)s, %(aname)s); PyArray_CopyInto({oname}, {aname});
""" % dict( """
oname=oname, aname=aname
)
end += acc_type.c_cleanup(aname, sub) end += acc_type.c_cleanup(aname, sub)
return decl, checks, alloc, loop, end return decl, checks, alloc, loop, end
...@@ -1681,7 +1603,7 @@ class CAReduce(COp): ...@@ -1681,7 +1603,7 @@ class CAReduce(COp):
def c_code_cache_version_apply(self, node): def c_code_cache_version_apply(self, node):
# the version corresponding to the c code in this Op # the version corresponding to the c code in this Op
version = [8] version = [9]
# now we insert versions for the ops on which we depend... # now we insert versions for the ops on which we depend...
scalar_node = Apply( scalar_node = Apply(
......
...@@ -585,14 +585,45 @@ def max_and_argmax(a, axis=None, keepdims=False): ...@@ -585,14 +585,45 @@ def max_and_argmax(a, axis=None, keepdims=False):
return [out, argout] return [out, argout]
class Max(CAReduce): class NonZeroCAReduce(CAReduce):
def _c_all(self, node, name, inames, onames, sub):
decl, checks, alloc, loop, end = super()._c_all(node, name, inames, onames, sub)
# We add an additional check for zero-sized dimensions (This seems like
# something that could enabled in `elemwise_cgen.make_checks`.)
iname = inames[0]
axis = self.axis
if axis is None:
axis = list(range(len(node.inputs[0].type.broadcastable)))
pattern = [0] * len(node.inputs[0].broadcastable)
for i in axis:
pattern[i] = 1
pattern_ = str(pattern)[1:-1]
decl += f"""int tosum[]={{{pattern_}}};"""
alloc += f"""
for(int i=0;i<PyArray_NDIM({iname});i++){{
if(PyArray_DIMS({iname})[i]==0 && tosum[i]){{
PyErr_Format(PyExc_ValueError,
"Input of CAReduce{{{node.op.scalar_op}}} has zero-size on axis %%d",i);
{sub["fail"]};
}}
}}
"""
return decl, checks, alloc, loop, end
class Max(NonZeroCAReduce):
nfunc_spec = ("max", 1, 1) nfunc_spec = ("max", 1, 1)
def __init__(self, axis): def __init__(self, axis):
super().__init__(aes.scalar_maximum, axis) super().__init__(aes.scalar_maximum, axis)
class Min(CAReduce): class Min(NonZeroCAReduce):
nfunc_spec = ("min", 1, 1) nfunc_spec = ("min", 1, 1)
def __init__(self, axis): def __init__(self, axis):
......
...@@ -60,6 +60,7 @@ from aesara.tensor.math import ( ...@@ -60,6 +60,7 @@ from aesara.tensor.math import (
All, All,
Any, Any,
Dot, Dot,
NonZeroCAReduce,
Prod, Prod,
ProdWithoutZeros, ProdWithoutZeros,
Sum, Sum,
...@@ -1534,14 +1535,18 @@ def local_op_of_op(fgraph, node): ...@@ -1534,14 +1535,18 @@ def local_op_of_op(fgraph, node):
return [combined(node_inps.owner.inputs[0])] return [combined(node_inps.owner.inputs[0])]
ALL_REDUCE = [ ALL_REDUCE = (
CAReduce, [
All, CAReduce,
Any, All,
Sum, Any,
Prod, Sum,
ProdWithoutZeros, Prod,
] + CAReduce.__subclasses__() ProdWithoutZeros,
]
+ CAReduce.__subclasses__()
+ NonZeroCAReduce.__subclasses__()
)
@register_canonicalize @register_canonicalize
......
...@@ -372,7 +372,7 @@ class TestCAReduce(unittest_tools.InferShapeTester): ...@@ -372,7 +372,7 @@ class TestCAReduce(unittest_tools.InferShapeTester):
zv = xv zv = xv
if pre_scalar_op is not None: if pre_scalar_op is not None:
zv = Elemwise(scalar_op=pre_scalar_op)(x).eval({x: xv}) zv = Elemwise(scalar_op=pre_scalar_op)(x).eval({x: xv})
numpy_raised = False
if len(tosum) > 1 and any([a < 0 for a in tosum]): if len(tosum) > 1 and any([a < 0 for a in tosum]):
# In that case, we need to use the good order of axis # In that case, we need to use the good order of axis
# in the reduction. # in the reduction.
...@@ -404,17 +404,19 @@ class TestCAReduce(unittest_tools.InferShapeTester): ...@@ -404,17 +404,19 @@ class TestCAReduce(unittest_tools.InferShapeTester):
for axis in reversed(sorted(tosum)): for axis in reversed(sorted(tosum)):
zv = np.multiply.reduce(zv, axis) zv = np.multiply.reduce(zv, axis)
elif scalar_op == aes.scalar_maximum: elif scalar_op == aes.scalar_maximum:
try: # There is no identity value for the maximum function
for axis in reversed(sorted(tosum)): # So we can't support shape of dimensions 0.
zv = np.maximum.reduce(zv, axis) if np.prod(zv.shape) == 0:
except ValueError: continue
numpy_raised = True for axis in reversed(sorted(tosum)):
zv = np.maximum.reduce(zv, axis)
elif scalar_op == aes.scalar_minimum: elif scalar_op == aes.scalar_minimum:
try: # There is no identity value for the minimum function
for axis in reversed(sorted(tosum)): # So we can't support shape of dimensions 0.
zv = np.minimum.reduce(zv, axis) if np.prod(zv.shape) == 0:
except ValueError: continue
numpy_raised = True for axis in reversed(sorted(tosum)):
zv = np.minimum.reduce(zv, axis)
elif scalar_op == aes.or_: elif scalar_op == aes.or_:
for axis in reversed(sorted(tosum)): for axis in reversed(sorted(tosum)):
zv = np.bitwise_or.reduce(zv, axis) zv = np.bitwise_or.reduce(zv, axis)
...@@ -432,24 +434,21 @@ class TestCAReduce(unittest_tools.InferShapeTester): ...@@ -432,24 +434,21 @@ class TestCAReduce(unittest_tools.InferShapeTester):
raise Exception( raise Exception(
f"Test for CAReduce with scalar_op {scalar_op} not implemented" f"Test for CAReduce with scalar_op {scalar_op} not implemented"
) )
if scalar_op in [aes.scalar_maximum, aes.scalar_minimum] and numpy_raised:
with pytest.raises(ValueError): if test_nan:
f(xv) try:
assert self.type.values_eq(f(xv), zv), (f(xv), zv)
except NotImplementedError:
# GpuCAReduce don't implement all cases when size is 0
assert xv.size == 0
else: else:
if test_nan: try:
try: f_xv = f(xv)
assert self.type.values_eq(f(xv), zv), (f(xv), zv) assert f_xv.shape == zv.shape, (f_xv, zv)
except NotImplementedError: utt.assert_allclose(zv, f_xv)
# GpuCAReduce don't implement all cases when size is 0 except NotImplementedError:
assert xv.size == 0 # GpuCAReduce don't implement all cases when size is 0
else: assert xv.size == 0
try:
f_xv = f(xv)
assert f_xv.shape == zv.shape, (f_xv, zv)
utt.assert_allclose(zv, f_xv)
except NotImplementedError:
# GpuCAReduce don't implement all cases when size is 0
assert xv.size == 0
x = self.type(dtype, [(entry == 1) for entry in xsh])("x") x = self.type(dtype, [(entry == 1) for entry in xsh])("x")
if tensor_op is None: if tensor_op is None:
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论