Use NumPy C API to perform DimShuffle steps in its C implementation

e593b0ac · Brandon T. Willard · Brandon T. Willard · 223ee154 · e593b0ac · e593b0ac
--- a/aesara/gpuarray/elemwise.py
+++ b/aesara/gpuarray/elemwise.py
@@ -468,7 +468,7 @@ class GpuDimShuffle(DimShuffle):

        res = input

-        res = res.transpose(self.shuffle + self.drop)
+        res = res.transpose(self.transposition)

        shape = list(res.shape[: len(self.shuffle)])
        for augm in self.augment:

--- a/aesara/link/jax/dispatch.py
+++ b/aesara/link/jax/dispatch.py
@@ -710,7 +710,7 @@ def jax_funcify_Reshape(op, **kwargs):
 def jax_funcify_DimShuffle(op, **kwargs):
    def dimshuffle(x):

-        res = jnp.transpose(x, op.shuffle + op.drop)
+        res = jnp.transpose(x, op.transposition)

        shape = list(res.shape[: len(op.shuffle)])


--- a/aesara/link/numba/dispatch/elemwise.py
+++ b/aesara/link/numba/dispatch/elemwise.py
@@ -319,7 +319,7 @@ def numba_funcify_CAReduce(op, node, **kwargs):
 @numba_funcify.register(DimShuffle)
 def numba_funcify_DimShuffle(op, **kwargs):
    shuffle = tuple(op.shuffle)
-    drop = tuple(op.drop)
+    transposition = tuple(op.transposition)
    augment = tuple(op.augment)
    inplace = op.inplace

@@ -352,7 +352,7 @@ def numba_funcify_DimShuffle(op, **kwargs):

        @numba.njit
        def dimshuffle_inner(x, shuffle):
-            res = np.transpose(x, shuffle + drop)
+            res = np.transpose(x, transposition)
            shuffle_shape = res.shape[: len(shuffle)]

            new_shape = create_zeros_tuple()

--- a/aesara/tensor/c_code/dimshuffle.c
+++ b/aesara/tensor/c_code/dimshuffle.c
 #section support_code_apply

-int APPLY_SPECIFIC(cpu_dimshuffle)(PyArrayObject* input, PyArrayObject** res, PARAMS_TYPE* params) {
-    npy_bool* input_broadcastable;
-    npy_int64* new_order;
-    npy_intp nd_in;
-    npy_intp nd_out;
-    PyArrayObject* basename;
-    npy_intp* dimensions;
-    npy_intp* strides;
-
-    if (!PyArray_IS_C_CONTIGUOUS(params->input_broadcastable)) {
-        PyErr_SetString(PyExc_RuntimeError, "DimShuffle: param input_broadcastable must be C-contiguous.");
-        return 1;
-    }
-    if (!PyArray_IS_C_CONTIGUOUS(params->_new_order)) {
-        PyErr_SetString(PyExc_RuntimeError, "DimShuffle: param _new_order must be C-contiguous.");
-        return 1;
-    }
-    input_broadcastable = (npy_bool*) PyArray_DATA(params->input_broadcastable);
-    new_order = (npy_int64*) PyArray_DATA(params->_new_order);
-    nd_in = PyArray_SIZE(params->input_broadcastable);
-    nd_out = PyArray_SIZE(params->_new_order);
-
-    /* check_input_nd */
-    if (PyArray_NDIM(input) != nd_in) {
-        PyErr_SetString(PyExc_NotImplementedError, "input nd");
-        return 1;
-    }
+/*
+ * Apply a DimShuffle to `input` and store the resulting array in `*res`.
+ *
+ * The steps mirror the Python implementation:
+ *   1. transpose the input by `params->transposition`;
+ *   2. take the first `len(params->shuffle)` dimensions of the transposed
+ *      array and insert a length-1 dimension at every index listed in
+ *      `params->augment`;
+ *   3. reshape the transposed array to that shape.
+ *
+ * When `params->inplace` is false the input is copied first.
+ *
+ * Any array previously stored in `*res` is released.  Returns 0 on success.
+ * On failure, returns 1 and leaves `*res` set to NULL; every temporary
+ * reference taken here is released on all paths.
+ */
+int APPLY_SPECIFIC(cpu_dimshuffle)(PyArrayObject *input, PyArrayObject **res,
+                                   PARAMS_TYPE *params) {
+
+  // This points to either the original input or a copy we create below.
+  // Either way, this is what we should be working on/with.
+  PyArrayObject *_input;

-    /* clear_output */
  if (*res)
    Py_XDECREF(*res);
+  // Do not leave a dangling pointer in the output slot if we bail out early.
+  *res = NULL;

-    /* get_base */
  if (params->inplace) {
-        basename = input;
-        Py_INCREF((PyObject*)basename);
+    _input = input;
+    Py_INCREF((PyObject *)_input);
  } else {
-        basename =
-            (PyArrayObject*)PyArray_FromAny((PyObject*)input,
-                                            NULL, 0, 0, NPY_ARRAY_ALIGNED|NPY_ARRAY_ENSURECOPY, NULL);
+    _input = (PyArrayObject *)PyArray_FromAny(
+        (PyObject *)input, NULL, 0, 0, NPY_ARRAY_ALIGNED | NPY_ARRAY_ENSURECOPY,
+        NULL);
  }
+
+  // The copy can fail (e.g. out of memory); the error is already set.
+  if (_input == NULL) {
+    return 1;
+  }

-    /* shape_statements and strides_statements */
-    dimensions = (npy_intp*) malloc(nd_out * sizeof(npy_intp));
-    strides = (npy_intp*) malloc(nd_out * sizeof(npy_intp));
-    if (dimensions == NULL || strides == NULL) {
-        PyErr_NoMemory();
-        free(dimensions);
-        free(strides);
-        return 1;
-    };
+  PyArray_Dims permute;
-
-    for (npy_intp i = 0; i < nd_out; ++i) {
-        if (new_order[i] != -1) {
-            dimensions[i] = PyArray_DIMS(basename)[new_order[i]];
-            strides[i] = PyArray_DIMS(basename)[new_order[i]] == 1 ?
-                            0 : PyArray_STRIDES(basename)[new_order[i]];
-        } else {
-            dimensions[i] = 1;
-            strides[i] = 0;
+  if (!PyArray_IntpConverter((PyObject *)params->transposition, &permute)) {
+    Py_DECREF(_input);
+    return 1;
  }
+
+  /*
+    res = res.transpose(self.transposition)
+  */
+  PyArrayObject *transposed_input =
+      (PyArrayObject *)PyArray_Transpose(_input, &permute);
+
+  // The transposed view keeps its own reference to its base, so our
+  // reference to `_input` is no longer needed (whether or not the call
+  // succeeded).
+  Py_DECREF(_input);
+  PyDimMem_FREE(permute.ptr);
+
+  if (transposed_input == NULL) {
+    return 1;
+  }
+
+  npy_intp *res_shape = PyArray_DIMS(transposed_input);
+  npy_intp N_shuffle = PyArray_SIZE(params->shuffle);
+  npy_intp N_augment = PyArray_SIZE(params->augment);
+  npy_intp N = N_augment + N_shuffle;
+  // Allocate with the allocator that matches the `PyDimMem_FREE` below.
+  npy_intp *_reshape_shape = PyDimMem_NEW(N);
+
+  if (_reshape_shape == NULL) {
+    PyErr_NoMemory();
+    Py_DECREF(transposed_input);
+    return 1;
  }

-    /* set the strides of the broadcasted dimensions.
-     * This algorithm is from numpy: PyArray_Newshape() in
-     * cvs/numpy/numpy/core/src/multiarraymodule.c */
-    if (nd_out > 0) {
-        if (strides[nd_out - 1] == 0)
-            strides[nd_out - 1] = PyArray_DESCR(basename)->elsize;
-        for (npy_intp i = nd_out - 2; i > -1; --i) {
-            if (strides[i] == 0)
-                strides[i] = strides[i + 1] * dimensions[i + 1];
+  /*
+    shape = list(res.shape[: len(self.shuffle)])
+    for augm in self.augment:
+        shape.insert(augm, 1)
+  */
+  npy_intp aug_idx = 0;
+  int res_idx = 0;
+  for (npy_intp i = 0; i < N; i++) {
+    // `augment` is declared as an `lvector` param; read its elements as
+    // 64-bit integers instead of assuming `npy_intp` is 64 bits wide.
+    if (aug_idx < N_augment &&
+        i == (npy_intp)(*(npy_int64 *)PyArray_GetPtr(params->augment,
+                                                     &aug_idx))) {
+      _reshape_shape[i] = 1;
+      aug_idx++;
+    } else {
+      _reshape_shape[i] = res_shape[res_idx];
+      res_idx++;
    }
  }

-    /* close_bracket */
-    // create a new array.
-    *res = (PyArrayObject*)PyArray_New(&PyArray_Type, nd_out, dimensions,
-                                       PyArray_TYPE(basename), strides,
-                                       PyArray_DATA(basename), PyArray_ITEMSIZE(basename),
-                                       // borrow only the writable flag from the base
-                                       // the NPY_OWNDATA flag will default to 0.
-                                       (NPY_ARRAY_WRITEABLE * PyArray_ISWRITEABLE(basename)),
-                                       NULL);
+  PyArray_Dims reshape_shape = {.ptr = _reshape_shape, .len = (int)N};

-    if (*res == NULL) {
-        free(dimensions);
-        free(strides);
-        return 1;
-    }
+  /* res = res.reshape(shape) */
+  *res = (PyArrayObject *)PyArray_Newshape(transposed_input, &reshape_shape,
+                                           NPY_CORDER);

-    // recalculate flags: CONTIGUOUS, FORTRAN, ALIGNED
-    PyArray_UpdateFlags(*res, NPY_ARRAY_UPDATE_ALL);
+  // `PyArray_Newshape` returns a new reference; release the intermediate
+  // transposed array so that it is not leaked on every call.
+  Py_DECREF(transposed_input);

-    // we are making a view in both inplace and non-inplace cases
-    PyArray_SetBaseObject(*res, (PyObject*)basename);
+  PyDimMem_FREE(reshape_shape.ptr);

-    free(strides);
-    free(dimensions);
+  if (!*res) {
+    return 1;
+  }

  return 0;
 }
--- a/aesara/tensor/elemwise.py
+++ b/aesara/tensor/elemwise.py
@@ -119,47 +119,27 @@ class DimShuffle(ExternalCOp):

    @property
    def params_type(self):
-        # We can't directly create `params_type` as class attribute
-        # because of importation issues related to TensorType.
        return ParamsType(
-            input_broadcastable=TensorType(dtype="bool", broadcastable=(False,)),
-            _new_order=lvector,
-            transposition=TensorType(dtype="uint32", broadcastable=(False,)),
+            shuffle=lvector,
+            augment=lvector,
+            transposition=lvector,
            inplace=scalar_bool,
        )

-    @property
-    def _new_order(self):
-        # Param for C code.
-        # self.new_order may contain 'x', which is not a valid integer value.
-        # We replace it with -1.
-        return [(-1 if x == "x" else x) for x in self.new_order]
-
-    @property
-    def transposition(self):
-        return self.shuffle + self.drop
-
-    def __init__(self, input_broadcastable, new_order, inplace=True):
+    def __init__(self, input_broadcastable, new_order):
        super().__init__([self.c_func_file], self.c_func_name)
+
        self.input_broadcastable = tuple(input_broadcastable)
        self.new_order = tuple(new_order)
-        if inplace is True:
-            self.inplace = inplace
-        else:
-            raise ValueError(
-                "DimShuffle is inplace by default and hence the inplace for DimShuffle must be true"
-            )
+
+        self.inplace = True

        for i, j in enumerate(new_order):
            if j != "x":
-                # There is a bug in numpy that results in
-                # isinstance(x, integer_types) returning False for
-                # numpy integers.  See
-                # <http://projects.scipy.org/numpy/ticket/2235>.
                if not isinstance(j, (int, np.integer)):
                    raise TypeError(
-                        "DimShuffle indices must be python ints. "
-                        f"Got: '{j}' of type '{type(j)}'."
+                        "DimShuffle indices must be Python ints; got "
+                        f"{j} of type {type(j)}."
                    )
                if j >= len(input_broadcastable):
                    raise ValueError(
@@ -169,31 +149,30 @@ class DimShuffle(ExternalCOp):
                if j in new_order[(i + 1) :]:
                    raise ValueError(
                        "The same input dimension may not appear "
-                        "twice in the list of output dimensions",
-                        new_order,
+                        f"twice in the list of output dimensions: {new_order}"
                    )

-        # list of dimensions of the input to drop
-        self.drop = []
+        # List of input dimensions to drop
+        drop = []
        for i, b in enumerate(input_broadcastable):
            if i not in new_order:
-                # we want to drop this dimension because it's not a value in
-                # new_order
-                if b == 1:  # 1 aka True
-                    self.drop.append(i)
+                # We want to drop this dimension because it's not a value in
+                # `new_order`
+                if b == 1:
+                    drop.append(i)
                else:
-                    # we cannot drop non-broadcastable dimensions
+                    # We cannot drop non-broadcastable dimensions
                    raise ValueError(
-                        "You cannot drop a non-broadcastable dimension:",
-                        f" {input_broadcastable}, {new_order}",
+                        "Cannot drop a non-broadcastable dimension: "
+                        f"{input_broadcastable}, {new_order}"
                    )

-        # this is the list of the original dimensions that we keep
+        # This is the list of the original dimensions that we keep
        self.shuffle = [x for x in new_order if x != "x"]
-
-        # list of dimensions of the output that are broadcastable and were not
+        self.transposition = self.shuffle + drop
+        # List of dimensions of the output that are broadcastable and were not
        # in the original input
-        self.augment = [i for i, x in enumerate(new_order) if x == "x"]
+        self.augment = sorted([i for i, x in enumerate(new_order) if x == "x"])

        if self.inplace:
            self.view_map = {0: [0]}
@@ -241,27 +220,23 @@ class DimShuffle(ExternalCOp):
            return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order)

    def perform(self, node, inp, out, params):
-        (input,) = inp
+        (res,) = inp
        (storage,) = out
-        # drop
-        res = input
+
        if type(res) != np.ndarray and type(res) != np.memmap:
            raise TypeError(res)

-        # transpose
-        res = res.transpose(self.shuffle + self.drop)
+        res = res.transpose(self.transposition)

-        # augment
        shape = list(res.shape[: len(self.shuffle)])
        for augm in self.augment:
            shape.insert(augm, 1)
        res = res.reshape(shape)

-        # copy (if not inplace)
        if not self.inplace:
            res = np.copy(res)

-        storage[0] = np.asarray(res)  # asarray puts scalars back into array
+        storage[0] = np.asarray(res)

    def infer_shape(self, fgraph, node, shapes):
        (ishp,) = shapes

--- a/aesara/tensor/inplace.py
+++ b/aesara/tensor/inplace.py
@@ -399,4 +399,4 @@ pprint.assign(pow_inplace, printing.OperatorPrinter("**=", 1, "right"))
 def transpose_inplace(x, **kwargs):
    "Perform a transpose on a tensor without copying the underlying storage"
    dims = list(range(x.ndim - 1, -1, -1))
-    return DimShuffle(x.broadcastable, dims, inplace=True)(x)
+    return DimShuffle(x.broadcastable, dims)(x)
--- a/tests/link/test_jax.py
+++ b/tests/link/test_jax.py
@@ -856,7 +856,7 @@ def test_jax_Dimshuffle():
    compare_jax_and_py(x_fg, [np.c_[[1.0, 2.0, 3.0, 4.0]].astype(config.floatX)])

    a_aet = tensor(dtype=config.floatX, broadcastable=[False, True])
-    x = aet_elemwise.DimShuffle([False, True], (0,), inplace=True)(a_aet)
+    x = aet_elemwise.DimShuffle([False, True], (0,))(a_aet)
    x_fg = FunctionGraph([a_aet], [x])
    compare_jax_and_py(x_fg, [np.c_[[1.0, 2.0, 3.0, 4.0]].astype(config.floatX)])


--- a/tests/link/test_numba.py
+++ b/tests/link/test_numba.py
@@ -653,7 +653,7 @@ def test_AllocDiag(v, offset):


 @pytest.mark.parametrize(
-    "v, new_order, inplace",
+    "v, new_order",
    [
        # `{'drop': [], 'shuffle': [], 'augment': [0, 1]}`
        (
@@ -662,7 +662,6 @@ def test_AllocDiag(v, offset):
                np.array(1, dtype=np.int64),
            ),
            ("x", "x"),
-            True,
        ),
        # I.e. `a_aet.T`
        # `{'drop': [], 'shuffle': [1, 0], 'augment': []}`
@@ -671,7 +670,6 @@ def test_AllocDiag(v, offset):
                aet.matrix("a"), np.array([[1.0, 2.0], [3.0, 4.0]], dtype=config.floatX)
            ),
            (1, 0),
-            True,
        ),
        # `{'drop': [], 'shuffle': [0, 1], 'augment': [2]}`
        (
@@ -679,7 +677,6 @@ def test_AllocDiag(v, offset):
                aet.matrix("a"), np.array([[1.0, 2.0], [3.0, 4.0]], dtype=config.floatX)
            ),
            (1, 0, "x"),
-            True,
        ),
        # `{'drop': [1], 'shuffle': [2, 0], 'augment': [0, 2, 4]}`
        (
@@ -688,7 +685,6 @@ def test_AllocDiag(v, offset):
                np.array([[[1.0, 2.0]], [[3.0, 4.0]]], dtype=config.floatX),
            ),
            ("x", 2, "x", 0, "x"),
-            True,
        ),
        # I.e. `a_aet.dimshuffle((0,))`
        # `{'drop': [1], 'shuffle': [0], 'augment': []}`
@@ -698,7 +694,6 @@ def test_AllocDiag(v, offset):
                np.array([[1.0], [2.0], [3.0], [4.0]], dtype=config.floatX),
            ),
            (0,),
-            True,
        ),
        (
            set_test_value(
@@ -706,7 +701,6 @@ def test_AllocDiag(v, offset):
                np.array([[1.0], [2.0], [3.0], [4.0]], dtype=config.floatX),
            ),
            (0,),
-            True,
        ),
        (
            set_test_value(
@@ -714,12 +708,11 @@ def test_AllocDiag(v, offset):
                np.array([[[1.0]]], dtype=config.floatX),
            ),
            (),
-            True,
        ),
    ],
 )
-def test_Dimshuffle(v, new_order, inplace):
-    g = aet_elemwise.DimShuffle(v.broadcastable, new_order, inplace=inplace)(v)
+def test_Dimshuffle(v, new_order):
+    g = aet_elemwise.DimShuffle(v.broadcastable, new_order)(v)
    g_fg = FunctionGraph(outputs=[g])
    compare_numba_and_py(
        g_fg,

--- a/tests/tensor/test_elemwise.py
+++ b/tests/tensor/test_elemwise.py
@@ -52,12 +52,12 @@ class TestDimShuffle(unittest_tools.InferShapeTester):
            ib = [(entry == 1) for entry in xsh]
            x = self.type(self.dtype, ib)("x")
            e = self.op(ib, shuffle)(x)
-            f = copy(linker).accept(FunctionGraph([x], [e])).make_function()
+            f = aesara.function([x], e, mode=Mode(linker=linker))
            assert f(np.ones(xsh, dtype=self.dtype)).shape == zsh
            # test that DimShuffle.infer_shape work correctly
            x = self.type(self.dtype, ib)("x")
            e = self.op(ib, shuffle)(x)
-            f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function()
+            f = aesara.function([x], e.shape, mode=Mode(linker=linker))
            assert all(f(np.ones(xsh, dtype=self.dtype))) == all(zsh)

        # Test when we drop a axis that is not broadcastable
@@ -70,7 +70,7 @@ class TestDimShuffle(unittest_tools.InferShapeTester):
        ib = [True, True, False]
        x = self.type(self.dtype, ib)("x")
        e = self.op(ib, (1, 2))(x)
-        f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function()
+        f = aesara.function([x], e.shape, mode=Mode(linker=linker))
        with pytest.raises(TypeError):
            f(np.ones((2, 1, 4)))

@@ -119,6 +119,25 @@ class TestDimShuffle(unittest_tools.InferShapeTester):
        with pytest.raises(ValueError):
            y.eval({x: 0})

+    def test_c_views(self):
+        """Check that the C thunk returns a valid view of a broadcasted input.
+
+        The graph adds a leading axis to a vector (``x[None]``) and is run
+        through `CLinker`.  The output must share memory with the input and
+        contain the same values.
+        """
+        vec = vector()
+        graph = FunctionGraph([vec], [vec[None]])
+        thunk, in_storage, out_storage = CLinker().accept(graph).make_thunk()
+
+        # Admittedly a bit of a hack: by repeating the call many times we hope
+        # to eventually land on memory whose contents differ from the
+        # broadcasted value, which would reveal junk data coming from an
+        # incorrectly constructed array view.
+        broadcasted = np.broadcast_to(2039, (5000,))
+        for _ in range(1000):
+            in_storage[0].storage[0] = broadcasted
+            thunk()
+            result = out_storage[0].storage[0]
+            # The output has to be a view of the original data
+            assert np.shares_memory(broadcasted, result)
+            # The output has to hold the broadcasted value everywhere
+            assert np.array_equiv(result, 2039)
+

 class TestBroadcast:
    # this is to allow other types to reuse this class to test their ops