Unverified 提交 a0ee9a44 authored 作者: Jesse Grabowski's avatar Jesse Grabowski 提交者: GitHub

Remove deprecated AbstractConv Ops and tests (#1817)

* Remove deprecated AbstractConv Ops and tests * Remove tensor/conv from test CI * remove conv.rst
上级 482e6cc2
......@@ -84,10 +84,10 @@ jobs:
part:
- [ "*rest", "tests --ignore=tests/scan --ignore=tests/tensor --ignore=tests/xtensor --ignore=tests/link/numba" ]
- [ "scan", "tests/scan" ]
- [ "tensor *rest", "tests/tensor --ignore=tests/tensor/test_basic.py --ignore=tests/tensor/test_elemwise.py --ignore=tests/tensor/test_math.py --ignore=tests/tensor/test_math_scipy.py --ignore=tests/tensor/test_blas.py --ignore=tests/tensor/signal --ignore=tests/tensor/conv --ignore=tests/tensor/rewriting --ignore=tests/tensor/linalg --ignore=tests/tensor/test_nlinalg.py --ignore=tests/tensor/test_slinalg.py --ignore=tests/tensor/test_pad.py" ]
- [ "tensor *rest", "tests/tensor --ignore=tests/tensor/test_basic.py --ignore=tests/tensor/test_elemwise.py --ignore=tests/tensor/test_math.py --ignore=tests/tensor/test_math_scipy.py --ignore=tests/tensor/test_blas.py --ignore=tests/tensor/signal --ignore=tests/tensor/rewriting --ignore=tests/tensor/linalg --ignore=tests/tensor/test_nlinalg.py --ignore=tests/tensor/test_slinalg.py --ignore=tests/tensor/test_pad.py" ]
- [ "tensor basic+elemwise", "tests/tensor/test_basic.py tests/tensor/test_elemwise.py" ]
- [ "tensor math", "tests/tensor/test_math.py" ]
- [ "tensor scipy+blas+conv+pad", "tests/tensor/test_math_scipy.py tests/tensor/test_blas.py tests/tensor/signal tests/tensor/conv tests/tensor/test_pad.py" ]
- [ "tensor scipy+blas+pad", "tests/tensor/test_math_scipy.py tests/tensor/test_blas.py tests/tensor/signal tests/tensor/test_pad.py" ]
- [ "tensor rewriting", "tests/tensor/rewriting" ]
- [ "tensor linalg", "tests/tensor/linalg tests/tensor/test_nlinalg.py tests/tensor/test_slinalg.py" ]
exclude:
......
=========================================
:mod:`tensor.conv` -- Tensor Convolutions
=========================================
.. module:: tensor.conv
:platform: Unix, Windows
:synopsis: Tensor Convolutions
.. moduleauthor:: LISA, PyMC Developers, PyTensor Developers
.. automodule:: pytensor.tensor.conv
:members:
......@@ -270,15 +270,6 @@ def add_basic_configvars():
in_c_key=False,
)
config.add(
"conv__assert_shape",
"If True, AbstractConv* ops will verify that user-provided"
" shapes match the runtime shapes (debugging option,"
" may slow down compilation)",
BoolParam(False),
in_c_key=False,
)
config.add(
"print_global_stats",
"Print some global statistics (time spent) at the end",
......
......@@ -71,7 +71,6 @@ class PyTensorConfigParser:
pickle_test_value: bool
cast_policy: str
device: str
conv__assert_shape: bool
print_global_stats: bool
unpickle_function: bool
# add_compile_configvars
......
from .abstract_conv import (
bilinear_upsampling,
causal_conv1d,
conv2d,
conv2d_transpose,
conv3d,
frac_bilinear_upsampling,
separable_conv2d,
separable_conv3d,
)
This source diff could not be displayed because it is too large. You can view the blob instead.
// This uses a lot of code from Caffe (http://caffe.berkeleyvision.org/);
// sources are clearly marked. Below we reproduce the original license of
// the Caffe software.
/*
Copyright (c) 2014, The Regents of the University of California (Regents)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cpp)
// Loops for fast unfold + copy
// Unfold a 3-D volume into a column matrix ("im2col" extended to 3-D) so
// that a valid correlation reduces to a single GEMM call.
// data_im  : C-contiguous input of shape (channels, height, width, depth).
// data_col : output of shape
//            (channels*kernel_h*kernel_w*kernel_d,
//             height_col*width_col*depth_col);
//            taps that fall into the implicit zero padding are written as 0.
// NOTE: tokens such as %(float_type)s and the doubled %% are Python
// string-template substitutions; this source is %%-formatted before it is
// handed to the C compiler.
void im3d2col(const %(float_type)s* data_im, const int channels,
const int height, const int width, const int depth,
const int kernel_h, const int kernel_w, const int kernel_d,
const int dilation_h, const int dilation_w, const int dilation_d,
const int pad_h, const int pad_w, const int pad_d,
const int stride_h, const int stride_w, const int stride_d,
%(float_type)s* data_col) {
// Implicit dilated kernel size
int dil_kernel_h = (kernel_h - 1) * dilation_h + 1;
int dil_kernel_w = (kernel_w - 1) * dilation_w + 1;
int dil_kernel_d = (kernel_d - 1) * dilation_d + 1;
// Output spatial extent of a valid correlation on the padded input.
int height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1;
int depth_col = (depth + 2 * pad_d - dil_kernel_d) / stride_d + 1;
int channels_col = channels * kernel_h * kernel_w * kernel_d;
for (int c = 0; c < channels_col; ++c) {
// Decompose the row index c into (channel, kernel offsets);
// the depth axis varies fastest.
int d_offset = c %% kernel_d;
int w_offset = (c / kernel_d) %% kernel_w;
int h_offset = (c / kernel_w / kernel_d) %% kernel_h;
int c_im = c / kernel_h / kernel_w / kernel_d;
for (int h = 0; h < height_col; ++h) {
int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
for (int w = 0; w < width_col; ++w) {
int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
for (int d = 0; d < depth_col; ++d) {
int d_pad = d * stride_d - pad_d + d_offset * dilation_d;
// Copy the input value, or 0 when the tap lies in the padding region.
if (h_pad >= 0 && h_pad < height
&& w_pad >= 0 && w_pad < width
&& d_pad >= 0 && d_pad < depth)
data_col[(npy_intp)((c * height_col + h) * width_col + w) * depth_col + d] =
data_im[(npy_intp)((c_im * height + h_pad) * width + w_pad) * depth + d_pad];
else
data_col[(npy_intp)((c * height_col + h) * width_col + w) * depth_col + d] = 0.;
}
}
}
}
}
// Unlike the Caffe and PyTensor GPU versions, the data_im array is set to zero
// before the col2im call rather than doing it here. So, the result is just
// accumulated into data_im.
// Scatter a column matrix produced by im3d2col back into a 3-D volume.
// Unlike the Caffe and PyTensor GPU versions, data_im must be zeroed by the
// caller before this is invoked: values are accumulated (+=) into data_im,
// and taps that fall inside the implicit zero padding are simply skipped.
// data_col : input of shape (channels*patch_h*patch_w*patch_d,
//            height_col*width_col*depth_col).
// data_im  : C-contiguous output of shape (channels, height, width, depth).
// NOTE: %(float_type)s / %% are Python template substitutions applied before
// compilation.
// Fix over previous revision: removed the unused local `num_kernels`.
void col2im3d(const %(float_type)s* data_col, const int channels,
              const int height, const int width, const int depth,
              const int patch_h, const int patch_w, const int patch_d,
              const int dilation_h, const int dilation_w, const int dilation_d,
              const int pad_h, const int pad_w, const int pad_d,
              const int stride_h, const int stride_w, const int stride_d,
              %(float_type)s* data_im) {
  // Implicit dilated patch extent.
  int dil_patch_h = (patch_h - 1) * dilation_h + 1;
  int dil_patch_w = (patch_w - 1) * dilation_w + 1;
  int dil_patch_d = (patch_d - 1) * dilation_d + 1;
  // Spatial extent of the column matrix (must mirror im3d2col exactly).
  int height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1;
  int width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1;
  int depth_col = (depth + 2 * pad_d - dil_patch_d) / stride_d + 1;
  int channels_col = channels * patch_h * patch_w * patch_d;
  for (int c = 0; c < channels_col; ++c) {
    // Decompose row index c into (channel, patch offsets); depth varies fastest.
    int d_offset = c %% patch_d;
    int w_offset = (c / patch_d) %% patch_w;
    int h_offset = (c / patch_w / patch_d) %% patch_h;
    int c_im = c / patch_h / patch_w / patch_d;
    for (int h = 0; h < height_col; ++h) {
      int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
      for (int w = 0; w < width_col; ++w) {
        int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
        for (int d = 0; d < depth_col; ++d) {
          int d_pad = d * stride_d - pad_d + d_offset * dilation_d;
          // Accumulate only taps that land inside the (unpadded) volume.
          if (h_pad >= 0 && h_pad < height
              && w_pad >= 0 && w_pad < width
              && d_pad >= 0 && d_pad < depth)
            data_im[(npy_intp)((c_im * height + h_pad) * width + w_pad) * depth + d_pad] +=
                data_col[(npy_intp)((c * height_col + h) * width_col + w) * depth_col + d];
        }
      }
    }
  }
}
// PyTensor op code
// GPU version authors: Arjun Jain, Frederic Bastien, Jan Schlueter
// Reference code: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
// and https://github.com/torch/cunn/blob/master/SpatialConvolutionMM.cu
// CPU version author: Jesse Livezey
// CPU version adapted from GPU version
// Compute a batched 3-D correlation, or one of its two gradients, via the
// im3d2col + GEMM strategy, with optional grouped filters.
//   bottom: (batchSize, nChannels, bottomHeight, bottomWidth, bottomDepth)
//   weight: (nFilters, nChannels / numgroups, kH, kW, kD)
//   top:    (batchSize, nFilters, topHeight, topWidth, topDepth)
// direction selects which array is computed from the other two:
//   0 -> forward pass (writes top)
//   1 -> gradient wrt weights (writes weight)
//   2 -> gradient wrt inputs (writes bottom)
// dH/dW/dD are strides, dilH/dilW/dilD dilations, padH/padW/padD paddings,
// numgroups the number of filter groups.
// Returns the array that was written (an alias of bottom, weight or top),
// or NULL with a Python exception set on error.
// NOTE: %(...)s tokens and doubled %% are Python template substitutions;
// this source is %%-formatted before compilation.
// Fixes over previous revision:
//   * the direction==1 memset now zeroes all M_ * K_ * numgroups weight
//     elements (previously only M_ * K_, leaving groups > 0 uninitialized
//     when numgroups > 1);
//   * #undef now names the macro that was actually defined
//     (_CONV_FLOORDIV_X, not _CONV_FLOORDIV);
//   * error paths release the references taken by PyArray_GETCONTIGUOUS
//     (and the temporary `col` buffer where already allocated);
//   * an invalid `direction` now sets an exception before returning NULL;
//   * removed the unused local `group_bottom_stride`.
PyArrayObject* corr3dMM(PyArrayObject* bottom,
                        PyArrayObject* weight,
                        PyArrayObject* top,
                        const int direction,
                        const int dH = 1,
                        const int dW = 1,
                        const int dD = 1,
                        const int dilH = 1,
                        const int dilW = 1,
                        const int dilD = 1,
                        const int padH = 0,
                        const int padW = 0,
                        const int padD = 0,
                        const int numgroups=1)
{
    // ---- Validate ranks and dtypes (before taking any new references) ----
    if (PyArray_NDIM(bottom) != 5)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM requires bottom of 5D");
        return NULL;
    }
    if (PyArray_TYPE(bottom) != %(float_typenum)s)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM received bottom with wrong type.");
        return NULL;
    }
    if (PyArray_NDIM(weight) != 5)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM requires weight of 5D");
        return NULL;
    }
    if (PyArray_TYPE(weight) != %(float_typenum)s)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM received weight with wrong type.");
        return NULL;
    }
    if (PyArray_NDIM(top) != 5)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM requires top of 5D");
        return NULL;
    }
    if (PyArray_TYPE(top) != %(float_typenum)s)
    {
        PyErr_SetString(PyExc_ValueError, "Corr3dMM received top with wrong type.");
        return NULL;
    }
    // Ensure data is contiguous. These return NEW references, released at
    // the bottom of this function and on every error path below.
    // NOTE(review): PyArray_GETCONTIGUOUS can return NULL on allocation
    // failure; that is not checked here (matches the historical behaviour).
    bottom = PyArray_GETCONTIGUOUS(bottom);
    weight = PyArray_GETCONTIGUOUS(weight);
    top = PyArray_GETCONTIGUOUS(top);
    // Extract some shape information for later and check shape consistency
    // bottom: (batchSize, nChannels, bottomHeight, bottomWidth, bottomDepth)
    const int batchSize = PyArray_DIMS(bottom)[0];
    const int nChannels = PyArray_DIMS(bottom)[1];
    const int bottomHeight = PyArray_DIMS(bottom)[2];
    const int bottomWidth = PyArray_DIMS(bottom)[3];
    const int bottomDepth = PyArray_DIMS(bottom)[4];
    // weights: (nFilters, nChannels / numgroups, rows, columns, slices)
    const int nFilters = PyArray_DIMS(weight)[0];
    const int kH = PyArray_DIMS(weight)[2];
    const int kW = PyArray_DIMS(weight)[3];
    const int kD = PyArray_DIMS(weight)[4];
    if (nChannels != PyArray_DIMS(weight)[1] * numgroups) {
        PyErr_SetString(PyExc_ValueError,
                "Corr3dMM images and kernel must have the same stack size\n");
        Py_DECREF(bottom);
        Py_DECREF(weight);
        Py_DECREF(top);
        return NULL;
    }
    if ((nFilters %% numgroups) != 0) {
        PyErr_SetString(PyExc_ValueError,
                "CorrMM the number of filters must be divisible by the number of groups\n");
        Py_DECREF(bottom);
        Py_DECREF(weight);
        Py_DECREF(top);
        return NULL;
    }
    // implicit dilated filter
    const int dil_kH = (kH - 1) * dilH + 1;
    const int dil_kW = (kW - 1) * dilW + 1;
    const int dil_kD = (kD - 1) * dilD + 1;
    // top: (batchSize, nFilters, topHeight, topWidth, topDepth)
    const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
    const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
    const int topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
    // the above values might be negative so we need to use Python-like
    // flooring integer division to be compatible with get_conv_output.
    // note: this macro implements Python's // for negative x only
    #define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) %% y) == 0 ? 0 : 1)) : (x / y))
    const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
    const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
    const int topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
    // undef the name that was actually defined above
    #undef _CONV_FLOORDIV_X
    if (batchSize != PyArray_DIMS(top)[0] ||
            nFilters != PyArray_DIMS(top)[1] ||
            topHeight != PyArray_DIMS(top)[2] ||
            topWidth != PyArray_DIMS(top)[3] ||
            topDepth != PyArray_DIMS(top)[4]) {
        PyErr_Format(PyExc_ValueError,
                "Corr3dMM shape inconsistency:\n"
                "  bottom shape: %%d %%d %%d %%d %%d\n"
                "  weight shape: %%d %%d %%d %%d %%d\n"
                "  top shape: %%ld %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d %%d)\n",
                batchSize, nChannels, bottomHeight, bottomWidth, bottomDepth,
                nFilters, nChannels / numgroups, kH, kW, kD,
                PyArray_DIMS(top)[0], PyArray_DIMS(top)[1],
                PyArray_DIMS(top)[2], PyArray_DIMS(top)[3], PyArray_DIMS(top)[4],
                batchSize, nFilters, topHeight, topWidth, topDepth);
        Py_DECREF(bottom);
        Py_DECREF(weight);
        Py_DECREF(top);
        return NULL;
    }
    // Create temporary columns: one im3d2col buffer per OpenMP thread.
    int max_threads = %(omp_get_max_threads)s;
    if (batchSize < max_threads) {
        max_threads = batchSize;
    }
    npy_intp col_dim[3];
    col_dim[0] = (npy_intp)max_threads;
    col_dim[1] = (npy_intp)(nChannels * kW * kH * kD);
    col_dim[2] = (npy_intp)(topHeight * topWidth * topDepth);
    //Change to PyArray_ZEROS which is faster than PyArray_EMPTY.
    PyArrayObject* col = (PyArrayObject*)PyArray_ZEROS(3,
            col_dim,
            PyArray_TYPE(top),
            0);
    if (NULL == col) {
        PyErr_Format(PyExc_RuntimeError,
                "Corr3dMM failed to allocate working memory of"
                " %%ld x %%ld x %%ld\n",
                col_dim[0], col_dim[1], col_dim[2]);
        Py_DECREF(bottom);
        Py_DECREF(weight);
        Py_DECREF(top);
        return NULL;
    }
    // Define some useful variables (strides are converted from bytes to
    // element counts by dividing by the element size %(n_bytes)f).
    const int batch_bottom_stride = PyArray_STRIDES(bottom)[0]/%(n_bytes)f;
    const int batch_top_stride = PyArray_STRIDES(top)[0]/%(n_bytes)f;
    const int group_top_stride = (PyArray_STRIDES(top)[1] * nFilters / numgroups)/%(n_bytes)f;
    const int K_ = col_dim[1] / numgroups;   // rows of one group's col block
    const int N_ = col_dim[2];               // output spatial size
    const int col_stride = (K_ * N_ * numgroups);
    const int group_col_stride = (K_ * N_);
    const int group_weight_stride = (PyArray_STRIDES(weight)[0] * nFilters / numgroups)/%(n_bytes)f;
    const int M_ = nFilters / numgroups;     // filters per group
    const %(c_float_type)s one = 1.0;
    const %(c_float_type)s zero = 0.0;
    char NTrans = 'N';
    char Trans = 'T';
    PyArrayObject *output;
    if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
        // Degenerate sizes: the destination is simply filled with zeros.
        switch(direction) {
        case 0:
            output = top;
            break;
        case 1:
            output = weight;
            break;
        case 2:
            output = bottom;
            break;
        default:
            PyErr_SetString(PyExc_ValueError,
                    "Corr3dMM: direction must be 0, 1 or 2\n");
            Py_DECREF(col);
            Py_DECREF(bottom);
            Py_DECREF(weight);
            Py_DECREF(top);
            return NULL;
        }
        PyArray_FILLWBYTE(output, 0);
    }
    else if (direction == 0) {  // forward pass
        output = top;
        // valid correlation: im3d2col, then gemm
        // Iterate over batch
        int blas_threads_saved = %(blas_get_num_threads)s;
        // Always forcing gemm to one thread when OpenMP is enabled for best and stable performance.
        %(blas_set_num_threads)s(1);
        %(omp_flags)s
        for (int n = 0; n < batchSize; ++n) {
            int tid = %(omp_get_thread_num)s;
            // First, im3d2col
            im3d2col((%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride,
                     nChannels, bottomHeight, bottomWidth, bottomDepth,
                     kH, kW, kD, dilH, dilW, dilD, padH, padW, padD, dH, dW, dD,
                     (%(float_type)s*)PyArray_DATA(col)+ tid * col_stride);
            for ( int g = 0; g < numgroups; ++g){
                // Second, gemm
                %(gemm)s(&NTrans, &NTrans,
                         &N_, &M_, &K_,
                         &one,
                         (%(float_type)s*)PyArray_DATA(col)+ tid * col_stride + g * group_col_stride, &N_,
                         (%(float_type)s*)PyArray_DATA(weight) + g * group_weight_stride, &K_,
                         &zero,
                         (%(float_type)s*)PyArray_DATA(top) + n * batch_top_stride + g * group_top_stride, &N_);
            }
        }
        // Restore to previous blas threads
        %(blas_set_num_threads)s(blas_threads_saved);
    }
    else if (direction == 1) {  // backprop wrt. weights
        output = weight;
        // One per-thread accumulator so the batch loop can run in parallel.
        npy_intp weight_dim[2];
        weight_dim[0] = (npy_intp)max_threads;
        weight_dim[1] = (npy_intp)(M_ * K_ * numgroups);
        PyArrayObject* local_weight = (PyArrayObject*)PyArray_ZEROS(2,
                weight_dim, PyArray_TYPE(weight), 0);
        if (NULL == local_weight)
        {
            PyErr_Format(PyExc_RuntimeError,
                    "Corr3dMM failed to allocate weight memory of %%ld x %%ld\n",
                    weight_dim[0], weight_dim[1]);
            Py_DECREF(col);
            Py_DECREF(bottom);
            Py_DECREF(weight);
            Py_DECREF(top);
            return NULL;
        }
        // valid convolution: im2col, then gemm
        // Iterate over batch
        int blas_threads_saved = %(blas_get_num_threads)s;
        // Always forcing gemm to one thread when OpenMP is enabled for best and stable performance.
        %(blas_set_num_threads)s(1);
        // OMP for batch-level parallelization
        %(omp_flags)s
        for (int n = 0; n < batchSize; ++n) {
            int tid = %(omp_get_thread_num)s;
            // First, im2col
            im3d2col((%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride,
                     nChannels, bottomHeight, bottomWidth, bottomDepth,
                     kH, kW, kD, dilH, dilW, dilD, padH, padW, padD, dH, dW, dD,
                     (%(float_type)s*)PyArray_DATA(col)+ tid * col_stride);
            for ( int g = 0; g < numgroups; ++g){
                // Second, gemm
                // Note that we accumulate into local_weight. We do so by setting
                // beta = 0 for the first iteration and beta = 1 for subsequent
                // ones. (This is faster than zeroing it before the loop.)
                %(gemm)s(&Trans, &NTrans,
                         &K_, &M_, &N_,
                         &one,
                         (%(float_type)s*)PyArray_DATA(col) + tid * col_stride + g * group_col_stride, &N_,
                         (%(float_type)s*)PyArray_DATA(top) + n * batch_top_stride + g * group_top_stride, &N_,
                         (n == 0) ? &zero : &one,
                         (%(float_type)s*)PyArray_DATA(local_weight) + g * group_weight_stride +
                             tid * weight_dim[1], &K_);
            }
        }
        // Restore to previous blas threads
        %(blas_set_num_threads)s(blas_threads_saved);
        // Aggregate the per-thread accumulators into weight. The destination
        // must be zeroed over ALL numgroups groups (M_ * K_ * numgroups
        // elements) -- previously only M_ * K_ elements were cleared, which
        // left groups > 0 uninitialized when numgroups > 1.
        memset((%(float_type)s*)PyArray_DATA(weight), 0, M_ * K_ * numgroups * sizeof(%(float_type)s));
        /*
         * Put index "j" into outer loop to get the
         * correct result when openmp is used.
         */
        %(omp_flags)s
        for(int j = 0; j < weight_dim[1]; ++j){
            for(int i = 0; i < max_threads; ++i){
                ((%(float_type)s*)PyArray_DATA(weight))[j] +=
                        *((%(float_type)s*)PyArray_DATA(local_weight) +
                          i * weight_dim[1] + j);
            }
        }
        Py_DECREF(local_weight);
    }
    else if (direction == 2) {  // backprop wrt. inputs
        output = bottom;
        // bottom is set to zero here rather than inside of col2im
        PyArray_FILLWBYTE(bottom, 0);
        // full convolution: gemm, then col2im3d
        // Iterate over batch
        int blas_threads_saved = %(blas_get_num_threads)s;
        // Always forcing gemm to one thread when OpenMP is enabled for best and stable performance.
        %(blas_set_num_threads)s(1);
        %(omp_flags)s
        for (int n = 0; n < batchSize; ++n) {
            int tid = %(omp_get_thread_num)s;
            for ( int g = 0; g < numgroups; ++g){
                // gemm into columns
                %(gemm)s(&NTrans, &Trans,
                         &N_, &K_, &M_,
                         &one,
                         (%(float_type)s*)PyArray_DATA(top) + n * batch_top_stride + g * group_top_stride, &N_,
                         (%(float_type)s*)PyArray_DATA(weight) + g * group_weight_stride, &K_,
                         &zero,
                         (%(float_type)s*)PyArray_DATA(col) + tid * col_stride + g * group_col_stride, &N_);
            }
            // col2im back to the data
            col2im3d((%(float_type)s*)PyArray_DATA(col) + tid * col_stride, nChannels,
                     bottomHeight, bottomWidth, bottomDepth,
                     kH, kW, kD, dilH, dilW, dilD, padH, padW, padD, dH, dW, dD,
                     (%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride);
        }
        // Restore to previous blas threads
        %(blas_set_num_threads)s(blas_threads_saved);
    }
    // Free temporary columns
    Py_DECREF(col);
    // decref from contiguous check
    Py_DECREF(bottom);
    Py_DECREF(weight);
    Py_DECREF(top);
    // Note that we don't change the refcount of the output matrix here. Output
    // (re)allocation and refcounting is done in BaseCorr3dMM.c_code_helper();
    // in here output is just aliased to one of bottom, weights, or top.
    return output;
}
// This uses a lot of code from Caffe (http://caffe.berkeleyvision.org/);
// sources are clearly marked. Below we reproduce the original license of
// the Caffe software.
/*
Copyright (c) 2014, The Regents of the University of California (Regents)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cpp)
// Loops for fast unfold + copy
// Unfold a 2-D image into a column matrix ("im2col") so that a valid
// correlation reduces to a single GEMM call.
// data_im  : C-contiguous input of shape (channels, height, width).
// data_col : output of shape (channels*kernel_h*kernel_w,
//            height_col*width_col); taps that fall into the (possibly
//            asymmetric: _hl/_hr left/right, _wl/_wr) zero padding are
//            written as 0.
// NOTE: %(float_type)s and the doubled %% are Python template substitutions;
// this source is %%-formatted before compilation.
void im2col(const %(float_type)s* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int dilation_h, const int dilation_w,
const int pad_hl, const int pad_hr, const int pad_wl, const int pad_wr,
const int stride_h, const int stride_w,
%(float_type)s* data_col) {
// Implicit dilated kernel size
int dil_kernel_h = (kernel_h - 1) * dilation_h + 1;
int dil_kernel_w = (kernel_w - 1) * dilation_w + 1;
// Output spatial extent of a valid correlation on the padded input.
int height_col = (height + pad_hl + pad_hr - dil_kernel_h) / stride_h + 1;
int width_col = (width + pad_wl + pad_wr - dil_kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
for (int c = 0; c < channels_col; ++c) {
// Decompose row index c into (channel, kernel offsets); width varies fastest.
int w_offset = c %% kernel_w;
int h_offset = (c / kernel_w) %% kernel_h;
int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h) {
int h_pad = h * stride_h - pad_hl + h_offset * dilation_h;
for (int w = 0; w < width_col; ++w) {
int w_pad = w * stride_w - pad_wl + w_offset * dilation_w;
// Copy the input value, or 0 when the tap lies in the padding region.
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_col[(npy_intp)(c * height_col + h) * width_col + w] =
data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad];
else
data_col[(npy_intp)(c * height_col + h) * width_col + w] = 0.;
}
}
}
}
// Unlike the Caffe and PyTensor GPU versions, the data_im array is set to zero
// before the col2im call rather than doing it here. So, the result is just
// accumulated into data_im.
// Scatter a column matrix produced by im2col back into a 2-D image.
// Unlike the Caffe and PyTensor GPU versions, data_im must be zeroed by the
// caller before this is invoked: values are accumulated (+=) into data_im,
// and taps that fall into the (possibly asymmetric) padding are skipped.
// data_col : input of shape (channels*patch_h*patch_w, height_col*width_col).
// data_im  : C-contiguous output of shape (channels, height, width).
// NOTE: %(float_type)s / %% are Python template substitutions applied before
// compilation.
// Fix over previous revision: removed the unused local `num_kernels`.
void col2im(const %(float_type)s* data_col, const int channels,
            const int height, const int width, const int patch_h, const int patch_w,
            const int dilation_h, const int dilation_w,
            const int pad_hl, const int pad_hr, const int pad_wl, const int pad_wr,
            const int stride_h, const int stride_w,
            %(float_type)s* data_im) {
  // Implicit dilated patch extent.
  int dil_patch_h = (patch_h - 1) * dilation_h + 1;
  int dil_patch_w = (patch_w - 1) * dilation_w + 1;
  // Spatial extent of the column matrix (must mirror im2col exactly).
  int height_col = (height + pad_hl + pad_hr - dil_patch_h) / stride_h + 1;
  int width_col = (width + pad_wl + pad_wr - dil_patch_w) / stride_w + 1;
  int channels_col = channels * patch_h * patch_w;
  for (int c = 0; c < channels_col; ++c) {
    // Decompose row index c into (channel, patch offsets); width varies fastest.
    int w_offset = c %% patch_w;
    int h_offset = (c / patch_w) %% patch_h;
    int c_im = c / patch_h / patch_w;
    for (int h = 0; h < height_col; ++h) {
      int h_pad = h * stride_h - pad_hl + h_offset * dilation_h;
      for (int w = 0; w < width_col; ++w) {
        int w_pad = w * stride_w - pad_wl + w_offset * dilation_w;
        // Accumulate only taps that land inside the (unpadded) image.
        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
          data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad] +=
              data_col[(npy_intp)(c * height_col + h) * width_col + w];
      }
    }
  }
}
// PyTensor op code
// GPU version authors: Arjun Jain, Frederic Bastien, Jan Schlueter
// Reference code: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
// and https://github.com/torch/cunn/blob/master/SpatialConvolutionMM.cu
// CPU version author: Jesse Livezey
// CPU version adapted from GPU version
PyArrayObject* corrMM(PyArrayObject* bottom,
PyArrayObject* weight,
PyArrayObject* top,
const int direction,
const int dH = 1,
const int dW = 1,
const int dilH = 1,
const int dilW = 1,
const int padH_l = 0,
const int padH_r = 0,
const int padW_l = 0,
const int padW_r = 0,
const int numgroups = 1,
const int unshared = 0)
{
if (PyArray_NDIM(bottom) != 4)
{
PyErr_SetString(PyExc_ValueError, "CorrMM requires bottom of 4D");
return NULL;
}
if (PyArray_TYPE(bottom) != %(float_typenum)s)
{
PyErr_SetString(PyExc_ValueError, "CorrMM received bottom with wrong type.");
return NULL;
}
if (PyArray_NDIM(weight) != (unshared ? 6 : 4))
{
PyErr_Format(PyExc_ValueError, "CorrMM requires weight of %%dD", unshared ? 6 : 4);
return NULL;
}
if (PyArray_TYPE(weight) != %(float_typenum)s)
{
PyErr_SetString(PyExc_ValueError, "CorrMM received weight with wrong type.");
return NULL;
}
if (PyArray_NDIM(top) != 4)
{
PyErr_SetString(PyExc_ValueError, "CorrMM requires top of 4D");
return NULL;
}
if (PyArray_TYPE(top) != %(float_typenum)s)
{
PyErr_SetString(PyExc_ValueError, "CorrMM received top with wrong type.");
return NULL;
}
// Ensure data is contiguous
bottom = PyArray_GETCONTIGUOUS(bottom);
weight = PyArray_GETCONTIGUOUS(weight);
top = PyArray_GETCONTIGUOUS(top);
// Extract some shape information for later and check shape consistency
// bottom: (batchSize, nChannels, bottomHeight, bottomWidth)
const int batchSize = PyArray_DIMS(bottom)[0];
const int nChannels = PyArray_DIMS(bottom)[1];
const int bottomHeight = PyArray_DIMS(bottom)[2];
const int bottomWidth = PyArray_DIMS(bottom)[3];
// normal weights: (nFilters, nChannels, rows, columns)
// unshared weights: (nFilters, topHeight, topWidth, nChannels, rows, columns)
const int nFilters = PyArray_DIMS(weight)[0];
const int kH = PyArray_DIMS(weight)[unshared ? 4 : 2];
const int kW = PyArray_DIMS(weight)[unshared ? 5 : 3];
if (nChannels != PyArray_DIMS(weight)[unshared ? 3 : 1] * numgroups) {
PyErr_SetString(PyExc_ValueError,
"CorrMM images and kernel must have the same stack size\n");
return NULL;
}
if ((nFilters %% numgroups) != 0) {
PyErr_SetString(PyExc_ValueError,
"CorrMM the number of filters must be divisible by the number of groups\n");
return NULL;
}
// implicit dilated filter
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const int topHeightNoDH = (bottomHeight + padH_l + padH_r - dil_kH);
const int topWidthNoDW = (bottomWidth + padW_l + padW_r - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) %% y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
#undef _CONV_FLOORDIV
if (unshared) {
if (topHeight != PyArray_DIMS(weight)[1] ||
topWidth != PyArray_DIMS(weight)[2]) {
PyErr_Format(PyExc_ValueError,
"CorrMM regions in kernel must match output regions:\n"
" bottom shape: %%d %%d %%d %%d\n"
" weight shape: %%d %%ld %%ld %%d %%d %%d"
" (expected %%d %%d %%d %%d %%d %%d)\n"
" top shape(calculated): %%d %%d %%d %%d\n",
batchSize, nChannels, bottomHeight, bottomWidth,
nFilters, PyArray_DIMS(weight)[1],
PyArray_DIMS(weight)[2], nChannels / numgroups, kH, kW,
nFilters, topHeight, topWidth, nChannels / numgroups, kH, kW,
batchSize, nFilters, topHeight, topWidth);
return NULL;
}
if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] ||
topWidth != PyArray_DIMS(top)[3]) {
PyErr_Format(PyExc_ValueError,
"CorrMM shape inconsistency:\n"
" bottom shape: %%d %%d %%d %%d\n"
" weight shape: %%d %%d %%d %%d %%d %%d\n"
" top shape: %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d)\n",
batchSize, nChannels, bottomHeight, bottomWidth,
nFilters, topHeight, topWidth, nChannels / numgroups, kH, kW,
PyArray_DIMS(top)[0], PyArray_DIMS(top)[1],
PyArray_DIMS(top)[2], PyArray_DIMS(top)[3],
batchSize, nFilters, topHeight, topWidth);
return NULL;
}
}
else {
if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] ||
topWidth != PyArray_DIMS(top)[3]) {
PyErr_Format(PyExc_ValueError,
"CorrMM shape inconsistency:\n"
" bottom shape: %%d %%d %%d %%d\n"
" weight shape: %%d %%d %%d %%d\n"
" top shape: %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d)\n",
batchSize, nChannels, bottomHeight, bottomWidth,
nFilters, nChannels / numgroups, kH, kW,
PyArray_DIMS(top)[0], PyArray_DIMS(top)[1],
PyArray_DIMS(top)[2], PyArray_DIMS(top)[3],
batchSize, nFilters, topHeight, topWidth);
return NULL;
}
}
// Create temporary columns
int max_threads = %(omp_get_max_threads)s;
if (batchSize < max_threads) {
max_threads = batchSize;
}
npy_intp col_dim[3];
col_dim[0] = (npy_intp)max_threads;
col_dim[1] = (npy_intp)(nChannels * kW * kH);
col_dim[2] = (npy_intp)(topHeight * topWidth);
//Change to PyArray_ZEROS which is faster than PyArray_EMPTY.
PyArrayObject* col = (PyArrayObject*)PyArray_ZEROS(3,
col_dim,
PyArray_TYPE(top),
0);
if (NULL == col) {
PyErr_Format(PyExc_RuntimeError,
"CorrMM failed to allocate working memory of"
" %%ld x %%ld x %%ld\n",
col_dim[0], col_dim[1], col_dim[2]);
return NULL;
}
// Define some useful variables
const int batch_bottom_stride = PyArray_STRIDES(bottom)[0]/%(n_bytes)f;
const int group_bottom_stride = (PyArray_STRIDES(bottom)[1] * nChannels / numgroups)/%(n_bytes)f;
const int batch_top_stride = PyArray_STRIDES(top)[0]/%(n_bytes)f;
const int group_top_stride = (PyArray_STRIDES(top)[1] * nFilters / numgroups)/%(n_bytes)f;
const int K_ = col_dim[1] / numgroups;
const int N_ = col_dim[2];
const int col_stride = (K_ * N_ * numgroups);
const int group_col_stride = (K_ * N_);
const int group_weight_stride = (PyArray_STRIDES(weight)[0] * nFilters / numgroups)/%(n_bytes)f;
const int M_ = nFilters / numgroups;
const int one_int = 1;
const %(c_float_type)s one = 1.0;
const %(c_float_type)s zero = 0.0;
const int ldw = (K_ * N_);
char NTrans = 'N';
char Trans = 'T';
PyArrayObject *output;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
switch(direction) {
case 0:
output = top;
break;
case 1:
output = weight;
break;
case 2:
output = bottom;
break;
default:
return NULL;
}
PyArray_FILLWBYTE(output, 0);
}
else if (direction == 0) { // forward pass
output = top;
// valid correlation: im2col, then gemm
// Iterate over batch
int blas_threads_saved = %(blas_get_num_threads)s;
// Always forcing gemm to one thread when OpenMP is enabled for best and stable performance.
%(blas_set_num_threads)s(1);
%(omp_flags)s
for (int n = 0; n < batchSize; ++n) {
int tid = %(omp_get_thread_num)s;
// First, im2col
im2col((%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride, nChannels,
bottomHeight,bottomWidth, kH, kW, dilH, dilW, padH_l, padH_r, padW_l, padW_r, dH, dW,
(%(float_type)s*)PyArray_DATA(col)+ tid * col_stride);
// Second, gemm
if (unshared) {
for (int g = 0; g < numgroups; ++g) {
for (int reg = 0; reg < N_; ++reg) {
%(gemv)s(&Trans, &K_, &M_,
&one,
(%(float_type)s*)PyArray_DATA(weight) + g * group_weight_stride + reg * K_, &ldw,
(%(float_type)s*)PyArray_DATA(col) + tid * col_stride + g * group_col_stride + reg, &N_,
&zero,
(%(float_type)s*)PyArray_DATA(top) + n * batch_top_stride + g * group_top_stride + reg, &N_);
}
}
}
else {
for ( int g = 0; g < numgroups; ++g){
// Second, gemm
%(gemm)s(&NTrans, &NTrans,
&N_, &M_, &K_,
&one,
(%(float_type)s*)PyArray_DATA(col) + tid * col_stride + g * group_col_stride, &N_,
(%(float_type)s*)PyArray_DATA(weight) + g * group_weight_stride, &K_,
&zero,
(%(float_type)s*)PyArray_DATA(top) + n * batch_top_stride + g * group_top_stride, &N_);
}
}
}
// Restore to previous blas threads
%(blas_set_num_threads)s(blas_threads_saved);
/*
// Original caffe code for comparison
// Note that this code was translated from the PyTensor GPU code,
// not the Caffe CPU code.
// https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
// Note that this is for grouped convolution; we can ignore groups here,
// but the group-related offsets help explain what M_, N_ and K_ are
int weight_offset = M_ * K_;
int col_offset = K_ * N_;
int top_offset = M_ * N_;
for (int n = 0; n < num_; ++n) {
// First, im2col
im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
width_, kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
col_data);
// Second, innerproduct with groups
for (int g = 0; g < group_; ++g) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
(Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
(Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g);
== (see https://github.com/BVLC/caffe/blob/master/src/caffe/util/math_functions.cu#L16)
cublasSgemm(CUBLAS_OP_N, CUBLAS_OP_N,
N_, M_, K_,
1.,
col_data + col_offset * g, N_,
weight + weight_offset * g, K_,
0.,
top_data + (*top)[i]->offset(n) + top_offset * g, N_);
}
}
*/
}
else if (direction == 1) { // backprop wrt. weights
output = weight;
npy_intp weight_dim[2];
weight_dim[0] = (npy_intp)max_threads;
if (unshared)
weight_dim[1] = (npy_intp)(M_ * N_ * K_ * numgroups);
else
weight_dim[1] = (npy_intp)(M_ * K_ * numgroups);
PyArrayObject* local_weight = (PyArrayObject*)PyArray_ZEROS(2,
weight_dim, PyArray_TYPE(weight), 0);
if (NULL == local_weight)
{
PyErr_Format(PyExc_RuntimeError,
"CorrMM failed to allocate weight memory of %%ld x %%ld\n",
weight_dim[0], weight_dim[1]);
return NULL;
}
// valid convolution: im2col, then gemm
// Iterate over batch
int blas_threads_saved = %(blas_get_num_threads)s;
// Always forcing gemm to one thread when OpenMP is enabled for best and stable performance.
%(blas_set_num_threads)s(1);
// OMP for batch-level paralization
%(omp_flags)s
for (int n = 0; n < batchSize; ++n) {
int tid = %(omp_get_thread_num)s;
// First, im2col
im2col((%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride,
nChannels, bottomHeight,bottomWidth, kH, kW, dilH, dilW, padH_l, padH_r, padW_l, padW_r, dH, dW,
(%(float_type)s*)PyArray_DATA(col)+ tid * col_stride);
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
if (unshared) {
for (int g = 0; g < numgroups; ++g) {
for (int reg = 0; reg < N_; ++reg) {
%(gemm)s(&Trans, &NTrans,
&K_, &M_, &one_int,
&one,
(%(float_type)s*)PyArray_DATA(col) + tid * col_stride + g * group_col_stride + reg, &N_,
(%(float_type)s*)PyArray_DATA(top) + g * group_top_stride + n * batch_top_stride + reg, &N_,
(n == 0) ? &zero : &one,
(%(float_type)s*)PyArray_DATA(local_weight) + g * group_weight_stride + reg * K_ +
tid * weight_dim[1], &ldw);
}
}
}
else {
for(int g = 0; g < numgroups; ++g){
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
%(gemm)s(&Trans, &NTrans,
&K_, &M_, &N_,
&one,
(%(float_type)s*)PyArray_DATA(col) + tid * col_stride + g * group_col_stride, &N_,
(%(float_type)s*)PyArray_DATA(top) + g * group_top_stride + n * batch_top_stride, &N_,
(n == 0) ? &zero : &one,
(%(float_type)s*)PyArray_DATA(local_weight) + g * group_weight_stride +
tid * weight_dim[1], &K_);
}
}
}
// Restore to previous blas threads
%(blas_set_num_threads)s(blas_threads_saved);
//aggregate weights
memset((%(float_type)s*)PyArray_DATA(weight), 0, weight_dim[1]*sizeof(%(float_type)s));
/*
* Put index "j" into outer loop to get the
* correct result when openmp is used.
*/
%(omp_flags)s
for(int j = 0; j < weight_dim[1]; ++j){
for(int i = 0; i < max_threads; ++i){
((%(float_type)s*)PyArray_DATA(weight))[j] +=
*((%(float_type)s*)PyArray_DATA(local_weight) +
i * weight_dim[1] + j);
}
}
Py_DECREF(local_weight);
/*
// Original caffe code for comparison
// Note that this code was translated from the PyTensor GPU code,
// not the Caffe CPU code.
// https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
// Note that this is for grouped convolution; we can ignore groups
for (int n = 0; n < num_; ++n) {
// Since we saved memory in the forward pass by not storing all col
// data, we will need to recompute them.
im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
stride_h_, stride_w_, col_data);
// gradient w.r.t. weight. Note that we will accumulate diffs.
for (int g = 0; g < group_; ++g) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
(Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
col_data + col_offset * g, (Dtype)1.,
weight_diff + weight_offset * g);
== (see https://github.com/BVLC/caffe/blob/master/src/caffe/util/math_functions.cu#L16)
cublasSgemm(CUBLAS_OP_T, CUBLAS_OP_N, K_, M_, N_,
1.0,
col_data + col_offset * g, N_,
top_diff + top[i]->offset(n) + top_offset * g, N_,
1.0,
weight_diff + weight_offset * g, K_);
}
}
*/
}
else if (direction == 2) { // backprop wrt. inputs
output = bottom;
// bottom is set to zero here rather than inside of col2im
PyArray_FILLWBYTE(bottom, 0);
// full convolution: gemm, then col2im
// Iterate over batch
int blas_threads_saved = %(blas_get_num_threads)s;
// Always forcing gemm to one thread when OpenMP is enabled for best and stable performance.
%(blas_set_num_threads)s(1);
%(omp_flags)s
for (int n = 0; n < batchSize; ++n) {
int tid = %(omp_get_thread_num)s;
if (unshared) {
for (int g = 0; g < numgroups; ++g){
for (int reg = 0; reg < N_; ++reg){
%(gemm)s(&NTrans, &Trans,
&one_int, &K_, &M_,
&one,
(%(float_type)s*)PyArray_DATA(top) + g * group_top_stride + n * batch_top_stride + reg, &N_,
(%(float_type)s*)PyArray_DATA(weight) + g * group_weight_stride + reg * K_, &ldw,
&zero,
(%(float_type)s*)PyArray_DATA(col) + tid * col_stride + g * group_col_stride + reg, &N_);
}
}
}
else {
for (int g = 0; g < numgroups; ++g) {
%(gemm)s(&NTrans, &Trans,
&N_, &K_, &M_,
&one,
(%(float_type)s*)PyArray_DATA(top) + g * group_top_stride + n * batch_top_stride, &N_,
(%(float_type)s*)PyArray_DATA(weight) + g * group_weight_stride, &K_,
&zero,
(%(float_type)s*)PyArray_DATA(col) + tid * col_stride + g * group_col_stride, &N_);
}
}
// col2im back to the data
col2im((%(float_type)s*)PyArray_DATA(col) + tid * col_stride, nChannels, bottomHeight, bottomWidth,
kH, kW, dilH, dilW, padH_l, padH_r, padW_l, padW_r,
dH, dW, (%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride);
}
// Restore to previous blas threads
%(blas_set_num_threads)s(blas_threads_saved);
/*
// Original caffe code for comparison
// Note that this code was translated from the PyTensor GPU code,
// not the Caffe CPU code.
// https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
for (int n = 0; n < num_; ++n) {
// gradient w.r.t. bottom data, if necessary
if (propagate_down[i]) {
for (int g = 0; g < group_; ++g) {
caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
(Dtype)1., weight + weight_offset * g,
top_diff + top[i]->offset(n) + top_offset * g,
(Dtype)0., col_diff + col_offset * g);
== (see https://github.com/BVLC/caffe/blob/master/src/caffe/util/math_functions.cu#L16)
cublasSgemm(CUBLAS_OP_N, CUBLAS_OP_T, N_, K_, M_,
1.,
top_diff + top[i]->offset(n) + top_offset * g, N_,
weight + weight_offset * g, K_,
0.,
col_diff + col_offset * g, N_);
}
// col2im back to the data
col2im_gpu(col_diff, channels_, height_, width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
bottom_diff + (*bottom)[i]->offset(n));
}
}
*/
}
// Free temporary columns
Py_DECREF(col);
// decref from contiguous check
Py_DECREF(bottom);
Py_DECREF(weight);
Py_DECREF(top);
// Note that we don't change the refcount of the output matrix here. Output
// (re)allocation and refcounting is done in BaseCorrMM.c_code_helper();
// in here output is just aliased to one of bottom, weights, or top.
return output;
}
import logging
from pathlib import Path
import pytensor
from pytensor.configdefaults import config
from pytensor.graph.basic import Apply
from pytensor.graph.op import _NoPythonOp
from pytensor.link.c.op import OpenMPOp
from pytensor.link.c.params_type import ParamsType
from pytensor.link.c.type import EnumList
from pytensor.scalar import int64
from pytensor.tensor import blas_headers
from pytensor.tensor.basic import as_tensor_variable
from pytensor.tensor.blas import blas_header_version, ldflags
from pytensor.tensor.conv.abstract_conv import get_conv_output_shape
from pytensor.tensor.type import TensorType
# Module-level logger, named after this module for targeted log filtering.
_logger = logging.getLogger(__name__)
# Directory containing the C implementation templates (e.g. "corr3d_gemm.c")
# that are read and %-substituted in c_support_code_apply below.
C_CODE_PATH = Path(__file__).parent / "c_code"
class BaseCorr3dMM(OpenMPOp, _NoPythonOp):
    """
    Base class for `Corr3dMM`, `Corr3dMM_gradWeights` and
    `Corr3dMM_gradInputs`. Cannot be used directly.

    Every sub-class must define internal attribute ``_direction`` out of __init__().
    ``_direction`` must take one of following values:

    - "forward" to correlate bottom with weights and store results in top.
    - "backprop weights" to do a valid convolution of bottom with top
      (swapping the first two dimensions) and store results in weights.
    - "backprop inputs" to do a full convolution of top with weights
      (swapping the first two dimensions) and store results in bottom.

    Parameters
    ----------
    border_mode : {'valid', 'full', 'half'}
        Additionally, the padding size could be directly specified by an integer
        or a tuple of three of integers
    subsample
        Perform subsampling of the output (default: (1, 1, 1)).
    filter_dilation
        Perform dilated correlation (default: (1, 1, 1))
    num_groups
        Perform grouped convolutions (default: 1)
    """

    check_broadcast = False
    __props__ = ("border_mode", "subsample", "filter_dilation", "num_groups")
    _direction: str | None = None

    # Runtime parameters passed to the C code as a single params struct.
    params_type = ParamsType(
        direction=EnumList(
            ("DIRECTION_FORWARD", "forward"),  # 0
            ("DIRECTION_BACKPROP_WEIGHTS", "backprop weights"),  # 1
            ("DIRECTION_BACKPROP_INPUTS", "backprop inputs"),
        ),  # 2
        dH=int64,
        dW=int64,
        dD=int64,
        dilH=int64,
        dilW=int64,
        dilD=int64,
        padH=int64,
        padW=int64,
        padD=int64,
        num_groups=int64,
    )

    def __init__(
        self,
        border_mode="valid",
        subsample=(1, 1, 1),
        filter_dilation=(1, 1, 1),
        openmp=None,
        num_groups=1,
    ):
        super().__init__(openmp=openmp)
        # Normalize border_mode: an int becomes symmetric 3-tuple padding.
        if isinstance(border_mode, int):
            if border_mode < 0:
                raise ValueError(
                    f"invalid border_mode {border_mode}, which must be a "
                    "non-negative integer"
                )
            border_mode = (border_mode, border_mode, border_mode)
        if isinstance(border_mode, tuple):
            if len(border_mode) != 3 or min(border_mode) < 0:
                raise ValueError(
                    f"invalid border_mode {border_mode}, which must be a tuple of "
                    "three non-negative integers"
                )
            pad_h, pad_w, pad_d = map(int, border_mode)
            border_mode = (pad_h, pad_w, pad_d)
        if not (
            (isinstance(border_mode, tuple) and min(border_mode) >= 0)
            or border_mode in ("valid", "full", "half")
        ):
            raise ValueError(
                f"invalid border_mode {border_mode}, which must be either "
                '"valid", "full", "half", an integer or a tuple of three'
                " integers"
            )
        self.border_mode = border_mode
        if len(subsample) != 3:
            raise ValueError("subsample must have three elements")
        if len(filter_dilation) != 3:
            raise ValueError("filter_dilation must have three elements")
        self.subsample = tuple(subsample)
        self.filter_dilation = tuple(filter_dilation)
        if num_groups < 1:
            raise ValueError("Number of groups should be greater than 0")
        self.num_groups = num_groups
        # Detect the BLAS flavor so the C code can pin its thread count
        # around the GEMM calls (see c_support_code_apply).
        if not config.blas__ldflags:
            # PyTensor will use a NumPy C implementation of [sd]gemm_ instead.
            self.blas_type = ""
        else:
            if "openblas" in config.blas__ldflags:
                self.blas_type = "openblas"
            elif "mkl" in config.blas__ldflags:
                self.blas_type = "mkl"
            else:
                self.blas_type = ""
        if self._direction not in ("forward", "backprop weights", "backprop inputs"):
            raise ValueError(
                "_direction must be one of 'forward', "
                "'backprop weights', 'backprop inputs'"
            )

    @property
    def pad(self):
        # Sentinel encoding used by the C code: -1 means "half", -2 "full".
        if self.border_mode == "half":
            return (-1, -1, -1)
        elif self.border_mode == "full":
            return (-2, -2, -2)
        elif isinstance(self.border_mode, tuple):
            return self.border_mode
        else:
            assert self.border_mode == "valid"
            return (0, 0, 0)

    # Direction should be converted to real enum value,
    # as it is compared to integer later in c_code_helper().
    direction = property(lambda self: self.params_type.enum_from_alias(self._direction))

    dH = property(lambda self: self.subsample[0])
    dW = property(lambda self: self.subsample[1])
    dD = property(lambda self: self.subsample[2])
    dilH = property(lambda self: self.filter_dilation[0])
    dilW = property(lambda self: self.filter_dilation[1])
    dilD = property(lambda self: self.filter_dilation[2])
    padH = property(lambda self: self.pad[0])
    padW = property(lambda self: self.pad[1])
    padD = property(lambda self: self.pad[2])

    def __str__(self):
        return f"{self.__class__.__name__}{{{self.border_mode}, {self.subsample!s}, {self.filter_dilation!s}, {self.num_groups!s}}}"

    @staticmethod
    def as_common_dtype(in1, in2):
        """
        Upcast input variables if necessary.
        """
        dtype = pytensor.scalar.upcast(in1.dtype, in2.dtype)
        return in1.astype(dtype), in2.astype(dtype)

    def __setstate__(self, d):
        # Backwards compatibility for pickles created before num_groups existed.
        self.__dict__.update(d)
        if not hasattr(self, "num_groups"):
            self.num_groups = 1

    def c_support_code(self, **kwargs):
        ccodes = blas_headers.blas_header_text()
        if self.blas_type == "openblas":
            ccodes += blas_headers.openblas_threads_text()
        elif self.blas_type == "mkl":
            ccodes += blas_headers.mkl_threads_text()
        return ccodes

    def c_libraries(self, **kwargs):
        return ldflags()

    def c_compile_args(self, **kwargs):
        compile_args = ldflags(libs=False, flags=True)
        compile_args += super().c_compile_args(**kwargs)
        return compile_args

    def c_lib_dirs(self, **kwargs):
        return ldflags(libs=False, libs_dir=True)

    def c_header_dirs(self, **kwargs):
        return ldflags(libs=False, include_dir=True)

    def c_headers(self, **kwargs):
        headers = ["<stdio.h>"]
        headers += super().c_headers(**kwargs)
        return headers

    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
        # (bumped 8 -> 9: fixed the 4-D/5-D ndim check in c_code_helper)
        return (9, self.openmp, blas_header_version())

    def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of
        # these files
        sub = {}
        dtype = str(node.__dict__["inputs"][0].dtype)
        assert dtype in ("float32", "float64")
        if dtype == "float32":
            sub["gemm"] = "sgemm_"
            sub["float_type"] = "npy_float"
            sub["float_typenum"] = "NPY_FLOAT"
            sub["n_bytes"] = 4
            sub["c_float_type"] = "float"
        else:
            sub["gemm"] = "dgemm_"
            sub["float_type"] = "npy_double"
            sub["float_typenum"] = "NPY_DOUBLE"
            sub["n_bytes"] = 8
            sub["c_float_type"] = "double"

        if self.openmp:
            sub["omp_flags"] = "#pragma omp parallel for schedule(static)"
            sub["omp_get_max_threads"] = "omp_get_max_threads()"
            sub["omp_get_thread_num"] = "omp_get_thread_num()"

            if self.blas_type == "openblas":
                sub["blas_set_num_threads"] = "openblas_set_num_threads"
                sub["blas_get_num_threads"] = "openblas_get_num_threads()"
            elif self.blas_type == "mkl":
                sub["blas_set_num_threads"] = "mkl_set_num_threads"
                sub["blas_get_num_threads"] = "mkl_get_max_threads()"
            else:
                sub["blas_set_num_threads"] = ""
                sub["blas_get_num_threads"] = "0"
        else:
            sub["omp_flags"] = ""
            sub["omp_get_max_threads"] = "1"
            sub["omp_get_thread_num"] = "0"
            sub["blas_set_num_threads"] = ""
            sub["blas_get_num_threads"] = "0"

        final_code = Path(C_CODE_PATH / "corr3d_gemm.c").read_text("utf-8")
        return final_code % sub

    def c_code_helper(
        self, bottom, weights, top, sub, height=None, width=None, depth=None
    ):
        """
        This generates the C code for Corr3dMM (direction="forward"),
        Corr3dMM_gradWeights (direction="backprop weights"), and
        Corr3dMM_gradInputs (direction="backprop inputs").
        Depending on the direction, one of bottom, weights, top will
        receive the output, while the other two serve as inputs.

        :param bottom: Variable name of the input images in the forward pass,
            or the gradient of the input images in backprop wrt. inputs
        :param weights: Variable name of the filters in the forward pass,
            or the gradient of the filters in backprop wrt. weights
        :param top: Variable name of the output images / feature maps in the
            forward pass, or the gradient of the outputs in the backprop passes
        :param sub: Dictionary of substitutions usable to help generating the
            C code.
        :param height: If self.subsample[0] != 1, a variable giving the height
            of the filters for direction="backprop weights" or the height of
            the input images for direction="backprop inputs".
            If self.border_mode == 'half', a variable giving the height of the
            filters for direction="backprop weights". Ignored otherwise.
        :param width: If self.subsample[1] != 1, a variable giving the width
            of the filters for direction="backprop weights" or the width of the
            input images for direction="backprop inputs".
            If self.border_mode == 'half', a variable giving the width of the
            filters for direction="backprop weights". Ignored otherwise.
        :param depth: If self.subsample[1] != 1, a variable giving the depth
            of the filters for direction="backprop weights" or the depth of the
            input images for direction="backprop inputs".
            If self.border_mode == 'half', a variable giving the depth of the
            filters for direction="backprop weights". Ignored otherwise.
        """
        # When subsampling, we cannot unambiguously infer the height and width
        # of bottom and weights from top, so we require them to be given.
        # Similarly, when border_mode="half", we cannot infer the weight size.
        if height:
            height = f"(*(npy_int64 *)(PyArray_DATA({height})))"
        else:
            if ((self.direction != 0) and (self.dH != 1)) or (
                (self.direction == 1) and (self.padH == -1)
            ):
                raise ValueError(
                    "height must be given for backprop with vertical sampling or border_mode='half'"
                )
            height = "-1"
        if width:
            width = f"(*(npy_int64 *)(PyArray_DATA({width})))"
        else:
            if ((self.direction != 0) and (self.dW != 1)) or (
                (self.direction == 1) and (self.padW == -1)
            ):
                raise ValueError(
                    "width must be given for backprop with horizontal sampling or border_mode='half'"
                )
            width = "-1"
        if depth:
            depth = f"(*(npy_int64 *)(PyArray_DATA({depth})))"
        else:
            if ((self.direction != 0) and (self.dD != 1)) or (
                (self.direction == 1) and (self.padD == -1)
            ):
                raise ValueError(
                    "depth must be given for backprop with depth sampling or border_mode='half'"
                )
            depth = "-1"

        fail = sub["fail"]
        params = sub["params"]

        return f"""
    // Mandatory args
    int direction = {params}->direction;  // forward, bprop weights, bprop inputs

    // Optional args
    int dH = {params}->dH;
    int dW = {params}->dW;
    int dD = {params}->dD;
    int dilH = {params}->dilH;
    int dilW = {params}->dilW;
    int dilD = {params}->dilD;
    int padH = {params}->padH;
    int padW = {params}->padW;
    int padD = {params}->padD;
    int numgroups = {params}->num_groups;

    PyArrayObject * bottom = {bottom};
    PyArrayObject * weights = {weights};
    PyArrayObject * top = {top};
    PyArrayObject * out2 = NULL;
    PyArrayObject **out = NULL;

    switch({params}->direction) {{
        case DIRECTION_FORWARD:
            out = &{top};
            break;
        case DIRECTION_BACKPROP_WEIGHTS:
            out = &{weights};
            break;
        case DIRECTION_BACKPROP_INPUTS:
            out = &{bottom};
            break;
        default:
            PyErr_SetString(PyExc_ValueError, "CPU Corr3dMM: Invalid direction.");
            {{{fail}}}
            break;
    }}

    // Obtain or infer kernel width, height and depth
    // (we need to know it early to be able to handle auto-padding)
    int kH, kW, kD, dil_kH, dil_kW, dil_kD;
    if (direction != 1) {{
        // weight is an input variable, we can just read its shape
        kH = PyArray_DIMS(weights)[2];
        kW = PyArray_DIMS(weights)[3];
        kD = PyArray_DIMS(weights)[4];
    }}
    else {{
        if ({height} != -1) {{
            // kernel height is specified (perhaps vertical subsampling or half padding)
            kH = {height};
        }}
        else if (padH == -2) {{
            // vertical full padding, we can infer the kernel height
            kH = (2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1;
        }}
        else {{
            // explicit padding, we can infer the kernel height
            kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
        }}
        if ({width} != -1) {{
            kW = {width};
        }}
        else if (padW == -2) {{
            kW = (2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }}
        else {{
            kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }}
        if ({depth} != -1) {{
            kD = {depth};
        }}
        else if (padD == -2) {{
            kD = (2 - PyArray_DIMS(bottom)[4] + (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
        }}
        else {{
            kD = (PyArray_DIMS(bottom)[4] + 2*padD - (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
        }}
    }}

    // Implicit dilated kernel size
    dil_kH = (kH - 1) * dilH + 1;
    dil_kW = (kW - 1) * dilW + 1;
    dil_kD = (kD - 1) * dilD + 1;

    // Auto-padding if requested
    if (padH == -1) {{  // vertical half padding
        padH = dil_kH / 2;
    }}
    else if (padH == -2) {{  // vertical full padding
        padH = dil_kH - 1;
    }}
    else if (padH < 0) {{
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padH must be >= -2");
        {fail}
    }}
    if (padW == -1) {{  // horizontal half padding
        padW = dil_kW / 2;
    }}
    else if (padW == -2) {{  // horizontal full padding
        padW = dil_kW - 1;
    }}
    else if (padW < 0) {{
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padW must be >= -2");
        {fail}
    }}
    if (padD == -1) {{  // depth half padding
        padD = dil_kD / 2;
    }}
    else if (padD == -2) {{  // depth full padding
        padD = dil_kD - 1;
    }}
    else if (padD < 0) {{
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padD must be >= -2");
        {fail}
    }}

    // Infer output shape
    npy_intp out_dim[5];
    switch(direction) {{
    case 0:  // forward pass
        // output is top: (batchsize, num_filters, height, width, depth)
        // height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
        out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
        out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1);
        out_dim[4] = (npy_intp)((PyArray_DIMS(bottom)[4] + 2*padD - ((PyArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1);
        if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
        {{
            PyErr_Format(PyExc_ValueError,
                         "Corr3dMM: impossible output shape\\n"
                         "  bottom shape: %ld x %ld x %ld x %ld x %ld\\n"
                         "  weights shape: %ld x %ld x %ld x %ld x %ld\\n"
                         "  top shape: %ld x %ld x %ld x %ld x %ld\\n",
                         (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                         (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                         (long int)PyArray_DIMS(bottom)[4],
                         (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                         (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                         (long int)PyArray_DIMS(weights)[4],
                         (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                         (long int)out_dim[3], (long int)out_dim[4]);
            {fail}
        }}
        break;
    case 1:  // backprop wrt. weights
        // output is weights: (num_filters, num_channels, height, width, depth)
        // height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
        out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1] / numgroups;
        out_dim[2] = (npy_intp)kH;  // already inferred further above
        out_dim[3] = (npy_intp)kW;  // how convenient
        out_dim[4] = (npy_intp)kD;
        if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
        {{
            PyErr_Format(PyExc_ValueError,
                         "Corr3dMM backprop wrt. weights: impossible output shape\\n"
                         "  bottom shape: %ld x %ld x %ld x %ld x %ld\\n"
                         "  weights shape: %ld x %ld x %ld x %ld x %ld\\n"
                         "  top shape: %ld x %ld x %ld x %ld x %ld\\n",
                         (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                         (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                         (long int)PyArray_DIMS(bottom)[4],
                         (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                         (long int)out_dim[3], (long int)out_dim[4],
                         (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                         (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3],
                         (long int)PyArray_DIMS(top)[4]);
            {fail}
        }}
        break;
    case 2:  // backprop wrt. inputs
        // output is bottom: (batchsize, num_channels, height, width, depth)
        // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1] * numgroups;
        out_dim[2] = (npy_intp)(({height} != -1) ? {height} : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
        out_dim[3] = (npy_intp)(({width} != -1) ? {width} : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
        out_dim[4] = (npy_intp)(({depth} != -1) ? {depth} : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
        if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
        {{
            PyErr_Format(PyExc_ValueError,
                         "Corr3dMM backprop wrt. inputs: impossible output shape\\n"
                         "  bottom shape: %ld x %ld x %ld x %ld x %ld\\n"
                         "  weights shape: %ld x %ld x %ld x %ld x %ld\\n"
                         "  top shape: %ld x %ld x %ld x %ld x %ld\\n",
                         (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                         (long int)out_dim[3], (long int)out_dim[4],
                         (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                         (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                         (long int)PyArray_DIMS(weights)[4],
                         (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                         (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3],
                         (long int)PyArray_DIMS(top)[4]);
            {fail}
        }}
        break;
    default:
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2\\n");
        {fail}
    }}

    // Prepare output array
    int typenum;
    // BUGFIX: the output of this op is always 5-D; the reuse check previously
    // tested NDIM==4, which could never match a valid 5-D output and would
    // read PyArray_DIMS(*out)[4] out of bounds on a genuinely 4-D array.
    if ( !(*out
           && PyArray_NDIM(*out)==5
           && PyArray_IS_C_CONTIGUOUS(*out)
           && PyArray_DIMS(*out)[0]==out_dim[0]
           && PyArray_DIMS(*out)[1]==out_dim[1]
           && PyArray_DIMS(*out)[2]==out_dim[2]
           && PyArray_DIMS(*out)[3]==out_dim[3]
           && PyArray_DIMS(*out)[4]==out_dim[4]))
    {{
        Py_XDECREF(*out);
        if (direction != 1) {{
          typenum = PyArray_TYPE(weights);
        }}
        else {{
          typenum = PyArray_TYPE(bottom);
        }}
        //Change to PyArray_ZEROS which is faster than PyArray_EMPTY.
        *out = (PyArrayObject*)PyArray_ZEROS(5,
                                          out_dim,
                                          typenum,
                                          0);
        if (NULL == *out)
        {{
            PyErr_Format(PyExc_RuntimeError,
                    "BaseCorr3dMM: Failed to allocate output of %lld x %lld x %lld x %lld x %lld",
                    (long long)out_dim[0], (long long)out_dim[1],
                    (long long)out_dim[2], (long long)out_dim[3], (long long)out_dim[4]);
            {fail}
        }}
    }}

    // Call corr3dMM code
    out2 = corr3dMM({bottom}, {weights}, {top}, direction,
                    dH, dW, dD, dilH, dilW, dilD, padH, padW, padD,
                    numgroups);
    if (out2==NULL){{
       {fail}
    }}
    assert (out2 == *out);
"""
class Corr3dMM(BaseCorr3dMM):
    """
    CPU 3D correlation implemented with matrix multiplication (im2col + GEMM).

    Parameters
    ----------
    border_mode
        The width of a border of implicit zeros to pad the
        input with. Must be a tuple with 3 elements giving the width of
        the padding on each side, or a single integer to pad the same
        on all sides, or a string shortcut setting the padding at runtime:
        ``'valid'`` for ``(0, 0, 0)`` (valid convolution, no padding), ``'full'``
        for ``(kernel_rows - 1, kernel_columns - 1, kernel_depth - 1)``
        (full convolution), ``'half'`` for ``(kernel_rows // 2,
        kernel_columns // 2, kernel_depth // 2)`` (same convolution for
        odd-sized kernels). Note that the three widths are each
        applied twice, once per side (left and right, top and bottom, front
        and back).
    subsample
        The subsample operation applied to each output image. Should be a tuple
        with 3 elements. Set to `(1, 1, 1)` to disable subsampling.
    filter_dilation
        The filter dilation operation applied to each input image.
        Should be a tuple with 3 elements.
        Set to `(1, 1, 1)` to disable filter dilation.
    num_groups
        Perform grouped convolutions (default: 1)
    """

    _direction = "forward"

    def make_node(self, img, kern):
        # Coerce both operands to tensor variables on a common dtype.
        img = as_tensor_variable(img)
        kern = as_tensor_variable(kern)
        img, kern = self.as_common_dtype(img, kern)
        if img.type.ndim != 5:
            raise TypeError("img must be 5D tensor")
        if kern.type.ndim != 5:
            raise TypeError("kern must be 5D tensor")
        # Static shape: batch and filter dims are known only when broadcastable;
        # the three spatial dims are never known symbolically here.
        batch_dim = 1 if img.type.shape[0] == 1 else None
        filter_dim = 1 if kern.type.shape[0] == 1 else None
        out_type = TensorType(
            img.type.dtype, shape=[batch_dim, filter_dim, None, None, None]
        )
        return Apply(self, [img, kern], [out_type()])

    def infer_shape(self, fgraph, node, input_shape):
        # Delegate the spatial arithmetic to the shared conv-shape helper.
        image_shape, kernel_shape = input_shape
        out_shape = get_conv_output_shape(
            image_shape,
            kernel_shape,
            self.border_mode,
            self.subsample,
            self.filter_dilation,
        )
        return [out_shape]

    def c_code(self, node, nodename, inp, out_, sub):
        # direction="forward": bottom and weights are inputs, top is the output.
        bottom, weights = inp
        (top,) = out_
        return super().c_code_helper(bottom, weights, top, sub)

    def grad(self, inp, grads):
        bottom, weights = inp
        (top,) = grads
        # Each gradient is computed by the matching backprop Op, configured
        # identically to this forward Op.
        grad_inputs_op = Corr3dMMGradInputs(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            num_groups=self.num_groups,
        )
        grad_weights_op = Corr3dMMGradWeights(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            num_groups=self.num_groups,
        )
        d_bottom = grad_inputs_op(weights, top, bottom.shape[-3:])
        d_weights = grad_weights_op(bottom, top, weights.shape[-3:])
        return d_bottom, d_weights
class Corr3dMMGradWeights(BaseCorr3dMM):
    """
    Gradient wrt. filters for `Corr3dMM`.

    Notes
    -----
    You will not want to use this directly, but rely on
    PyTensor's automatic differentiation or graph optimization to
    use it as needed.
    """

    _direction = "backprop weights"

    def make_node(self, img, topgrad, shape=None):
        img = as_tensor_variable(img)
        topgrad = as_tensor_variable(topgrad)
        img, topgrad = self.as_common_dtype(img, topgrad)
        if img.type.ndim != 5:
            raise TypeError("img must be 5D tensor")
        if topgrad.type.ndim != 5:
            raise TypeError("topgrad must be 5D tensor")
        if shape is None:
            # Without subsampling or half padding, the kernel shape is
            # recoverable from img/topgrad, so no explicit shape is needed.
            if self.subsample != (1, 1, 1) or self.border_mode == "half":
                raise ValueError(
                    "shape must be given if subsample != (1, 1, 1)"
                    ' or border_mode == "half"'
                )
            height_width_depth = []
        else:
            height_width_depth = [
                as_tensor_variable(shape[axis]).astype("int64") for axis in range(3)
            ]
        # Output static shape: (num_filters, channels, kH, kW, kD); only the
        # first two can be pinned (when the corresponding input dim is 1).
        nkern_dim = 1 if topgrad.type.shape[1] == 1 else None
        channel_dim = 1 if img.type.shape[1] == 1 else None
        out_type = TensorType(
            img.type.dtype, shape=[nkern_dim, channel_dim, None, None, None]
        )
        return Apply(self, [img, topgrad, *height_width_depth], [out_type()])

    def infer_shape(self, fgraph, node, input_shape):
        # Decode the border mode into per-axis pad sentinels
        # (-1 = half, -2 = full, >= 0 = explicit).
        mode = self.border_mode
        if mode == "half":
            pads = (-1, -1, -1)
        elif mode == "full":
            pads = (-2, -2, -2)
        elif isinstance(mode, tuple):
            pads = mode
        else:
            assert mode == "valid"
            pads = (0, 0, 0)

        imshp, topshp = input_shape
        channels = imshp[1] // self.num_groups
        nkern = topshp[1]
        im_spatial = list(imshp[2:])
        top_spatial = list(topshp[2:])
        shape_vars = node.inputs[-3:]

        kernel_spatial = []
        for axis in range(3):
            stride = self.subsample[axis]
            pad = pads[axis]
            if stride != 1 or pad == -1:
                # Subsampling or half padding: the kernel size was passed in.
                kernel_spatial.append(shape_vars[axis])
            elif pad == -2:
                # Full padding: invert the full-conv output-size formula.
                kernel_spatial.append(
                    2 - im_spatial[axis] + (top_spatial[axis] - 1) * stride
                )
            else:
                # Explicit padding: invert the valid-conv output-size formula.
                kernel_spatial.append(
                    im_spatial[axis] + 2 * pad - (top_spatial[axis] - 1) * stride
                )
        return [(nkern, channels, *kernel_spatial)]

    def c_code(self, node, nodename, inp, out_, sub):
        # Inputs may or may not carry the three kernel-size scalars.
        bottom, top, *size_vars = inp
        if not size_vars:
            size_vars = [None, None, None]
        (weights,) = out_
        return super().c_code_helper(bottom, weights, top, sub, *size_vars)

    def grad(self, inp, grads):
        bottom, top = inp[:2]
        (weights,) = grads
        d_bottom = Corr3dMMGradInputs(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            num_groups=self.num_groups,
        )(weights, top, bottom.shape[-3:])
        d_top = Corr3dMM(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            num_groups=self.num_groups,
        )(bottom, weights)
        if len(inp) == 5:
            # The three shape scalars are not differentiable.
            d_height_width_depth = (pytensor.gradient.DisconnectedType()(),) * 3
        else:
            d_height_width_depth = ()
        return (d_bottom, d_top, *d_height_width_depth)

    def connection_pattern(self, node):
        # height/width/depth (when present) are shape hints: disconnected.
        if node.nin == 2:
            return [[1], [1]]
        return [[1], [1], [0], [0], [0]]
class Corr3dMMGradInputs(BaseCorr3dMM):
    """
    Gradient wrt. inputs for `Corr3dMM`.
    Notes
    -----
    You will not want to use this directly, but rely on
    PyTensor's automatic differentiation or graph optimization to
    use it as needed.
    """
    _direction = "backprop inputs"
    def make_node(self, kern, topgrad, shape=None):
        """Build the Apply node computing d(bottom) from ``kern`` and ``topgrad``.

        Parameters
        ----------
        kern : 5D tensor
            Filters of the forward correlation.
        topgrad : 5D tensor
            Gradient wrt. the forward output.
        shape : sequence of three int scalars, optional
            Spatial shape (H, W, D) of the input images; mandatory when
            subsampling, since it cannot be inferred from ``topgrad``.
        """
        kern = as_tensor_variable(kern)
        topgrad = as_tensor_variable(topgrad)
        kern, topgrad = self.as_common_dtype(kern, topgrad)
        if kern.type.ndim != 5:
            raise TypeError("kern must be 5D tensor")
        if topgrad.type.ndim != 5:
            raise TypeError("topgrad must be 5D tensor")
        if shape is None:
            if self.subsample != (1, 1, 1):
                raise ValueError("shape must be given if subsample != (1, 1, 1)")
            height_width_depth = []
        else:
            height_width_depth = [
                as_tensor_variable(shape[0]).astype("int64"),
                as_tensor_variable(shape[1]).astype("int64"),
                as_tensor_variable(shape[2]).astype("int64"),
            ]
        if self.num_groups > 1:
            # With grouped convolution, the output channel count cannot be
            # read off a single input; leave it unknown.
            out_shape = [
                1 if topgrad.type.shape[0] == 1 else None,
                None,
                None,
                None,
                None,
            ]
        else:
            # Bug fix: the spatial entries here used to be ``False`` — a
            # leftover from the old ``broadcastable`` flags.  In a static
            # ``shape`` spec, ``False`` is coerced with ``int()`` and means a
            # dimension of size 0.  Use ``None`` ("unknown") like the 2D
            # ``CorrMM_gradInputs`` does.
            out_shape = [
                1 if topgrad.type.shape[0] == 1 else None,
                1 if kern.type.shape[1] == 1 else None,
                None,
                None,
                None,
            ]
        dtype = kern.type.dtype
        return Apply(
            self,
            [kern, topgrad, *height_width_depth],
            [TensorType(dtype, shape=out_shape)()],
        )
    def infer_shape(self, fgraph, node, input_shape):
        """Symbolically compute the output (bottom) shape.

        Negative pad sentinels: -1 means "half" padding, -2 means "full"
        padding; both are resolved from the kernel shape below.
        """
        if self.border_mode == "half":
            padH = padW = padD = -1
        elif self.border_mode == "full":
            padH = padW = padD = -2
        elif isinstance(self.border_mode, tuple):
            padH, padW, padD = self.border_mode
        else:
            assert self.border_mode == "valid"
            padH = padW = padD = 0
        dH, dW, dD = self.subsample
        kshp = input_shape[0]
        topshp = input_shape[1]
        ssize, kshp = kshp[1], list(kshp[2:])
        # Input channels = kernel channels * number of groups.
        ssize = ssize * self.num_groups
        bsize, topshp = topshp[0], list(topshp[2:])
        height_width_depth = node.inputs[-3:]
        if padH == -1:
            padH = kshp[0] // 2
        elif padH == -2:
            padH = kshp[0] - 1
        elif padH < -2:
            raise ValueError("Corr3dMM_gradInputs: border_mode must be >= 0.")
        if padW == -1:
            padW = kshp[1] // 2
        elif padW == -2:
            padW = kshp[1] - 1
        elif padW < -2:
            raise ValueError("Corr3dMM_gradInputs: border_mode must be >= 0.")
        if padD == -1:
            padD = kshp[2] // 2
        elif padD == -2:
            padD = kshp[2] - 1
        elif padD < -2:
            raise ValueError("Corr3dMM_gradInputs: border_mode must be >= 0.")
        # When subsampling, the input size is ambiguous and must come from
        # the explicit shape inputs; otherwise invert the forward formula.
        if dH != 1:
            out_shp0 = height_width_depth[0]
        else:
            out_shp0 = (topshp[0] - 1) * dH + kshp[0] - 2 * padH
        if dW != 1:
            out_shp1 = height_width_depth[1]
        else:
            out_shp1 = (topshp[1] - 1) * dW + kshp[1] - 2 * padW
        if dD != 1:
            out_shp2 = height_width_depth[2]
        else:
            out_shp2 = (topshp[2] - 1) * dD + kshp[2] - 2 * padD
        out_shp = (out_shp0, out_shp1, out_shp2)
        return [(bsize, ssize, *out_shp)]
    def c_code(self, node, nodename, inp, out_, sub):
        """Generate the C code computing the input gradient via the shared helper."""
        weights, top = inp[:2]
        height, width, depth = inp[2:] or (None, None, None)
        (bottom,) = out_
        return super().c_code_helper(bottom, weights, top, sub, height, width, depth)
    def grad(self, inp, grads):
        """Gradients of the input-gradient op wrt. its kernel and top inputs."""
        weights, top = inp[:2]
        (bottom,) = grads
        d_weights = Corr3dMMGradWeights(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            num_groups=self.num_groups,
        )(bottom, top, weights.shape[-3:])
        d_top = Corr3dMM(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            num_groups=self.num_groups,
        )(bottom, weights)
        d_height_width_depth = (
            (pytensor.gradient.DisconnectedType()(),) * 3 if len(inp) == 5 else ()
        )
        return (d_weights, d_top, *d_height_width_depth)
    def connection_pattern(self, node):
        """Outputs depend only on the first two inputs; shape inputs are disconnected."""
        if node.nin == 2:
            return [[1], [1]]
        else:
            return [[1], [1], [0], [0], [0]]  # no connection to height, width, depth
import logging
from pathlib import Path
import pytensor
from pytensor.configdefaults import config
from pytensor.graph.basic import Apply
from pytensor.graph.op import _NoPythonOp
from pytensor.link.c.op import OpenMPOp
from pytensor.link.c.params_type import ParamsType
from pytensor.link.c.type import EnumList
from pytensor.scalar import int8, int64
from pytensor.tensor import blas_headers
from pytensor.tensor.basic import as_tensor_variable
from pytensor.tensor.blas import blas_header_version, ldflags
from pytensor.tensor.conv.abstract_conv import get_conv_output_shape
from pytensor.tensor.type import TensorType
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
# Directory holding the C implementation templates (corr_gemm.c).
C_CODE_PATH = Path(__file__).parent / "c_code"
class BaseCorrMM(OpenMPOp, _NoPythonOp):
    """
    Base class for `CorrMM`, `CorrMM_gradWeights` and
    `CorrMM_gradInputs`. Cannot be used directly.
    Every sub-class must define internal attribute ``_direction`` out of __init__().
    ``_direction`` must take one of following values:
    - "forward" to correlate bottom with weights and store results in top.
    - "backprop weights" to do a valid convolution of bottom with top
    (swapping the first two dimensions) and store results in weights.
    - "backprop inputs" to do a full convolution of top with weights
    (swapping the first two dimensions) and store results in bottom.
    Parameters
    ----------
    border_mode : {'valid', 'full', 'half'}
    Additionally, the padding size could be directly specified by an integer,
    a pair of integers, or two pairs of integers.
    subsample
    Perform subsampling of the output (default: (1, 1)).
    filter_dilation
    Perform dilated correlation (default: (1,1))
    num_groups
    Perform grouped convolutions (default: 1)
    unshared
    Perform unshared correlation (default: False)
    """
    # All three directions are handled by one C routine; shape agreement is
    # checked there, so the graph-level broadcast check is disabled.
    check_broadcast = False
    __props__ = (
        "border_mode",
        "subsample",
        "filter_dilation",
        "num_groups",
        "unshared",
    )
    # Set by each concrete subclass; validated in __init__.
    _direction: str | None = None
    # Runtime parameters handed to the generated C code as a params struct.
    params_type = ParamsType(
        direction=EnumList(
            ("DIRECTION_FORWARD", "forward"),  # 0
            ("DIRECTION_BACKPROP_WEIGHTS", "backprop weights"),  # 1
            ("DIRECTION_BACKPROP_INPUTS", "backprop inputs"),  # 2
        ),
        dH=int64,
        dW=int64,
        dilH=int64,
        dilW=int64,
        padH_l=int64,
        padH_r=int64,
        padW_l=int64,
        padW_r=int64,
        num_groups=int64,
        unshared=int8,
    )
    def __init__(
        self,
        border_mode="valid",
        subsample=(1, 1),
        filter_dilation=(1, 1),
        num_groups=1,
        unshared=False,
        openmp=None,
    ):
        """Validate and normalize all hyper-parameters.

        ``border_mode`` is normalized to either one of the strings
        'valid'/'full'/'half' or a pair of (left, right) padding pairs.
        Raises ValueError on any malformed argument.
        """
        super().__init__(openmp=openmp)
        if isinstance(border_mode, int):
            if border_mode < 0:
                raise ValueError(
                    f"invalid border_mode {border_mode}, which must be a "
                    "non-negative integer"
                )
            # Single int -> same symmetric padding on both axes.
            border_mode = ((border_mode, border_mode),) * 2
        elif isinstance(border_mode, tuple):
            if len(border_mode) != 2:
                raise ValueError(
                    f"invalid border_mode {border_mode} which must be a "
                    "tuple of length 2"
                )
            # Normalize each entry to a (left, right) pair of ints.
            border = ()
            for mode in border_mode:
                if isinstance(mode, tuple) and len(mode) == 2 and min(mode) >= 0:
                    border += ((int(mode[0]), int(mode[1])),)
                elif mode >= 0:
                    border += ((int(mode), int(mode)),)
                else:
                    raise ValueError(
                        f"invalid border mode {border_mode}. The tuple can only contain "
                        "integers or tuples of length 2"
                    )
            border_mode = border
        elif border_mode not in ("valid", "full", "half"):
            raise ValueError(
                f"invalid border_mode {border_mode}, which must be either "
                '"valid", "full", "half", an integer or a tuple '
                "of two integers or a pair of integers"
            )
        self.border_mode = border_mode
        if len(subsample) != 2:
            raise ValueError("subsample must have two elements")
        if len(filter_dilation) != 2:
            raise ValueError("filter_dilation must have two elements")
        self.subsample = tuple(subsample)
        self.filter_dilation = tuple(filter_dilation)
        self.unshared = unshared
        # Detect which BLAS the generated C code can use for threading hints.
        if not config.blas__ldflags:
            # PyTensor will use a NumPy C implementation of [sd]gemm_ instead.
            self.blas_type = ""
        else:
            if "openblas" in config.blas__ldflags:
                self.blas_type = "openblas"
            elif "mkl" in config.blas__ldflags:
                self.blas_type = "mkl"
            else:
                self.blas_type = ""
        if self._direction not in ("forward", "backprop weights", "backprop inputs"):
            raise ValueError(
                "_direction must be one of 'forward', "
                "'backprop weights', 'backprop inputs'"
            )
        if num_groups < 1:
            raise ValueError("Number of groups should be greater than 0")
        self.num_groups = num_groups
    @property
    def pad(self):
        """Padding as ((top, bottom), (left, right)).

        Sentinels: -1 encodes 'half' padding and -2 encodes 'full' padding;
        both are resolved against the kernel size inside the C code.
        """
        if self.border_mode == "half":
            return ((-1, -1),) * 2
        elif self.border_mode == "full":
            return ((-2, -2),) * 2
        elif isinstance(self.border_mode, tuple):
            return self.border_mode
        else:
            assert self.border_mode == "valid"
            return ((0, 0),) * 2
    # Direction should be converted to real enum value,
    # as it is compared to integer later in c_code_helper().
    direction = property(lambda self: self.params_type.enum_from_alias(self._direction))
    # Scalar views of the hyper-parameters, matching the params_type fields.
    dH = property(lambda self: self.subsample[0])
    dW = property(lambda self: self.subsample[1])
    dilH = property(lambda self: self.filter_dilation[0])
    dilW = property(lambda self: self.filter_dilation[1])
    padH_l = property(lambda self: self.pad[0][0])
    padH_r = property(lambda self: self.pad[0][1])
    padW_l = property(lambda self: self.pad[1][0])
    padW_r = property(lambda self: self.pad[1][1])
def __str__(self):
return f"{self.__class__.__name__}{{{self.border_mode}, {self.subsample!s}, {self.filter_dilation!s}, {self.num_groups!s} {self.unshared!s}}}"
@staticmethod
def as_common_dtype(in1, in2):
"""
Upcast input variables if necessary.
"""
dtype = pytensor.scalar.upcast(in1.dtype, in2.dtype)
return in1.astype(dtype), in2.astype(dtype)
    def __setstate__(self, d):
        """Restore pickled state, defaulting `num_groups` for old pickles."""
        self.__dict__.update(d)
        # Objects pickled before grouped convolution existed lack num_groups.
        if not hasattr(self, "num_groups"):
            self.num_groups = 1
def c_support_code(self, **kwargs):
ccodes = blas_headers.blas_header_text()
if self.blas_type == "openblas":
ccodes += blas_headers.openblas_threads_text()
elif self.blas_type == "mkl":
ccodes += blas_headers.mkl_threads_text()
return ccodes
    def c_libraries(self, **kwargs):
        # Link against the configured BLAS libraries.
        return ldflags()
    def c_compile_args(self, **kwargs):
        # BLAS compile flags plus OpenMP flags from the parent OpenMPOp.
        compile_args = ldflags(libs=False, flags=True)
        compile_args += super().c_compile_args(**kwargs)
        return compile_args
    def c_lib_dirs(self, **kwargs):
        # Library search paths for the configured BLAS.
        return ldflags(libs=False, libs_dir=True)
    def c_header_dirs(self, **kwargs):
        # Header search paths for the configured BLAS.
        return ldflags(libs=False, include_dir=True)
    def c_headers(self, **kwargs):
        # stdio is needed by the error reporting in the generated code.
        headers = ["<stdio.h>"]
        headers += super().c_headers(**kwargs)
        return headers
    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
        # (the version also keys on openmp and the BLAS header version so
        # cached binaries are invalidated when either changes)
        return (10, self.openmp, blas_header_version())
    def c_support_code_apply(self, node, nodename):
        """Render corr_gemm.c with per-node substitutions (dtype, BLAS, OpenMP)."""
        # REMEMBER TO RAISE c_code_cache_version when changing any of
        # these files
        sub = {}
        dtype = str(node.__dict__["inputs"][0].dtype)
        assert dtype in ("float32", "float64")
        # Select single- vs double-precision BLAS symbols and C types.
        if dtype == "float32":
            sub["gemm"] = "sgemm_"
            sub["gemv"] = "sgemv_"
            sub["float_type"] = "npy_float"
            sub["float_typenum"] = "NPY_FLOAT"
            sub["n_bytes"] = 4
            sub["c_float_type"] = "float"
        else:
            sub["gemm"] = "dgemm_"
            sub["gemv"] = "dgemv_"
            sub["float_type"] = "npy_double"
            sub["float_typenum"] = "NPY_DOUBLE"
            sub["n_bytes"] = 8
            sub["c_float_type"] = "double"
        # OpenMP and BLAS threading hooks; empty stubs when disabled.
        if self.openmp:
            sub["omp_flags"] = "#pragma omp parallel for schedule(static)"
            sub["omp_get_max_threads"] = "omp_get_max_threads()"
            sub["omp_get_thread_num"] = "omp_get_thread_num()"
            if self.blas_type == "openblas":
                sub["blas_set_num_threads"] = "openblas_set_num_threads"
                sub["blas_get_num_threads"] = "openblas_get_num_threads()"
            elif self.blas_type == "mkl":
                sub["blas_set_num_threads"] = "mkl_set_num_threads"
                sub["blas_get_num_threads"] = "mkl_get_max_threads()"
            else:
                sub["blas_set_num_threads"] = ""
                sub["blas_get_num_threads"] = "0"
        else:
            sub["omp_flags"] = ""
            sub["omp_get_max_threads"] = "1"
            sub["omp_get_thread_num"] = "0"
            sub["blas_set_num_threads"] = ""
            sub["blas_get_num_threads"] = "0"
        final_code = (C_CODE_PATH / "corr_gemm.c").read_text("utf-8")
        return final_code % sub
    def c_code_helper(self, bottom, weights, top, sub, height=None, width=None):
        """
        This generates the C code for CorrMM (direction="forward"),
        CorrMM_gradWeights (direction="backprop weights"), and
        CorrMM_gradInputs (direction="backprop inputs").
        Depending on the direction, one of bottom, weights, top will
        receive the output, while the other two serve as inputs.
        :param bottom: Variable name of the input images in the forward pass,
        or the gradient of the input images in backprop wrt. inputs
        :param weights: Variable name of the filters in the forward pass,
        or the gradient of the filters in backprop wrt. weights
        :param top: Variable name of the output images / feature maps in the
        forward pass, or the gradient of the outputs in the backprop passes
        :param sub: Dictionary of substitutions usable to help generating the
        C code.
        :param height: If self.subsample[0] != 1, a variable giving the height
        of the filters for direction="backprop weights" or the height of
        the input images for direction="backprop inputs".
        If self.border_mode == 'half', a variable giving the height of the
        filters for direction="backprop weights". Ignored otherwise.
        :param width: If self.subsample[1] != 1, a variable giving the width
        of the filters for direction="backprop weights" or the width of the
        input images for direction="backprop inputs".
        If self.border_mode == 'half', a variable giving the width of the
        filters for direction="backprop weights". Ignored otherwise.
        :returns: A C code string that reads geometry from the params struct,
        infers kernel and output shapes, (re)allocates the output array, and
        dispatches to corrMM() defined in corr_gemm.c.
        """
        # When subsampling, we cannot unambiguously infer the height and width
        # of bottom and weights from top, so we require them to be given.
        # Similarly, when border_mode="half", we cannot infer the weight size.
        # `height`/`width` become C expressions reading the int64 shape inputs,
        # or the literal "-1" meaning "not provided" for the C code.
        if height:
            height = f"(*(npy_int64 *)(PyArray_DATA({height})))"
        else:
            if ((self.direction != 0) and (self.dH != 1)) or (
                (self.direction == 1) and (self.padH_l == -1 or self.padH_r == -1)
            ):
                raise ValueError(
                    "height must be given for backprop with vertical sampling or border_mode='half'"
                )
            height = "-1"
        if width:
            width = f"(*(npy_int64 *)(PyArray_DATA({width})))"
        else:
            if ((self.direction != 0) and (self.dW != 1)) or (
                (self.direction == 1) and (self.padW_l == -1 or self.padW_r == -1)
            ):
                raise ValueError(
                    "width must be given for backprop with horizontal sampling or border_mode='half'"
                )
            width = "-1"
        fail = sub["fail"]
        params = sub["params"]
        return f"""
    // Mandatory args
    int direction = {params}->direction;  // forward, bprop weights, bprop inputs
    // Optional args
    int dH = {params}->dH;
    int dW = {params}->dW;
    int dilH = {params}->dilH;
    int dilW = {params}->dilW;
    int padH_l = {params}->padH_l;
    int padH_r = {params}->padH_r;
    int padW_l = {params}->padW_l;
    int padW_r = {params}->padW_r;
    int numgroups = {params}->num_groups;
    int unshared = {params}->unshared;
    PyArrayObject * bottom = {bottom};
    PyArrayObject * weights = {weights};
    PyArrayObject * top = {top};
    PyArrayObject * out2 = NULL;
    PyArrayObject **out = NULL;
    switch({params}->direction) {{
        case DIRECTION_FORWARD:
            out = &{top};
            break;
        case DIRECTION_BACKPROP_WEIGHTS:
            out = &{weights};
            break;
        case DIRECTION_BACKPROP_INPUTS:
            out = &{bottom};
            break;
        default:
            PyErr_SetString(PyExc_ValueError, "CPU CorrMM: Invalid direction.");
            {{{fail}}}
            break;
    }}
    int wdim, odim;
    wdim = unshared ? 6 : 4;
    odim = 4; //Can be set to 6 later for unshared backprop wrt weights
    // Obtain or infer kernel width and height
    // (we need to know it early to be able to handle auto-padding)
    int kH, kW, dil_kH, dil_kW;
    if (direction != 1) {{
        // weight is an input variable, we can just read its shape
        kH = PyArray_DIMS(weights)[wdim-2];
        kW = PyArray_DIMS(weights)[wdim-1];
    }}
    else {{
        if ({height} != -1) {{
            // kernel height is specified (perhaps vertical subsampling or half padding)
            kH = {height};
        }}
        else if (padH_l == -2 || padH_r == -2) {{
            // vertical full padding, we can infer the kernel height
            kH = (2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1;
        }}
        else {{
            // explicit padding, we can infer the kernel height
            kH = (PyArray_DIMS(bottom)[2] + padH_l + padH_r - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
        }}
        if ({width} != -1) {{
            // kernel width is specified (perhaps horizontal subsampling or half padding)
            kW = {width};
        }}
        else if (padW_l == -2 || padW_r == -2) {{
            kW = (2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }}
        else {{
            kW = (PyArray_DIMS(bottom)[3] + padW_l + padW_r - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }}
    }}
    // Implicit dilated kernel size
    dil_kH = (kH - 1) * dilH + 1;
    dil_kW = (kW - 1) * dilW + 1;
    // Auto-padding if requested
    if (padH_l == -1 || padH_r == -1) {{  // vertical half padding
        padH_l = padH_r = dil_kH / 2;
    }}
    else if (padH_l == -2 || padH_r == -2) {{  // vertical full padding
        padH_l = padH_r = dil_kH - 1;
    }}
    else if (padH_l < -2 || padH_r < -2) {{
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padH_l and padH_r must be >= -2");
        {fail}
    }}
    if (padW_l == -1 || padW_r == -1) {{  // horizontal half padding
        padW_l = padW_r = dil_kW / 2;
    }}
    else if (padW_l == -2 || padW_r == -2) {{  // horizontal full padding
        padW_l = padW_r = dil_kW - 1;
    }}
    else if (padW_l < -2 || padW_r < -2) {{
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padW_l and padW_r must be >= -2");
        {fail}
    }}
    // Infer output shape
    npy_intp out_dim[6];
    out_dim[4] = out_dim[5] = 0; //Only used for unshared backprop wrt weights
    switch(direction) {{
    case 0:  // forward pass
        // output is top: (batchsize, num_filters, height, width)
        // height and width: top = (bottom + pad_l + pad_r - ((weight-1)*dil + 1)) / sample + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
        out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + padH_l + padH_r - ((PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1);
        out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + padW_l + padW_r - ((PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1);
        if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
        {{
            if (unshared) {{
                PyErr_Format(PyExc_ValueError,
                             "CorrMM: impossible output shape\\n"
                             "  bottom shape: %ld x %ld x %ld x %ld\\n"
                             "  weights shape: %ld x %ld x %ld x %ld x %ld x %ld\\n"
                             "  top shape: %ld x %ld x %ld x %ld\\n",
                             (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                             (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                             (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                             (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                             (long int)PyArray_DIMS(weights)[4], (long int)PyArray_DIMS(weights)[5],
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3]);
            }}
            else {{
                PyErr_Format(PyExc_ValueError,
                             "CorrMM: impossible output shape\\n"
                             "  bottom shape: %ld x %ld x %ld x %ld\\n"
                             "  weights shape: %ld x %ld x %ld x %ld\\n"
                             "  top shape: %ld x %ld x %ld x %ld\\n",
                             (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                             (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                             (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                             (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3]);
            }}
            {fail}
        }}
        break;
    case 1:  // backprop wrt. weights
        // output is weights: (num_filters, num_channels, height, width)
        // height and width: weights = (bottom + pad_l + pad_r - (top - 1) * sample - 1) / dil + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
        if (unshared){{
            odim = 6;
            out_dim[1] = (npy_intp)PyArray_DIMS(top)[2];
            out_dim[2] = (npy_intp)PyArray_DIMS(top)[3];
        }}
        out_dim[wdim-3] = (npy_intp)PyArray_DIMS(bottom)[1] / numgroups;
        out_dim[wdim-2] = (npy_intp)kH;  // already inferred further above
        out_dim[wdim-1] = (npy_intp)kW;  // how convenient
        if (unshared) {{
            if (out_dim[0] < 0 || out_dim[1] <= 0 || out_dim[2] <= 0 || out_dim[3] < 0
                    || out_dim[4] <= 0 || out_dim[5] <= 0){{
                PyErr_Format(PyExc_ValueError,
                             "CorrMM backprop wrt. weights: impossible output shape\\n"
                             "  bottom shape: %ld x %ld x %ld x %ld\\n"
                             "  weights shape: %ld x %ld x %ld x %ld x %ld x %ld\\n"
                             "  top shape: %ld x %ld x %ld x %ld\\n",
                             (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                             (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3], (long int)out_dim[4], (long int)out_dim[5],
                             (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                             (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
                {fail}
            }}
        }}
        else {{
            if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
            {{
                PyErr_Format(PyExc_ValueError,
                             "CorrMM backprop wrt. weights: impossible output shape\\n"
                             "  bottom shape: %ld x %ld x %ld x %ld\\n"
                             "  weights shape: %ld x %ld x %ld x %ld\\n"
                             "  top shape: %ld x %ld x %ld x %ld\\n",
                             (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                             (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3],
                             (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                             (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
                {fail}
            }}
        }}
        break;
    case 2:  // backprop wrt. inputs
        // output is bottom: (batchsize, num_channels, height, width)
        // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[wdim-3] * numgroups;
        out_dim[2] = (npy_intp)(({height} != -1) ? {height} : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1 - padH_l - padH_r);
        out_dim[3] = (npy_intp)(({width} != -1) ? {width} : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1 - padW_l - padW_r);
        if (unshared) {{
            if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
            {{
                PyErr_Format(PyExc_ValueError,
                             "CorrMM backprop wrt. inputs: impossible output shape\\n"
                             "  bottom shape: %ld x %ld x %ld x %ld\\n"
                             "  weights shape: %ld x %ld x %ld x %ld x %ld x %ld\\n"
                             "  top shape: %ld x %ld x %ld x %ld\\n",
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3],
                             (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                             (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                             (long int)PyArray_DIMS(weights)[4], (long int)PyArray_DIMS(weights)[5],
                             (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                             (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
                {fail}
            }}
        }}
        else {{
            if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
            {{
                PyErr_Format(PyExc_ValueError,
                             "CorrMM backprop wrt. inputs: impossible output shape\\n"
                             "  bottom shape: %ld x %ld x %ld x %ld\\n"
                             "  weights shape: %ld x %ld x %ld x %ld\\n"
                             "  top shape: %ld x %ld x %ld x %ld\\n",
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3],
                             (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                             (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                             (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                             (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
                {fail}
            }}
        }}
        break;
    default:
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2\\n");
        {fail}
    }}
    // Prepare output array
    int typenum;
    int failure;
    failure = !(*out
           && PyArray_NDIM(*out)==odim
           && PyArray_IS_C_CONTIGUOUS(*out)
           && PyArray_DIMS(*out)[0]==out_dim[0]
           && PyArray_DIMS(*out)[1]==out_dim[1]
           && PyArray_DIMS(*out)[2]==out_dim[2]
           && PyArray_DIMS(*out)[3]==out_dim[3]);
    if (odim == 6){{
        failure = failure || !(PyArray_DIMS(*out)[4]==out_dim[4]
                && PyArray_DIMS(*out)[5]==out_dim[5]);
    }}
    if ( failure )
    {{
        Py_XDECREF(*out);
        if (direction != 1) {{
          typenum = PyArray_TYPE(weights);
        }}
        else {{
          typenum = PyArray_TYPE(bottom);
        }}
        //Change to PyArray_ZEROS which is faster than PyArray_EMPTY.
        *out = (PyArrayObject*)PyArray_ZEROS(odim,
                                          out_dim,
                                          typenum,
                                          0);
        if (NULL == *out)
        {{
            if (odim == 4) {{
                PyErr_Format(PyExc_RuntimeError,
                        "BaseCorrMM: Failed to allocate output of %lld x %lld x %lld x %lld",
                        (long long)out_dim[0], (long long)out_dim[1], (long long)out_dim[2], (long long)out_dim[3]);
            }}
            if (odim == 6) {{
                PyErr_Format(PyExc_RuntimeError,
                        "BaseCorrMM: Failed to allocate output of %lld x %lld x %lld x %lld %lld %lld",
                        (long long)out_dim[0], (long long)out_dim[1], (long long)out_dim[2], (long long)out_dim[3],
                        (long long)out_dim[4], (long long)out_dim[5]);
            }}
            {fail}
        }}
    }}
    // Call corrMM code
    out2 = corrMM({bottom}, {weights}, {top}, direction, dH, dW, dilH, dilW,
                padH_l, padH_r, padW_l, padW_r, numgroups, unshared);
    if (out2==NULL){{
       {fail}
    }}
    assert (out2 == *out);
    """
class CorrMM(BaseCorrMM):
    """
    CPU correlation implementation using Matrix Multiplication.
    Parameters
    ----------
    border_mode
        The width of a border of implicit zeros to pad the
        input with. Must be a tuple with 2 elements giving the numbers of rows
        and columns to pad on each side, or a single integer to pad the same
        on all sides, or a string shortcut setting the padding at runtime:
        ``'valid'`` for ``(0, 0)`` (valid convolution, no padding), ``'full'``
        for ``(kernel_rows - 1, kernel_columns - 1)`` (full convolution),
        ``'half'`` for ``(kernel_rows // 2, kernel_columns // 2)`` (same
        convolution for odd-sized kernels).
        If it is a tuple containing 2 pairs of integers, then these specify
        the padding to be applied on each side ((left, right), (top, bottom)).
        Otherwise, each width is applied twice, once per side (left and right,
        top and bottom).
    subsample
        The subsample operation applied to each output image.
        Should be a tuple with 2 elements.
        `(sv, sh)` is equivalent to `CorrMM(...)(...)[:,:,::sv, ::sh]`,
        but faster.
        Set to `(1, 1)` to disable subsampling.
    filter_dilation
        The filter dilation operation applied to each input image.
        Should be a tuple with 2 elements.
        Set to `(1, 1)` to disable filter dilation.
    num_groups
        Divides the image, kernel and output tensors into num_groups
        separate groups. Each which carry out convolutions separately.
        Should be an integer.
    unshared
        Boolean value. If true, then a different filter will be applied to
        each region of the input image.
    """
    _direction = "forward"
    def make_node(self, img, kern):
        """Build the Apply node for the forward correlation of img with kern."""
        img = as_tensor_variable(img)
        kern = as_tensor_variable(kern)
        img, kern = self.as_common_dtype(img, kern)
        if img.type.ndim != 4:
            raise TypeError("img must be 4D tensor")
        # Unshared correlation uses a 6D kernel (nkern, topH, topW, ch, kH, kW).
        if self.unshared is True:
            if kern.type.ndim != 6:
                raise TypeError("kern must be 6D tensor")
        else:
            if kern.type.ndim != 4:
                raise TypeError("kern must be 4D tensor")
        # Only batch/filter dims can be statically known (broadcastable) here.
        out_shape = (
            1 if img.type.shape[0] == 1 else None,
            1 if kern.type.shape[0] == 1 else None,
            None,
            None,
        )
        dtype = img.type.dtype
        return Apply(self, [img, kern], [TensorType(dtype, shape=out_shape)()])
    def infer_shape(self, fgraph, node, input_shape):
        """Delegate output-shape computation to the shared conv shape helper."""
        imshp = input_shape[0]
        kshp = input_shape[1]
        res = get_conv_output_shape(
            imshp, kshp, self.border_mode, self.subsample, self.filter_dilation
        )
        return [res]
    def c_code(self, node, nodename, inp, out_, sub):
        """Generate C code for the forward pass via the shared helper."""
        bottom, weights = inp
        (top,) = out_
        return super().c_code_helper(bottom, weights, top, sub)
    def grad(self, inp, grads):
        """Gradients wrt. image (via gradInputs) and kernel (via gradWeights)."""
        bottom, weights = inp
        (top,) = grads
        d_bottom = CorrMM_gradInputs(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            self.num_groups,
            self.unshared,
        )(weights, top, bottom.shape[-2:])
        d_weights = CorrMM_gradWeights(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            self.num_groups,
            self.unshared,
        )(bottom, top, weights.shape[-2:])
        return d_bottom, d_weights
class CorrMM_gradWeights(BaseCorrMM):
    """
    Gradient wrt. filters for `CorrMM`.
    Notes
    -----
    You will not want to use this directly, but rely on
    PyTensor's automatic differentiation or graph optimization to
    use it as needed.
    """
    _direction = "backprop weights"
    def make_node(self, img, topgrad, shape=None):
        """Build the Apply node computing d(weights) from img and topgrad.

        ``shape`` gives the (kH, kW) of the filters; it is mandatory when
        subsampling or using half padding, since it cannot be inferred.
        """
        img = as_tensor_variable(img)
        topgrad = as_tensor_variable(topgrad)
        img, topgrad = self.as_common_dtype(img, topgrad)
        if img.type.ndim != 4:
            raise TypeError("img must be 4D tensor")
        if topgrad.type.ndim != 4:
            raise TypeError("topgrad must be 4D tensor")
        if shape is None:
            if self.subsample != (1, 1) or self.border_mode == "half":
                raise ValueError(
                    "shape must be given if subsample != (1, 1)"
                    ' or border_mode == "half"'
                )
            height_width = []
        else:
            height_width = [
                as_tensor_variable(shape[0]).astype("int64"),
                as_tensor_variable(shape[1]).astype("int64"),
            ]
        # Unshared filters are 6D: (nkern, topH, topW, channels, kH, kW).
        if self.unshared is True:
            out_shape = [
                1 if topgrad.type.shape[1] == 1 else None,
                None,
                None,
                1 if img.type.shape[1] == 1 else None,
                None,
                None,
            ]
        else:
            out_shape = [
                1 if topgrad.type.shape[1] == 1 else None,
                1 if img.type.shape[1] == 1 else None,
                None,
                None,
            ]
        dtype = img.type.dtype
        return Apply(
            self, [img, topgrad, *height_width], [TensorType(dtype, shape=out_shape)()]
        )
    def infer_shape(self, fgraph, node, input_shape):
        """Symbolically compute the filter-gradient shape.

        Negative pad sentinels: -1 means 'half', -2 means 'full' padding.
        """
        if self.border_mode == "half":
            padH_l = padH_r = padW_l = padW_r = -1
        elif self.border_mode == "full":
            padH_l = padH_r = padW_l = padW_r = -2
        elif isinstance(self.border_mode, tuple):
            border = ()
            for mode in self.border_mode:
                if isinstance(mode, tuple):
                    border += ((int(mode[0]), int(mode[1])),)
                else:
                    border += ((int(mode), int(mode)),)
            (padH_l, padH_r), (padW_l, padW_r) = border
        else:
            assert self.border_mode == "valid"
            padH_l = padH_r = padW_l = padW_r = 0
        dH, dW = self.subsample
        imshp = input_shape[0]
        topshp = input_shape[1]
        ssize, imshp = imshp[1], list(imshp[2:])
        # Channels per group.
        ssize = ssize // self.num_groups
        nkern, topshp = topshp[1], list(topshp[2:])
        height_width = node.inputs[-2:]
        if (dH != 1) or (padH_l == -1) or (padH_r == -1):
            # vertical subsampling or half padding, kernel height is specified
            kH = height_width[0]
        elif (padH_l == -2) or (padH_r == -2):
            # vertical full padding, we can infer the kernel height
            kH = 2 - imshp[0] + (topshp[0] - 1) * dH
        else:
            # explicit padding, we can infer the kernel height
            kH = imshp[0] + padH_l + padH_r - (topshp[0] - 1) * dH
        if (dW != 1) or (padW_l == -1) or (padW_r == -1):
            kW = height_width[1]
        elif (padW_l == -2) or (padW_r == -2):
            kW = 2 - imshp[1] + (topshp[1] - 1) * dW
        else:
            kW = imshp[1] + padW_l + padW_r - (topshp[1] - 1) * dW
        if self.unshared is True:
            return [(nkern, topshp[0], topshp[1], ssize, kH, kW)]
        else:
            return [(nkern, ssize, kH, kW)]
    def c_code(self, node, nodename, inp, out_, sub):
        """Generate C code for the weight gradient via the shared helper."""
        bottom, top = inp[:2]
        height, width = inp[2:] or (None, None)
        (weights,) = out_
        return super().c_code_helper(bottom, weights, top, sub, height, width)
    def grad(self, inp, grads):
        """Gradients of the weight-gradient op wrt. its image and top inputs."""
        bottom, top = inp[:2]
        (weights,) = grads
        d_bottom = CorrMM_gradInputs(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            self.num_groups,
            self.unshared,
        )(weights, top, bottom.shape[-2:])
        d_top = CorrMM(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            self.num_groups,
            self.unshared,
        )(bottom, weights)
        # The explicit shape inputs carry no gradient.
        d_height_width = (
            (pytensor.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else ()
        )
        return (d_bottom, d_top, *d_height_width)
    def connection_pattern(self, node):
        """Outputs depend only on the first two inputs; shape inputs are disconnected."""
        if node.nin == 2:
            return [[1], [1]]
        else:
            return [[1], [1], [0], [0]]  # no connection to height, width
class CorrMM_gradInputs(BaseCorrMM):
"""
Gradient wrt. inputs for `CorrMM`.
Notes
-----
You will not want to use this directly, but rely on
PyTensor's automatic differentiation or graph optimization to
use it as needed.
"""
_direction = "backprop inputs"
def make_node(self, kern, topgrad, shape=None):
kern = as_tensor_variable(kern)
topgrad = as_tensor_variable(topgrad)
kern, topgrad = self.as_common_dtype(kern, topgrad)
if self.unshared is True:
if kern.type.ndim != 6:
raise TypeError("kern must be 6D tensor")
else:
if kern.type.ndim != 4:
raise TypeError("kern must be 4D tensor")
if topgrad.type.ndim != 4:
raise TypeError("topgrad must be 4D tensor")
if shape is None:
if self.subsample != (1, 1):
raise ValueError("shape must be given if subsample != (1, 1)")
height_width = []
else:
height_width = [
as_tensor_variable(shape[0]).astype("int64"),
as_tensor_variable(shape[1]).astype("int64"),
]
if self.num_groups > 1:
out_shape = [1 if topgrad.type.shape[0] == 1 else None, None, None, None]
else:
out_shape = [
1 if topgrad.type.shape[0] == 1 else None,
1 if kern.type.shape[-3] == 1 else None,
None,
None,
]
dtype = kern.type.dtype
return Apply(
self, [kern, topgrad, *height_width], [TensorType(dtype, shape=out_shape)()]
)
def infer_shape(self, fgraph, node, input_shape):
if self.border_mode == "half":
padH_l = padH_r = padW_l = padW_r = -1
elif self.border_mode == "full":
padH_l = padH_r = padW_l = padW_r = -2
elif isinstance(self.border_mode, tuple):
border = ()
for mode in self.border_mode:
if isinstance(mode, tuple):
border += ((int(mode[0]), int(mode[1])),)
else:
border += ((int(mode), int(mode)),)
(padH_l, padH_r), (padW_l, padW_r) = border
else:
assert self.border_mode == "valid"
padH_l = padH_r = padW_l = padW_r = 0
dH, dW = self.subsample
kshp = input_shape[0]
topshp = input_shape[1]
ssize, kshp = kshp[-3], list(kshp[-2:])
ssize = ssize * self.num_groups
bsize, topshp = topshp[0], list(topshp[2:])
height_width = node.inputs[-2:]
if padH_l == -1 or padH_r == -1:
padH_l = padH_r = kshp[0] // 2
elif padH_l == -2 or padH_r == -2:
padH_l = padH_r = kshp[0] - 1
elif padH_l < -2 or padH_r < -2:
raise ValueError("CorrMM_gradInputs: border_mode must be >= 0.")
if padW_l == -1 or padW_r == -1:
padW_l = padW_r = kshp[1] // 2
elif padW_l == -2 or padW_r == -2:
padW_l = padW_r = kshp[1] - 1
elif padW_l < -2 or padW_r < -2:
raise ValueError("CorrMM_gradInputs: border_mode must be >= 0.")
if dH != 1:
out_shp0 = height_width[0]
else:
out_shp0 = (topshp[0] - 1) * dH + kshp[0] - padH_l - padH_r
if dW != 1:
out_shp1 = height_width[1]
else:
out_shp1 = (topshp[1] - 1) * dW + kshp[1] - padW_l - padW_r
out_shp = (out_shp0, out_shp1)
return [(bsize, ssize, *out_shp)]
def c_code(self, node, nodename, inp, out_, sub):
weights, top = inp[:2]
height, width = inp[2:] or (None, None)
(bottom,) = out_
return super().c_code_helper(bottom, weights, top, sub, height, width)
    def grad(self, inp, grads):
        """Gradients of the grad-inputs Op.

        ``inp`` is (weights, topgrad[, height, width]) and ``grads`` holds
        the gradient w.r.t. the reconstructed "bottom" image.  The gradient
        w.r.t. the weights is a weight-gradient correlation of (bottom, top),
        and the gradient w.r.t. topgrad is a forward correlation of
        (bottom, weights).  The optional shape inputs are disconnected.
        """
        weights, top = inp[:2]
        (bottom,) = grads
        d_weights = CorrMM_gradWeights(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            self.num_groups,
            self.unshared,
        )(bottom, top, weights.shape[-2:])
        d_top = CorrMM(
            self.border_mode,
            self.subsample,
            self.filter_dilation,
            self.num_groups,
            self.unshared,
        )(bottom, weights)
        # height/width (when present) carry no gradient information.
        d_height_width = (
            (pytensor.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else ()
        )
        return (d_weights, d_top, *d_height_width)
def connection_pattern(self, node):
if node.nin == 2:
return [[1], [1]]
else:
return [[1], [1], [0], [0]] # no connection to height, width
import numpy as np
import pytest
import pytensor
import pytensor.tensor as pt
from pytensor.compile import get_default_mode
from pytensor.compile.mode import Mode
from pytensor.configdefaults import config
from pytensor.graph.rewriting.basic import check_stack_trace
from pytensor.link.numba import NumbaLinker
from pytensor.tensor.conv import abstract_conv
from pytensor.tensor.conv.abstract_conv import (
AbstractConv2d,
AbstractConv2d_gradInputs,
AbstractConv2d_gradWeights,
assert_conv_shape,
assert_shape,
bilinear_kernel_1D,
bilinear_kernel_2D,
bilinear_upsampling,
causal_conv1d,
check_conv_gradinputs_shape,
conv2d_transpose,
get_conv_gradinputs_shape,
get_conv_gradweights_shape,
get_conv_output_shape,
separable_conv2d,
separable_conv3d,
)
from pytensor.tensor.type import (
TensorType,
ftensor4,
iscalar,
lvector,
tensor3,
tensor4,
tensor5,
)
from tests import unittest_tools as utt
from tests.tensor.conv import c_conv3d_corr3d_ref, c_conv_corr_ref
def conv2d_corr(
    inputs,
    filters,
    border_mode="valid",
    subsample=(1, 1),
    conv_mode="conv",
    filter_dilation=(1, 1),
):
    """Reference 2D convolution built on the C ``CorrMM`` Op.

    ``CorrMM`` computes correlation, so true convolution (``conv_mode ==
    "conv"``) is obtained by flipping the filters on both spatial axes first.
    """
    flipped_filters = filters[:, :, ::-1, ::-1] if conv_mode == "conv" else filters
    corr_op = c_conv_corr_ref.CorrMM(border_mode, subsample, filter_dilation)
    return corr_op(inputs, flipped_filters)
def conv2d_corr_gw(
    inputs,
    topgrad,
    filters_shape,
    border_mode="valid",
    subsample=(1, 1),
    conv_mode="conv",
    filter_dilation=(1, 1),
):
    """Reference gradient of 2D convolution w.r.t. the weights.

    The ``CorrMM_gradWeights`` Op produces correlation weights, so for true
    convolution the result is flipped back on both spatial axes.
    """
    grad_op = c_conv_corr_ref.CorrMM_gradWeights(
        border_mode, subsample, filter_dilation
    )
    grad = grad_op(inputs, topgrad, filters_shape[2:])
    if conv_mode == "conv":
        grad = grad[:, :, ::-1, ::-1]
    return grad
def conv2d_corr_gi(
    filters,
    topgrad,
    inputs_shape,
    border_mode="valid",
    subsample=(1, 1),
    conv_mode="conv",
    filter_dilation=(1, 1),
):
    """Reference gradient of 2D convolution w.r.t. the inputs.

    For true convolution the filters are flipped before being handed to the
    correlation-based ``CorrMM_gradInputs`` Op.
    """
    flipped_filters = filters[:, :, ::-1, ::-1] if conv_mode == "conv" else filters
    grad_op = c_conv_corr_ref.CorrMM_gradInputs(
        border_mode, subsample, filter_dilation
    )
    return grad_op(flipped_filters, topgrad, inputs_shape[2:])
def conv3d_corr(
    inputs,
    filters,
    border_mode="valid",
    subsample=(1, 1, 1),
    conv_mode="conv",
    filter_dilation=(1, 1, 1),
):
    """Reference 3D convolution built on the C ``Corr3dMM`` Op.

    ``Corr3dMM`` computes correlation, so true convolution is obtained by
    flipping the filters on all three spatial axes first.
    """
    flipped_filters = (
        filters[:, :, ::-1, ::-1, ::-1] if conv_mode == "conv" else filters
    )
    corr_op = c_conv3d_corr3d_ref.Corr3dMM(border_mode, subsample, filter_dilation)
    return corr_op(inputs, flipped_filters)
def conv3d_corr_gw(
    inputs,
    topgrad,
    filters_shape,
    border_mode="valid",
    subsample=(1, 1, 1),
    conv_mode="conv",
    filter_dilation=(1, 1, 1),
):
    """Reference gradient of 3D convolution w.r.t. the weights.

    The correlation-based Op produces correlation weights, so for true
    convolution the result is flipped back on all three spatial axes.
    """
    grad_op = c_conv3d_corr3d_ref.Corr3dMMGradWeights(
        border_mode, subsample, filter_dilation
    )
    grad = grad_op(inputs, topgrad, filters_shape[2:])
    if conv_mode == "conv":
        grad = grad[:, :, ::-1, ::-1, ::-1]
    return grad
def conv3d_corr_gi(
    filters,
    topgrad,
    inputs_shape,
    border_mode="valid",
    subsample=(1, 1, 1),
    conv_mode="conv",
    filter_dilation=(1, 1, 1),
):
    """Reference gradient of 3D convolution w.r.t. the inputs.

    For true convolution the filters are flipped before being handed to the
    correlation-based ``Corr3dMMGradInputs`` Op.
    """
    flipped_filters = (
        filters[:, :, ::-1, ::-1, ::-1] if conv_mode == "conv" else filters
    )
    grad_op = c_conv3d_corr3d_ref.Corr3dMMGradInputs(
        border_mode, subsample, filter_dilation
    )
    return grad_op(flipped_filters, topgrad, inputs_shape[2:])
class TestGetConvOutShape:
    """Checks of ``get_conv_output_shape`` against hand-computed shapes."""

    def test_basic(self):
        """2D case: each border mode maps to a known output shape."""
        image_shape, kernel_shape = (3, 2, 12, 9), (4, 2, 5, 6)
        sub_sample = (1, 2)
        filter_dilation = (2, 1)
        expected_by_border = {
            "valid": (3, 4, 4, 2),
            "half": (3, 4, 12, 5),
            "full": (3, 4, 20, 7),
            (1, 2): (3, 4, 6, 4),
        }
        for border_mode, expected in expected_by_border.items():
            computed = get_conv_output_shape(
                image_shape, kernel_shape, border_mode, sub_sample, filter_dilation
            )
            assert computed == expected

    def test_basic_3d(self):
        """3D case: each border mode maps to a known output shape."""
        image_shape, kernel_shape = (3, 2, 12, 9, 7), (4, 2, 5, 6, 4)
        sub_sample = (1, 2, 1)
        filter_dilation = (2, 1, 1)
        expected_by_border = {
            "valid": (3, 4, 4, 2, 4),
            "half": (3, 4, 12, 5, 8),
            "full": (3, 4, 20, 7, 10),
            (1, 2, 3): (3, 4, 6, 4, 10),
        }
        for border_mode, expected in expected_by_border.items():
            computed = get_conv_output_shape(
                image_shape, kernel_shape, border_mode, sub_sample, filter_dilation
            )
            assert computed == expected
class TestConvGradInputsShape:
    """Consistency between the forward shape helper and the grad-side
    helpers ``check_conv_gradinputs_shape``, ``get_conv_gradinputs_shape``
    and ``get_conv_gradweights_shape``."""

    def test_check_shape(self):
        """The exact forward output shape (including versions with ``None``
        holes) must be accepted; shapes differing in any known dim must be
        rejected."""
        for i in range(1, 20):
            for k in range(1, 10):
                for b in ("valid", "half", "full", (0, 2)):
                    for s in (1, 2, 3):
                        for d in (1, 2, 3):
                            image_shape = (59, 61, i, i)
                            kernel_shape = (67, 61, k, k)
                            # compute the output that these inputs and parameters would produce
                            computed_shape = get_conv_output_shape(
                                image_shape, kernel_shape, b, (s, s), (d, d)
                            )
                            # this should be accepted
                            assert check_conv_gradinputs_shape(
                                image_shape,
                                kernel_shape,
                                computed_shape,
                                b,
                                (s, s),
                                (d, d),
                            )
                            # one or more None should also be accepted
                            trial_shape = (None, None, computed_shape[2], None)
                            assert check_conv_gradinputs_shape(
                                image_shape,
                                kernel_shape,
                                trial_shape,
                                b,
                                (s, s),
                                (d, d),
                            )
                            # the batch size and number of filters are important
                            trial_shape = (1, 1, computed_shape[2], computed_shape[3])
                            assert not check_conv_gradinputs_shape(
                                image_shape,
                                kernel_shape,
                                trial_shape,
                                b,
                                (s, s),
                                (d, d),
                            )
                            # outputs that are too large or too small should be rejected
                            for o in (-3, -2, -1, 1, 2, 3):
                                trial_shape = (
                                    computed_shape[0],
                                    computed_shape[1],
                                    computed_shape[2] + o,
                                    computed_shape[3] + o,
                                )
                                assert not check_conv_gradinputs_shape(
                                    image_shape,
                                    kernel_shape,
                                    trial_shape,
                                    b,
                                    (s, s),
                                    (d, d),
                                )

    def test_get_shape(self):
        """Inverting the forward shape computation must recover the image
        (or kernel) shape when it is unambiguous, and ``None`` spatial dims
        when subsampling (or "half" padding) makes it ambiguous."""
        for i in range(1, 20):
            for k in range(1, 10):
                for b in ("valid", "half", "full", (0, 2)):
                    for d in (1, 2, 3):
                        image_shape = (59, 61, i, i)
                        kernel_shape = (67, 61, k, k)
                        # compute the output that these inputs and parameters would produce
                        output_shape = get_conv_output_shape(
                            image_shape, kernel_shape, b, (1, 1), (d, d)
                        )
                        # compute the image_shape given this output_shape
                        computed_image_shape = get_conv_gradinputs_shape(
                            kernel_shape, output_shape, b, (1, 1), (d, d)
                        )
                        assert computed_image_shape == image_shape
                        # if subsample > 1, the shape should be None
                        computed_image_shape = get_conv_gradinputs_shape(
                            kernel_shape, output_shape, b, (2, 3), (d, d)
                        )
                        image_shape_with_None = (*image_shape[:2], None, None)
                        assert computed_image_shape == image_shape_with_None
                        # compute the kernel_shape given this output_shape
                        computed_kernel_shape = get_conv_gradweights_shape(
                            image_shape, output_shape, b, (1, 1), (d, d)
                        )
                        # if border_mode == 'half', the shape should be None
                        if b == "half":
                            kernel_shape_with_None = (*kernel_shape[:2], None, None)
                            assert computed_kernel_shape == kernel_shape_with_None
                        else:
                            assert computed_kernel_shape == kernel_shape
                        # if subsample > 1, the shape should be None
                        computed_kernel_shape = get_conv_gradweights_shape(
                            kernel_shape, output_shape, b, (2, 3), (d, d)
                        )
                        kernel_shape_with_None = (*kernel_shape[:2], None, None)
                        assert computed_kernel_shape == kernel_shape_with_None
class TestAssertConvShape:
    """Behaviour of ``assert_conv_shape`` on valid and invalid 4D shapes."""

    def test_basic(self):
        shape = tuple(iscalar() for _ in range(4))
        f = pytensor.function(shape, assert_conv_shape(shape))
        # Valid shapes pass through unchanged: spatial dims must be >= 1,
        # batch/channel dims may be 0.
        assert [1, 2, 3, 4] == f(1, 2, 3, 4)
        assert [0, 0, 1, 1] == f(0, 0, 1, 1)
        # Zero or negative spatial dims, and negative leading dims, fail.
        for bad_shape in (
            (3, 3, 3, 0),
            (3, 3, 0, 3),
            (3, 3, -1, 3),
            (3, -1, 3, 3),
            (-1, 3, 3, 3),
        ):
            with pytest.raises(AssertionError):
                f(*bad_shape)
class TestAssertShape:
    """Runtime shape assertions injected by ``assert_shape`` and by the
    ``input_shape``/``filter_shape`` arguments of the abstract conv
    builders (active only under ``conv__assert_shape``)."""

    @config.change_flags(conv__assert_shape=True)
    def test_basic(self):
        """``assert_shape`` checks only the non-None entries of the
        expected shape against the runtime value."""
        x = tensor4()
        s1 = iscalar()
        s2 = iscalar()
        expected_shape = [None, s1, s2, None]
        f = pytensor.function([x, s1, s2], assert_shape(x, expected_shape))
        v = np.zeros((3, 5, 7, 11), dtype="float32")
        assert 0 == np.sum(f(v, 5, 7))
        with pytest.raises(AssertionError):
            f(v, 5, 0)
        with pytest.raises(AssertionError):
            f(v, 5, 9)
        with pytest.raises(AssertionError):
            f(v, 0, 7)
        with pytest.raises(AssertionError):
            f(v, 7, 7)

    @config.change_flags(conv__assert_shape=True)
    def test_shape_check_conv2d(self):
        """Declared input/filter shapes are enforced at call time."""
        input = tensor4()
        filters = tensor4()
        out = abstract_conv.abstract_conv2d(
            input, filters, input_shape=(3, 5, 7, 11), filter_shape=(7, 5, 3, 3)
        )
        f = pytensor.function([input, filters], out)
        # mismatched input_shape
        with pytest.raises(AssertionError):
            f(
                np.zeros((3, 5, 9, 11), dtype="float32"),
                np.zeros((7, 5, 3, 3), dtype="float32"),
            )
        # mismatched filter_shape
        with pytest.raises(AssertionError):
            f(
                np.zeros((3, 5, 7, 11), dtype="float32"),
                np.zeros((7, 5, 2, 2), dtype="float32"),
            )

    @config.change_flags(conv__assert_shape=True)
    @pytest.mark.skipif(config.cxx == "", reason="test needs cxx")
    def test_shape_check_conv3d(self):
        """Same enforcement for the 3D forward convolution."""
        input = tensor5()
        filters = tensor5()
        out = abstract_conv.conv3d(
            input, filters, input_shape=(3, 5, 7, 11, 13), filter_shape=(7, 5, 3, 3, 3)
        )
        f = pytensor.function([input, filters], out)
        # mismatched input_shape
        with pytest.raises(AssertionError):
            f(
                np.zeros((3, 5, 9, 11, 13), dtype="float32"),
                np.zeros((7, 5, 3, 3, 3), dtype="float32"),
            )
        # mismatched filter_shape
        with pytest.raises(AssertionError):
            f(
                np.zeros((3, 5, 7, 11, 13), dtype="float32"),
                np.zeros((7, 5, 2, 2, 2), dtype="float32"),
            )

    @config.change_flags(conv__assert_shape=True)
    def test_shape_check_conv2d_grad_wrt_inputs(self):
        """Declared filter shape is enforced for the grad-wrt-inputs path."""
        output_grad = tensor4()
        filters = tensor4()
        out = abstract_conv.conv2d_grad_wrt_inputs(
            output_grad,
            filters,
            input_shape=(None, None, 7, 11),
            filter_shape=(7, 5, 3, 3),
        )
        f = pytensor.function([output_grad, filters], out)
        # mismatched filter_shape
        with pytest.raises(AssertionError):
            f(
                np.zeros((3, 6, 5, 9), dtype="float32"),
                np.zeros((7, 6, 3, 3), dtype="float32"),
            )

    @config.change_flags(conv__assert_shape=True)
    @pytest.mark.skipif(config.cxx == "", reason="test needs cxx")
    def test_shape_check_conv3d_grad_wrt_inputs(self):
        """Same enforcement for the 3D grad-wrt-inputs path."""
        output_grad = tensor5()
        filters = tensor5()
        out = abstract_conv.conv3d_grad_wrt_inputs(
            output_grad,
            filters,
            input_shape=(None, None, 7, 11, 13),
            filter_shape=(7, 5, 3, 3, 3),
        )
        f = pytensor.function([output_grad, filters], out)
        # mismatched filter_shape
        with pytest.raises(AssertionError):
            f(
                np.zeros((3, 6, 5, 9, 11), dtype="float32"),
                np.zeros((7, 6, 3, 3, 3), dtype="float32"),
            )

    @config.change_flags(conv__assert_shape=True)
    def test_shape_check_conv2d_grad_wrt_weights(self):
        """Declared input shape is enforced for the grad-wrt-weights path."""
        input = tensor4()
        output_grad = tensor4()
        out = abstract_conv.conv2d_grad_wrt_weights(
            input,
            output_grad,
            filter_shape=(None, None, 3, 3),
            input_shape=(3, 5, 7, 11),
        )
        f = pytensor.function([input, output_grad], out)
        # mismatched filter_shape
        with pytest.raises(AssertionError):
            f(
                np.zeros((3, 6, 7, 11), dtype="float32"),
                np.zeros((3, 7, 5, 9), dtype="float32"),
            )

    @config.change_flags(conv__assert_shape=True)
    @pytest.mark.skipif(config.cxx == "", reason="test needs cxx")
    def test_shape_check_conv3d_grad_wrt_weights(self):
        """Same enforcement for the 3D grad-wrt-weights path."""
        input = tensor5()
        output_grad = tensor5()
        out = abstract_conv.conv3d_grad_wrt_weights(
            input,
            output_grad,
            filter_shape=(None, None, 3, 3, 3),
            input_shape=(3, 5, 7, 11, 13),
        )
        f = pytensor.function([input, output_grad], out)
        # mismatched filter_shape
        with pytest.raises(AssertionError):
            f(
                np.zeros((3, 6, 7, 11, 13), dtype="float32"),
                np.zeros((3, 7, 5, 9, 11), dtype="float32"),
            )
class BaseTestConv:
    """Base harness for abstract convolution tests.

    Subclasses populate the parameter grids (``inputs_shapes``,
    ``subsamples``, ``border_modes``, ...) in ``setup_class`` and implement
    ``run_test_case``.  The ``run_*`` helpers here compile the abstract conv
    Op under test, compare it against a CorrMM-based reference, and verify
    gradients numerically.
    """

    def get_output_shape(
        self, inputs_shape, filters_shape, subsample, border_mode, filter_dilation
    ):
        """Compute the expected forward output shape for the parameters;
        ``None`` spatial entries propagate to the result."""
        # Effective (dilated) filter size per spatial axis.
        dil_filters = tuple(
            (s - 1) * d + 1
            for s, d in zip(filters_shape[2:], filter_dilation, strict=True)
        )
        if border_mode == "valid":
            border_mode = (0,) * (len(inputs_shape) - 2)
        if border_mode == "half":
            border_mode = tuple(d // 2 for d in dil_filters)
        if border_mode == "full":
            border_mode = tuple(d - 1 for d in dil_filters)
        batch_size = inputs_shape[0]
        num_filters = filters_shape[0]
        return (
            batch_size,
            num_filters,
            *(
                None
                if i is None or k is None
                else (i + 2 * pad - ((k - 1) * fd + 1)) // d + 1
                for i, k, d, pad, fd in zip(
                    inputs_shape[2:],
                    filters_shape[2:],
                    subsample,
                    border_mode,
                    filter_dilation,
                    strict=True,
                )
            ),
        )

    def run_fwd(
        self,
        inputs_shape,
        filters_shape,
        conv_fn,
        conv_op,
        ref,
        subsample=None,
        verify_grad=True,
        mode=None,
        border_mode="valid",
        filter_flip=True,
        provide_shape=False,
        target_op=None,
        check_trace=False,
        filter_dilation=None,
    ):
        """Compile the forward conv built by ``conv_fn`` and compare it
        against the reference ``ref``; optionally check that ``target_op``
        appears in the compiled graph and numerically verify gradients."""
        if subsample is None:
            subsample = (1,) * (len(inputs_shape) - 2)
        if filter_dilation is None:
            filter_dilation = (1,) * (len(inputs_shape) - 2)
        inputs_val = np.random.random(inputs_shape).astype("float32")
        filters_val = np.random.random(filters_shape).astype("float32")
        # scale down values to prevent rounding errors
        inputs_val /= 10
        filters_val /= 10
        inputs = self.shared(inputs_val)
        filters = self.shared(filters_val)
        if provide_shape:
            imshp = inputs_shape
            kshp = filters_shape
        else:
            imshp = None
            kshp = None
        # filter_flip=True means true convolution; False means correlation.
        if filter_flip:
            conv_mode = "conv"
        else:
            conv_mode = "cross"
        c_ref = ref(
            inputs,
            filters,
            border_mode=border_mode,
            subsample=subsample,
            conv_mode=conv_mode,
            filter_dilation=filter_dilation,
        )
        c = conv_fn(
            inputs,
            filters,
            border_mode=border_mode,
            subsample=subsample,
            filter_flip=filter_flip,
            input_shape=imshp,
            filter_shape=kshp,
            filter_dilation=filter_dilation,
        )
        f_ref = pytensor.function([], c_ref, mode="FAST_RUN")
        f = pytensor.function([], c, mode=mode)
        if target_op is not None:
            assert any(isinstance(n.op, target_op) for n in f.maker.fgraph.toposort())
            if check_trace:
                assert check_stack_trace(f, ops_to_check=target_op)
        res_ref = np.array(f_ref())
        res = np.array(f())
        utt.assert_allclose(res_ref, res)
        # Skip gradient verification for degenerate (empty) problems.
        if (
            verify_grad
            and inputs_val.size > 0
            and filters_val.size > 0
            and res.size > 0
        ):
            utt.verify_grad(
                conv_op(
                    border_mode=border_mode,
                    imshp=imshp,
                    kshp=kshp,
                    subsample=subsample,
                    filter_dilation=filter_dilation,
                ),
                [inputs_val, filters_val],
                mode=mode,
            )

    def run_gradweight(
        self,
        inputs_shape,
        filters_shape,
        output_shape,
        gradWeights_fn,
        ref,
        subsample=None,
        filter_flip=True,
        verify_grad=True,
        mode=None,
        border_mode="valid",
        provide_shape=False,
        target_op=None,
        check_trace=False,
        filter_dilation=None,
    ):
        """Compile the grad-wrt-weights Op built by ``gradWeights_fn`` and
        compare it against the reference ``ref``; optionally verify its own
        gradients numerically."""
        if subsample is None:
            subsample = (1,) * (len(inputs_shape) - 2)
        if filter_dilation is None:
            filter_dilation = (1,) * (len(inputs_shape) - 2)
        inputs_val = np.random.random(inputs_shape).astype("float32")
        output_val = np.random.random(output_shape).astype("float32")
        inputs = self.shared(inputs_val)
        output = self.shared(output_val)
        if provide_shape:
            imshp = inputs_shape
            kshp = filters_shape
        else:
            imshp = None
            kshp = None
        if filter_flip:
            conv_mode = "conv"
        else:
            conv_mode = "cross"
        c = gradWeights_fn(
            border_mode=border_mode,
            filter_flip=filter_flip,
            subsample=subsample,
            imshp=imshp,
            kshp=kshp,
            filter_dilation=filter_dilation,
        )
        # The Op takes the spatial filter shape as an explicit input.
        c = c(inputs, output, filters_shape[2:])
        c_ref = ref(
            inputs,
            output,
            filters_shape,
            border_mode=border_mode,
            subsample=subsample,
            conv_mode=conv_mode,
            filter_dilation=filter_dilation,
        )
        f = pytensor.function([], c, mode=mode)
        f_ref = pytensor.function([], c_ref, mode="FAST_RUN")
        if target_op is not None:
            assert any(isinstance(n.op, target_op) for n in f.maker.fgraph.toposort())
            if check_trace:
                assert check_stack_trace(f, ops_to_check=target_op)
        res_ref = np.array(f_ref())
        res = np.array(f())
        utt.assert_allclose(res_ref, res)

        def abstract_conv_gradweight(inputs_val, output_val):
            # Closure used by verify_grad: rebuild the Op without the
            # shape hints so only the numeric inputs vary.
            conv_op = gradWeights_fn(
                border_mode=border_mode,
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            return conv_op(inputs_val, output_val, filters_shape[2:])

        if verify_grad and inputs_val.size > 0 and output_val.size > 0 and res.size > 0:
            utt.verify_grad(
                abstract_conv_gradweight, [inputs_val, output_val], mode=mode, eps=1
            )

    def run_gradinput(
        self,
        inputs_shape,
        filters_shape,
        output_shape,
        gradInputs_fn,
        ref,
        subsample=None,
        filter_flip=True,
        verify_grad=True,
        mode=None,
        border_mode="valid",
        provide_shape=False,
        target_op=None,
        check_trace=False,
        filter_dilation=None,
    ):
        """Compile the grad-wrt-inputs Op built by ``gradInputs_fn`` and
        compare it against the reference ``ref`` (``ref=None`` for the
        expected-failure, inconsistent-shape tests)."""
        if subsample is None:
            subsample = (1,) * (len(inputs_shape) - 2)
        if filter_dilation is None:
            filter_dilation = (1,) * (len(inputs_shape) - 2)
        output_val = np.random.random(output_shape).astype("float32")
        filters_val = np.random.random(filters_shape).astype("float32")
        output = self.shared(output_val)
        filters = self.shared(filters_val)
        if provide_shape:
            imshp = inputs_shape
            kshp = filters_shape
        else:
            imshp = None
            kshp = None
        if filter_flip:
            conv_mode = "conv"
        else:
            conv_mode = "cross"
        c = gradInputs_fn(
            border_mode=border_mode,
            subsample=subsample,
            filter_flip=filter_flip,
            imshp=imshp,
            kshp=kshp,
            filter_dilation=filter_dilation,
        )
        # The Op takes the spatial input shape as an explicit input.
        c = c(filters, output, inputs_shape[2:])
        f = pytensor.function([], c, mode=mode)
        # ref is set to None for the inconsistent-shape tests.
        # The reference function also raises an exception, which would
        # mask the exception generated by the target implementation.
        if ref is not None:
            c_ref = ref(
                filters,
                output,
                inputs_shape,
                border_mode=border_mode,
                subsample=subsample,
                conv_mode=conv_mode,
                filter_dilation=filter_dilation,
            )
            f_ref = pytensor.function([], c_ref, mode="FAST_RUN")
        if target_op is not None:
            assert any(isinstance(n.op, target_op) for n in f.maker.fgraph.toposort())
            if check_trace:
                assert check_stack_trace(f, ops_to_check=target_op)
        res = np.array(f())
        if ref is not None:
            res_ref = np.array(f_ref())
            utt.assert_allclose(res_ref, res)

        def abstract_conv_gradinputs(filters_val, output_val):
            # Closure used by verify_grad: rebuild the Op without the
            # shape hints so only the numeric inputs vary.
            conv_op = gradInputs_fn(
                border_mode=border_mode,
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            return conv_op(filters_val, output_val, inputs_shape[2:])

        if (
            verify_grad
            and filters_val.size > 0
            and output_val.size > 0
            and res.size > 0
        ):
            utt.verify_grad(
                abstract_conv_gradinputs, [filters_val, output_val], mode=mode, eps=1
            )

    def run_test_case(self, *args, **kargs):
        """Single parameterized test case; implemented by subclasses."""
        raise NotImplementedError()

    @pytest.mark.xfail(
        condition=isinstance(get_default_mode().linker, NumbaLinker),
        reason="Involves Ops with no Python implementation for numba to use as fallback",
    )
    def test_all(self):
        """Sweep the class's parameter grids through ``run_test_case``."""
        ds = self.default_subsamples
        db = self.default_border_mode
        dflip = self.default_filter_flip
        dprovide_shape = self.default_provide_shape
        for i, f in zip(self.inputs_shapes, self.filters_shapes, strict=True):
            for provide_shape in self.provide_shape:
                self.run_test_case(i, f, ds, db, dflip, provide_shape)
            # Empty shapes (any 0 dim) only run the default combination.
            if min(i) > 0 and min(f) > 0:
                for fd in self.filters_dilations:
                    for s in self.subsamples:
                        for b in self.border_modes:
                            self.run_test_case(i, f, s, b, dflip, dprovide_shape, fd)
                for flip in self.filter_flip:
                    self.run_test_case(i, f, ds, db, flip, dprovide_shape)
class BaseTestConv2d(BaseTestConv):
    """Parameter grids and runner bindings for 2D abstract conv tests."""

    @classmethod
    def setup_class(cls):
        # This tests can run even when config.blas__ldflags is empty.
        cls.inputs_shapes = [
            (8, 1, 6, 6),
            (8, 1, 8, 8),
            (2, 1, 7, 7),
            (6, 1, 10, 11),
            (2, 1, 6, 5),
            (1, 5, 9, 9),
            (0, 1, 6, 6),
            (1, 0, 6, 6),
            (1, 1, 6, 6),
        ]
        cls.filters_shapes = [
            (5, 1, 2, 2),
            (4, 1, 3, 3),
            (2, 1, 3, 3),
            (1, 1, 2, 3),
            (4, 1, 1, 3),
            (4, 5, 3, 2),
            (1, 1, 2, 2),
            (1, 0, 2, 2),
            (0, 1, 2, 2),
        ]
        cls.subsamples = [(1, 1), (2, 2), (2, 4)]
        cls.default_subsamples = (1, 1)
        cls.filters_dilations = [(1, 1), (1, 2), (2, 1)]
        cls.default_filters_dilations = (1, 1)
        cls.border_modes = ["valid", "half", "full", (0, 0), (1, 1), (5, 5), (5, 2)]
        cls.default_border_mode = (0, 0)
        cls.filter_flip = [True, False]
        cls.default_filter_flip = True
        cls.provide_shape = [True, False]
        cls.default_provide_shape = True
        cls.shared = staticmethod(pytensor.compile.shared)

    def run_test_case_gi(self, *args, **kwargs):
        """Grad-wrt-inputs test case; implemented by concrete subclasses."""
        raise NotImplementedError()

    @pytest.mark.xfail(
        condition=isinstance(get_default_mode().linker, NumbaLinker),
        reason="Involves Ops with no Python implementation for numba to use as fallback",
    )
    def test_gradinput_arbitrary_output_shapes(self):
        # this computes the grad wrt inputs for an output shape
        # that the forward convolution would not produce
        input_shape = (2, 1, 7, 7)
        filter_shape = (2, 1, 3, 3)
        for output_shape in [(2, 2, 8, 8), (2, 2, 9, 9), (2, 2, 12, 12)]:
            for border_mode in ["valid", "half", "full"]:
                computed_shape = get_conv_output_shape(
                    input_shape,
                    filter_shape,
                    border_mode,
                    self.default_subsamples,
                    self.default_filters_dilations,
                )
                # is this a valid combination?
                if tuple(computed_shape) == output_shape:
                    self.run_test_case_gi(
                        input_shape,
                        filter_shape,
                        output_shape,
                        self.default_subsamples,
                        border_mode,
                        True,
                        True,
                        self.default_filters_dilations,
                        False,
                    )
                else:
                    # expect an error
                    self.run_test_case_gi(
                        input_shape,
                        filter_shape,
                        output_shape,
                        self.default_subsamples,
                        border_mode,
                        True,
                        True,
                        self.default_filters_dilations,
                        True,
                    )

    def test_gradinput_impossible_output_shapes(self):
        """Grad-wrt-inputs must reject output shapes the forward conv could
        never have produced."""

        def run_for_output_offsets(image_shape, kernel_shape, s, border_mode, d):
            # outputs that are too large or too small should be rejected
            for o in (-3, -1, 1, 2):
                output_shape = (1, 1, computed_shape[2] + o, computed_shape[3] + o)
                # expect an error
                self.run_test_case_gi(
                    image_shape,
                    kernel_shape,
                    output_shape,
                    (s, s),
                    border_mode,
                    True,
                    True,
                    (d, d),
                    True,
                )

        for i, k in ((1, 1), (1, 2), (2, 1), (4, 2), (4, 3), (7, 3), (9, 5)):
            for border_mode in ("valid", "half", "full", (0, 2)):
                for s, d in ((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (1, 3)):
                    image_shape = (1, 1, i, i)
                    kernel_shape = (1, 1, k, k)
                    # compute the output that these inputs and parameters would produce
                    computed_shape = get_conv_output_shape(
                        image_shape, kernel_shape, border_mode, (s, s), (d, d)
                    )
                    run_for_output_offsets(
                        image_shape,
                        kernel_shape,
                        s,
                        border_mode,
                        d,
                    )

    def run_fwd(
        self,
        inputs_shape,
        filters_shape,
        conv_fn=abstract_conv.abstract_conv2d,
        conv_op=abstract_conv.AbstractConv2d,
        ref=conv2d_corr,
        **kwargs,
    ):
        """Forward runner bound to the 2D Ops and 2D CorrMM reference."""
        super().run_fwd(
            inputs_shape=inputs_shape,
            filters_shape=filters_shape,
            conv_fn=conv_fn,
            conv_op=conv_op,
            ref=ref,
            **kwargs,
        )

    def run_gradweight(
        self,
        inputs_shape,
        filters_shape,
        output_shape,
        gradWeights_fn=abstract_conv.AbstractConv2d_gradWeights,
        ref=conv2d_corr_gw,
        **kwargs,
    ):
        """Grad-wrt-weights runner bound to the 2D Ops and reference."""
        super().run_gradweight(
            inputs_shape=inputs_shape,
            filters_shape=filters_shape,
            output_shape=output_shape,
            gradWeights_fn=gradWeights_fn,
            ref=ref,
            **kwargs,
        )

    def run_gradinput(
        self,
        inputs_shape,
        filters_shape,
        output_shape,
        gradInputs_fn=abstract_conv.AbstractConv2d_gradInputs,
        ref=conv2d_corr_gi,
        **kwargs,
    ):
        """Grad-wrt-inputs runner bound to the 2D Ops and reference."""
        super().run_gradinput(
            inputs_shape=inputs_shape,
            filters_shape=filters_shape,
            output_shape=output_shape,
            gradInputs_fn=gradInputs_fn,
            ref=ref,
            **kwargs,
        )
@pytest.mark.skipif(config.cxx == "", reason="cxx needed")
class TestAbstractConvNoOptim(BaseTestConv2d):
    """Run the 2D conv tests with graph optimization disabled, exercising
    the abstract Ops' own Python/C implementations directly."""

    @classmethod
    def setup_class(cls):
        # This tests can run even when config.blas__ldflags is empty.
        super().setup_class()
        # Reduced parameter grid: optimizations are off, so keep it small.
        cls.inputs_shapes = [(8, 1, 6, 6)]
        cls.filters_shapes = [(5, 1, 2, 2)]
        cls.subsamples = [(1, 1), (2, 2)]
        cls.filters_dilations = [(1, 1), (1, 2), (2, 1)]
        cls.border_modes = ["valid", "half", "full"]
        cls.filter_flip = [True]
        cls.provide_shape = [False]

    def run_test_case(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
        """Run forward, grad-weights and grad-inputs for one combination,
        all with the optimizer disabled."""
        o = self.get_output_shape(i, f, s, b, fd)
        mode = Mode(optimizer=None)
        self.run_fwd(
            inputs_shape=i,
            filters_shape=f,
            subsample=s,
            verify_grad=True,
            provide_shape=provide_shape,
            border_mode=b,
            filter_flip=flip,
            target_op=None,
            check_trace=True,
            filter_dilation=fd,
            mode=mode,
        )
        self.run_gradweight(
            inputs_shape=i,
            filters_shape=f,
            output_shape=o,
            subsample=s,
            verify_grad=True,
            provide_shape=provide_shape,
            border_mode=b,
            filter_flip=flip,
            target_op=None,
            check_trace=True,
            filter_dilation=fd,
            mode=mode,
        )
        self.run_gradinput(
            inputs_shape=i,
            filters_shape=f,
            output_shape=o,
            subsample=s,
            verify_grad=True,
            provide_shape=provide_shape,
            border_mode=b,
            filter_flip=flip,
            target_op=None,
            check_trace=True,
            filter_dilation=fd,
            mode=mode,
        )

    def run_test_case_gi(
        self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False
    ):
        """Grad-wrt-inputs for one combination; when ``expect_error`` the
        shape combination is invalid and must raise ``ValueError``."""
        mode = Mode(optimizer=None)
        if not expect_error:
            self.run_gradinput(
                inputs_shape=i,
                filters_shape=f,
                output_shape=o,
                subsample=s,
                verify_grad=True,
                provide_shape=provide_shape,
                border_mode=b,
                filter_flip=flip,
                target_op=None,
                check_trace=True,
                filter_dilation=fd,
                mode=mode,
            )
        else:
            with pytest.raises(ValueError):
                # ref=None: the reference would also raise, masking the
                # exception from the implementation under test.
                self.run_gradinput(
                    inputs_shape=i,
                    filters_shape=f,
                    output_shape=o,
                    subsample=s,
                    verify_grad=False,
                    provide_shape=provide_shape,
                    border_mode=b,
                    filter_flip=flip,
                    target_op=None,
                    check_trace=True,
                    filter_dilation=fd,
                    ref=None,
                    mode=mode,
                )
class BaseTestConv3d(BaseTestConv):
    """Parameter grids and runner bindings for 3D abstract conv tests.

    Fix: ``test_gradinput_impossible_output_shapes`` previously passed
    2-tuples ``(s, s)`` / ``(d, d)`` for the subsample and dilation of a 3D
    convolution (copied from the 2D base class), so the expected
    ``ValueError`` came from the Op constructor rejecting the tuple length
    instead of from the impossible output shape being detected.  They are
    now 3-tuples, matching the ``get_conv_output_shape`` call below.
    """

    @classmethod
    def setup_class(cls):
        # This tests can run even when config.blas__ldflags is empty.
        cls.inputs_shapes = [
            (2, 1, 5, 5, 5),
            (1, 2, 7, 5, 6),
            (0, 1, 5, 5, 5),
            (1, 0, 5, 5, 5),
            (1, 1, 5, 5, 5),
        ]
        cls.filters_shapes = [
            (2, 1, 2, 2, 2),
            (1, 2, 2, 1, 3),
            (1, 1, 2, 2, 2),
            (1, 0, 2, 2, 2),
            (0, 1, 2, 2, 2),
        ]
        cls.subsamples = [(1, 1, 1), (2, 2, 2), (1, 2, 3)]
        cls.default_subsamples = (1, 1, 1)
        cls.filters_dilations = [(1, 1, 1), (1, 2, 1), (2, 1, 2)]
        cls.default_filters_dilations = (1, 1, 1)
        cls.border_modes = ["valid", "half", "full", (0, 0, 0), (2, 2, 3)]
        cls.default_border_mode = (0, 0, 0)
        cls.filter_flip = [True, False]
        cls.default_filter_flip = True
        cls.provide_shape = [True, False]
        cls.default_provide_shape = True
        cls.shared = staticmethod(pytensor.compile.shared)

    def test_gradinput_arbitrary_output_shapes(self):
        # this computes the grad wrt inputs for an output shape
        # that the forward convolution would not produce
        input_shape = (2, 1, 7, 7, 7)
        filter_shape = (1, 1, 3, 3, 3)
        for output_shape in [(2, 1, 8, 8, 8), (2, 1, 9, 9, 9), (2, 1, 12, 12, 12)]:
            for border_mode in ["valid", "half", "full"]:
                # compute the output that these inputs and parameters would produce
                computed_shape = get_conv_output_shape(
                    input_shape,
                    filter_shape,
                    border_mode,
                    self.default_subsamples,
                    self.default_filters_dilations,
                )
                # is this a valid combination?
                if tuple(computed_shape) == output_shape:
                    self.run_test_case_gi(
                        input_shape,
                        filter_shape,
                        output_shape,
                        self.default_subsamples,
                        border_mode,
                        True,
                        True,
                        self.default_filters_dilations,
                        False,
                    )
                else:
                    # expect an error
                    self.run_test_case_gi(
                        input_shape,
                        filter_shape,
                        output_shape,
                        self.default_subsamples,
                        border_mode,
                        True,
                        True,
                        self.default_filters_dilations,
                        True,
                    )

    def test_gradinput_impossible_output_shapes(self):
        """Grad-wrt-inputs must reject output shapes the forward conv could
        never have produced."""

        def run_for_output_offsets(image_shape, kernel_shape, s, border_mode, d):
            # outputs that are too large or too small should be rejected
            for o in (-3, -1, 1, 2):
                output_shape = (
                    1,
                    1,
                    computed_shape[2] + o,
                    computed_shape[3] + o,
                    computed_shape[4] + o,
                )
                # expect an error; subsample/dilation must be 3-tuples so the
                # failure comes from the shape check, not the Op constructor.
                self.run_test_case_gi(
                    image_shape,
                    kernel_shape,
                    output_shape,
                    (s, s, s),
                    border_mode,
                    True,
                    True,
                    (d, d, d),
                    True,
                )

        for i, k in ((1, 1), (1, 2), (2, 1), (4, 2), (4, 3), (7, 3), (9, 5)):
            for border_mode in ("valid", "half", "full", (0, 2, 1)):
                for s, d in ((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (1, 3)):
                    image_shape = (1, 1, i, i, i)
                    kernel_shape = (1, 1, k, k, k)
                    # compute the output that these inputs and parameters would produce
                    computed_shape = get_conv_output_shape(
                        image_shape, kernel_shape, border_mode, (s, s, s), (d, d, d)
                    )
                    run_for_output_offsets(
                        image_shape,
                        kernel_shape,
                        s,
                        border_mode,
                        d,
                    )

    def run_fwd(
        self,
        inputs_shape,
        filters_shape,
        conv_fn=abstract_conv.conv3d,
        conv_op=abstract_conv.AbstractConv3d,
        ref=conv3d_corr,
        **kwargs,
    ):
        """Forward runner bound to the 3D Ops and 3D CorrMM reference."""
        super().run_fwd(
            inputs_shape=inputs_shape,
            filters_shape=filters_shape,
            conv_fn=conv_fn,
            conv_op=conv_op,
            ref=ref,
            **kwargs,
        )

    def run_gradweight(
        self,
        inputs_shape,
        filters_shape,
        output_shape,
        gradWeights_fn=abstract_conv.AbstractConv3d_gradWeights,
        ref=conv3d_corr_gw,
        **kwargs,
    ):
        """Grad-wrt-weights runner bound to the 3D Ops and reference."""
        super().run_gradweight(
            inputs_shape=inputs_shape,
            filters_shape=filters_shape,
            output_shape=output_shape,
            gradWeights_fn=gradWeights_fn,
            ref=ref,
            **kwargs,
        )

    def run_gradinput(
        self,
        inputs_shape,
        filters_shape,
        output_shape,
        gradInputs_fn=abstract_conv.AbstractConv3d_gradInputs,
        ref=conv3d_corr_gi,
        **kwargs,
    ):
        """Grad-wrt-inputs runner bound to the 3D Ops and reference."""
        super().run_gradinput(
            inputs_shape=inputs_shape,
            filters_shape=filters_shape,
            output_shape=output_shape,
            gradInputs_fn=gradInputs_fn,
            ref=ref,
            **kwargs,
        )
def test_constant_shapes():
    """The ``imshp``/``kshp`` parameters of the AbstractConv Ops must
    reject symbolic (non-constant) shapes and accept constants, plain
    tuples and lists."""
    dummy_t4 = ftensor4()
    alloc_dummy_t4 = pt.zeros((3, 5, 7, 11), dtype="float32")
    tuple_shape = (3, 5, 7, 11)
    constant_list_shape = [pt.constant(i, dtype="int64") for i in tuple_shape]

    # Symbolic or vector-valued shapes are not allowed.
    bad_shapes = (
        lvector(),
        pt.ones(4, dtype="int64"),
        dummy_t4.shape,
        alloc_dummy_t4.shape,
        pt.constant([3, 5, 7, 11]),
    )
    # Sequences of Python ints or scalar constants are allowed.
    good_shapes = (
        constant_list_shape,
        tuple(constant_list_shape),
        tuple_shape,
        list(tuple_shape),
    )

    for op in (AbstractConv2d, AbstractConv2d_gradInputs, AbstractConv2d_gradWeights):
        for shp in bad_shapes:
            for kwargs in ({"imshp": shp}, {"kshp": shp}):
                with pytest.raises(ValueError):
                    op(**kwargs)
        for shp in good_shapes:
            op(imshp=shp)
            op(kshp=shp)
class TestConvTypes:
    """The gradients of the abstract conv Ops must produce variables whose
    types match (or generalize to) the types of the variables they are
    gradients of, including when some inputs are numpy constants."""

    def setup_method(self):
        self.input = ftensor4()
        self.filters = ftensor4()
        self.topgrad = ftensor4()
        # Constant numpy input used to exercise the constant-input paths.
        self.constant_tensor = np.zeros((3, 5, 7, 11), dtype="float32")

    def test_grad_types(self):
        # This function simply tests the behaviour of the AbstractConv
        # Ops, not their optimizations
        input = self.input
        filters = self.filters
        topgrad = self.topgrad
        out_shape = lvector()
        # Forward Op: grads of both inputs keep their exact types.
        output = abstract_conv.abstract_conv2d(input, filters)
        grad_input, grad_filters = pytensor.grad(output.sum(), wrt=(input, filters))
        assert grad_input.type == input.type, (
            grad_input,
            grad_input.type,
            input,
            input.type,
        )
        assert grad_filters.type == filters.type, (
            grad_filters,
            grad_filters.type,
            filters,
            filters.type,
        )
        # Grad-wrt-weights Op: grads of (input, topgrad) keep their types.
        grad_filters = abstract_conv.AbstractConv2d_gradWeights()(
            input, topgrad, out_shape
        )
        grad_input, grad_topgrad = pytensor.grad(
            grad_filters.sum(), wrt=(input, topgrad)
        )
        assert grad_input.type == input.type, (
            grad_input,
            grad_input.type,
            input,
            input.type,
        )
        assert grad_topgrad.type == topgrad.type, (
            grad_topgrad,
            grad_topgrad.type,
            topgrad,
            topgrad.type,
        )
        # Grad-wrt-inputs Op: grads of (filters, topgrad) keep their types.
        grad_input = abstract_conv.AbstractConv2d_gradInputs()(
            filters, topgrad, out_shape
        )
        grad_filters, grad_topgrad = pytensor.grad(
            grad_input.sum(), wrt=(filters, topgrad)
        )
        assert grad_filters.type == filters.type, (
            grad_filters,
            grad_filters.type,
            filters,
            filters.type,
        )
        assert grad_topgrad.type == topgrad.type, (
            grad_topgrad,
            grad_topgrad.type,
            topgrad,
            topgrad.type,
        )

    def test_constant_input(self):
        # Check the AbstractConv Ops for constant inputs
        input = self.input
        filters = self.filters
        topgrad = self.topgrad
        constant_tensor = self.constant_tensor
        out_shape = lvector()
        # Check the forward Op
        # With a constant counterpart, the grad's type only needs to be a
        # subtype (is_super) of the symbolic variable's type.
        output = abstract_conv.abstract_conv2d(constant_tensor, filters)
        grad_filters = pytensor.grad(output.sum(), wrt=filters)
        assert filters.type.is_super(grad_filters.type), (
            grad_filters,
            grad_filters.type,
            filters,
            filters.type,
        )
        output = abstract_conv.abstract_conv2d(input, constant_tensor)
        grad_input = pytensor.grad(output.sum(), wrt=input)
        assert input.type.is_super(grad_input.type), (
            grad_input,
            grad_input.type,
            input,
            input.type,
        )
        # Check grad wrt weights
        grad_filters = abstract_conv.AbstractConv2d_gradWeights()(
            constant_tensor, topgrad, out_shape
        )
        grad_topgrad = pytensor.grad(grad_filters.sum(), wrt=topgrad)
        assert topgrad.type.is_super(grad_topgrad.type), (
            grad_topgrad,
            grad_topgrad.type,
            topgrad,
            topgrad.type,
        )
        grad_filters = abstract_conv.AbstractConv2d_gradWeights()(
            input, constant_tensor, out_shape
        )
        grad_input = pytensor.grad(grad_filters.sum(), wrt=input)
        assert grad_input.type == input.type, (
            grad_input,
            grad_input.type,
            input,
            input.type,
        )
        # Check grad wrt inputs
        grad_input = abstract_conv.AbstractConv2d_gradInputs()(
            constant_tensor, topgrad, out_shape
        )
        grad_topgrad = pytensor.grad(grad_input.sum(), wrt=topgrad)
        assert topgrad.type.is_super(grad_topgrad.type), (
            grad_topgrad,
            grad_topgrad.type,
            topgrad,
            topgrad.type,
        )
        grad_input = abstract_conv.AbstractConv2d_gradInputs()(
            filters, constant_tensor, out_shape
        )
        grad_filters = pytensor.grad(grad_input.sum(), wrt=filters)
        assert grad_filters.type == filters.type, (
            grad_filters,
            grad_filters.type,
            filters,
            filters.type,
        )
class TestBilinearUpsampling:
    """Tests for ``bilinear_upsampling`` and its 1D/2D kernel helpers.

    Expected values are computed with plain-NumPy reference implementations
    (``numerical_kernel_1D`` / ``numerical_kernel_2D`` and
    ``get_upsampled_twobytwo_mat``) and compared against compiled graphs.
    """

    # If config.blas__ldflags is empty, PyTensor will use
    # a NumPy C implementation of [sd]gemm_.
    compile_mode = pytensor.compile.mode.get_default_mode()
    if config.mode == "FAST_COMPILE":
        compile_mode = compile_mode.excluding("conv_gemm")
        compile_mode = compile_mode.excluding("AbstractConvCheck")
    elif not config.cxx:
        # Without a C++ compiler the check rewrite cannot be applied.
        compile_mode = compile_mode.excluding("AbstractConvCheck")
    def numerical_kernel_1D(self, ratio):
        """
        Gets numerical 1D kernel for bilinear upsampling
        """
        # Triangular kernel: 1, 2, ..., ratio, ..., 2, 1 (length 2*ratio - 1).
        return np.array(list(range(1, ratio + 1)) + list(range(ratio - 1, 0, -1)))
    def numerical_kernel_2D(self, ratio):
        """
        Gets numerical 2D kernel for bilinear upsampling
        """
        # Outer product of the 1D kernel with itself.
        return np.array(
            [
                i * j
                for i in self.numerical_kernel_1D(ratio)
                for j in self.numerical_kernel_1D(ratio)
            ]
        ).reshape(2 * ratio - 1, 2 * ratio - 1)
    def test_bilinear_kernel_2D(self):
        # Test 2D kernels used in bilinear upsampling
        #
        # This method tests the correctness of the
        # 2D kernel values used in bilinear upsampling
        # for some upsampling ratios.
        for ratio in [2, 3, 4, 5, 6, 7, 8, 9]:
            # getting the un-normalized kernel
            kernel = bilinear_kernel_2D(ratio=ratio, normalize=False)
            f = pytensor.function([], kernel)
            kernel_2D = self.numerical_kernel_2D(ratio)
            utt.assert_allclose(kernel_2D, f())
            # getting the normalized kernel
            kernel = bilinear_kernel_2D(ratio=ratio, normalize=True)
            f = pytensor.function([], kernel)
            kernel_2D = kernel_2D / float(ratio**2)
            utt.assert_allclose(kernel_2D, f())
    def test_bilinear_kernel_1D(self):
        # Test 1D kernels used in bilinear upsampling
        #
        # This method tests the correctness of the
        # 1D kernel values used in bilinear upsampling
        # for some upsampling ratios.
        # Also checks the symbolic-ratio path (ratio given as an iscalar).
        rat = iscalar()
        kernel_ten = bilinear_kernel_1D(ratio=rat, normalize=False)
        f_ten = pytensor.function([rat], kernel_ten)
        kernel_ten_norm = bilinear_kernel_1D(ratio=rat, normalize=True)
        f_ten_norm = pytensor.function([rat], kernel_ten_norm)
        for ratio in [2, 3, 4, 5, 6, 7, 8, 9]:
            # getting the un-normalized kernel
            kernel = bilinear_kernel_1D(ratio=ratio, normalize=False)
            f = pytensor.function([], kernel)
            kernel_1D = self.numerical_kernel_1D(ratio)
            utt.assert_allclose(kernel_1D, f())
            utt.assert_allclose(kernel_1D, f_ten(ratio))
            # getting the normalized kernel
            kernel = bilinear_kernel_1D(ratio=ratio, normalize=True)
            f = pytensor.function([], kernel)
            kernel_1D = kernel_1D / float(ratio)
            utt.assert_allclose(kernel_1D, f())
            utt.assert_allclose(kernel_1D, f_ten_norm(ratio))
    def numerical_upsampling_multiplier(self, ratio):
        """
        Compute upsampling multiplier
        This method computes the multipliers of an array
        that will be upsampled using bilinear interpolation.
        Parameters
        ----------
        ratio: int
            the ratio by which the array will be upsampled.
        Returns
        -------
        1D numpy array
            The multipliers that can be used in bilinear interpolation
            to upsample an array.
        int
            The size of the multipliers array
        """
        kern = np.arange(ratio + 1)
        return kern, kern.shape[0]
    def get_upsampled_twobytwo_mat(self, two_by_two, ratio):
        """
        Upsample 4D array with two rows and two columns
        This method gets a 4D numpy array with two rows and two columns
        and computes its upsampled array by using bilinear interpolation
        Parameters
        ----------
        two_by_two: numpy 4D array
            The array that will be upsampled by bilinear interpolation.
            Array is of shape (batch size, num channels, 2, 2)
        ratio: int
            The ratio by which two_by_two's last
            two dimensions (row and col) will be upsampled.
        Returns
        -------
        4D numpy array
            The array upsampled by using bilinear interpolation. Array
            is of shape (batch size, num channels, 2*ratio, 2*ratio).
        """
        kern, _shp = self.numerical_upsampling_multiplier(ratio)
        # Interpolate along columns, then rows.
        up_1D = two_by_two[:, :, :, :1] * kern[::-1] + two_by_two[:, :, :, 1:] * kern
        up_2D = (
            up_1D[:, :, :1, :] * kern[::-1][:, np.newaxis]
            + up_1D[:, :, 1:, :] * kern[:, np.newaxis]
        )
        # Replicate border rows/cols to reach the target 2*ratio size.
        num_concat = (ratio - 1) // 2
        for i in range(num_concat):
            up_2D = np.concatenate([up_2D[:, :, :1, :], up_2D], axis=2)
            up_2D = np.concatenate([up_2D, up_2D[:, :, -1:, :]], axis=2)
            up_2D = np.concatenate([up_2D[:, :, :, :1], up_2D], axis=3)
            up_2D = np.concatenate([up_2D, up_2D[:, :, :, -1:]], axis=3)
        if ratio % 2 == 0:
            # Even ratios need one extra trailing row and column.
            up_2D = np.concatenate([up_2D, up_2D[:, :, -1:, :]], axis=2)
            up_2D = np.concatenate([up_2D, up_2D[:, :, :, -1:]], axis=3)
        return up_2D / float(ratio) ** 2
    def test_bilinear_upsampling_1D(self):
        # Test bilinear upsampling using 1D kernels
        #
        # This method tests the bilinear_upsampling method
        # when using 1D kernels for some upsampling ratios.
        # upsampling for a ratio of two
        input_x = np.array([[[[1, 2], [3, 4]]]], dtype=config.floatX)
        for ratio in [2, 3, 4, 5, 6, 7, 8, 9]:
            bilin_mat = bilinear_upsampling(
                input=input_x,
                ratio=ratio,
                batch_size=1,
                num_input_channels=1,
                use_1D_kernel=True,
            )
            f = pytensor.function([], bilin_mat, mode=self.compile_mode)
            up_mat_2d = self.get_upsampled_twobytwo_mat(input_x, ratio)
            utt.assert_allclose(f(), up_mat_2d, rtol=1e-06)
    def test_bilinear_upsampling_reshaping(self):
        # Test bilinear upsampling without giving shape information
        #
        # This method tests the bilinear_upsampling method
        # without giving batch_size and num_input_channels
        # upsampling for a ratio of two
        input_x = np.array([[[[1, 2], [3, 4]]]], dtype=config.floatX)
        for ratio in [2, 3]:
            for use_1D_kernel in [True, False]:
                bilin_mat = bilinear_upsampling(
                    input=input_x,
                    ratio=ratio,
                    batch_size=None,
                    num_input_channels=None,
                    use_1D_kernel=use_1D_kernel,
                )
                f = pytensor.function([], bilin_mat, mode=self.compile_mode)
                up_mat_2d = self.get_upsampled_twobytwo_mat(input_x, ratio)
                utt.assert_allclose(f(), up_mat_2d, rtol=1e-06)
    def test_compare_1D_and_2D_upsampling_values(self):
        # Compare 1D and 2D upsampling
        #
        # This method verifies the bilinear upsampling done by using
        # 1D and 2D kernels will generate the same result.
        # checking upsampling with ratio 5
        rng = np.random.default_rng(280284)
        input_x = rng.random((5, 4, 6, 7)).astype(config.floatX)
        mat_1D = bilinear_upsampling(
            input=input_x,
            ratio=5,
            batch_size=5,
            num_input_channels=4,
            use_1D_kernel=True,
        )
        mat_2D = bilinear_upsampling(
            input=input_x,
            ratio=5,
            batch_size=5,
            num_input_channels=4,
            use_1D_kernel=False,
        )
        f_1D = pytensor.function([], mat_1D, mode=self.compile_mode)
        f_2D = pytensor.function([], mat_2D, mode=self.compile_mode)
        utt.assert_allclose(f_1D(), f_2D(), rtol=1e-06)
        # checking upsampling with ratio 8
        input_x = rng.random((12, 11, 10, 7)).astype(config.floatX)
        mat_1D = bilinear_upsampling(
            input=input_x,
            ratio=8,
            batch_size=12,
            num_input_channels=11,
            use_1D_kernel=True,
        )
        mat_2D = bilinear_upsampling(
            input=input_x,
            ratio=8,
            batch_size=12,
            num_input_channels=11,
            use_1D_kernel=False,
        )
        f_1D = pytensor.function([], mat_1D, mode=self.compile_mode)
        f_2D = pytensor.function([], mat_2D, mode=self.compile_mode)
        utt.assert_allclose(f_1D(), f_2D(), rtol=1e-06)
    def test_fractional_bilinear_upsampling(self):
        """Test bilinear upsampling with nonsimilar fractional
        row and col ratios
        """
        input_x = np.array(
            [[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]], ndmin=4
        ).astype(config.floatX)
        # Row ratio 7/4, column ratio 5/3.
        up_x = bilinear_upsampling(
            input=input_x, frac_ratio=((7, 4), (5, 3)), use_1D_kernel=False
        )
        # Precomputed reference output for the fixture above.
        num_up_x = np.array(
            [
                [
                    [
                        [1.0, 1.2, 1.8, 2.0],
                        [1.28571429, 1.48571429, 2.08571429, 2.28571429],
                        [2.42857143, 2.62857143, 3.22857143, 3.42857143],
                        [3.0, 3.2, 3.8, 4.0],
                    ],
                    [
                        [5.0, 5.2, 5.8, 6.0],
                        [5.28571429, 5.48571429, 6.08571429, 6.28571429],
                        [6.42857143, 6.62857143, 7.22857143, 7.42857143],
                        [7.0, 7.2, 7.8, 8.0],
                    ],
                    [
                        [9.0, 9.2, 9.8, 10.0],
                        [9.28571429, 9.48571429, 10.08571429, 10.28571429],
                        [10.42857143, 10.62857143, 11.22857143, 11.42857143],
                        [11.0, 11.2, 11.8, 12.0],
                    ],
                ]
            ]
        ).astype(config.floatX)
        f_up_x = pytensor.function([], up_x, mode=self.compile_mode)
        utt.assert_allclose(f_up_x(), num_up_x, rtol=1e-6)
    def test_fractional_bilinear_upsampling_shape(self):
        # Check only the output shape when a single frac_ratio pair
        # is applied to both spatial dimensions.
        x = np.random.random((1, 1, 200, 200)).astype(config.floatX)
        resize = (24, 20)
        z = bilinear_upsampling(
            pt.as_tensor_variable(x), frac_ratio=resize, use_1D_kernel=False
        )
        out = pytensor.function([], z.shape, mode="FAST_RUN")()
        # 200 * 24 / 20 = 240 on both spatial axes.
        utt.assert_allclose(out, (1, 1, 240, 240))
class TestConv2dTranspose:
    """Tests for the ``conv2d_transpose`` user-facing wrapper."""

    # Compilation mode; None means the default mode.
    mode = None
    @pytest.mark.skipif(config.cxx == "", reason="Test needs cxx")
    def test_interface(self):
        # Test conv2d_transpose wrapper.
        #
        # This method tests that the order of the filter's
        # axes expected by the function produces the correct
        # output shape.
        mode = self.mode
        if config.mode == "FAST_COMPILE":
            mode = (
                pytensor.compile.get_mode(mode)
                .excluding("conv_gemm")
                .excluding("AbstractConvCheck")
            )
        # All-ones input and filters make the expected output a simple
        # count of overlapping filter taps at each position.
        output = pytensor.function(
            inputs=[],
            outputs=conv2d_transpose(
                input=pt.ones((2, 2, 4, 4)),
                filters=pt.ones((2, 1, 4, 4)),
                output_shape=(2, 1, 10, 10),
                input_dilation=(2, 2),
            ),
            mode=mode,
        )()
        # Precomputed tap counts for the configuration above; replicated
        # across the batch dimension.
        expected_output = np.array(
            [
                [
                    [
                        [2, 2, 4, 4, 4, 4, 4, 4, 2, 2],
                        [2, 2, 4, 4, 4, 4, 4, 4, 2, 2],
                        [4, 4, 8, 8, 8, 8, 8, 8, 4, 4],
                        [4, 4, 8, 8, 8, 8, 8, 8, 4, 4],
                        [4, 4, 8, 8, 8, 8, 8, 8, 4, 4],
                        [4, 4, 8, 8, 8, 8, 8, 8, 4, 4],
                        [4, 4, 8, 8, 8, 8, 8, 8, 4, 4],
                        [4, 4, 8, 8, 8, 8, 8, 8, 4, 4],
                        [2, 2, 4, 4, 4, 4, 4, 4, 2, 2],
                        [2, 2, 4, 4, 4, 4, 4, 4, 2, 2],
                    ]
                ]
            ]
            * 2
        )
        np.testing.assert_equal(output, expected_output)
@pytest.mark.skipif(
    not config.cxx or config.mode == "FAST_COMPILE",
    # Fixed: this class tests conv2d grads, not conv3d.
    reason="Need blas to test conv2d",
)
class TestConv2dGrads:
    """Compare ``conv2d_grad_wrt_inputs`` / ``conv2d_grad_wrt_weights``
    against gradients derived from the forward ``conv2d`` graph.

    Each test sweeps border modes, subsampling factors and filter-flip
    settings over a small set of image/filter shape pairs.
    """

    def setup_method(self):
        self.random_stream = np.random.default_rng(utt.fetch_seed())
        # (batch, channels, rows, cols) image shapes, paired elementwise
        # with the filter shapes below.
        self.inputs_shapes = [(8, 1, 12, 12), (1, 1, 5, 5), (1, 1, 5, 6), (1, 1, 6, 6)]
        self.filters_shapes = [(5, 1, 2, 2), (1, 1, 3, 3)] * 2
        self.subsamples = [(1, 1), (2, 2)]
        self.border_modes = ["valid", "full"]
        self.filter_flip = [True, False]
        self.output_grad = tensor4()
        self.output_grad_wrt = tensor4()
        self.x = tensor4("x", dtype=config.floatX)  # inputs
        self.w = tensor4("w", dtype=config.floatX)  # filter weights
    def test_conv2d_grad_wrt_inputs(self):
        # Compares calculated abstract grads wrt inputs with the fwd grads
        # This method checks the outputs of `conv2d_grad_wrt_inputs` against
        # the outputs of `pytensor.tensor.conv` forward grads to make sure the
        # results are the same.
        for in_shape, fltr_shape in zip(
            self.inputs_shapes, self.filters_shapes, strict=True
        ):
            for bm in self.border_modes:
                for ss in self.subsamples:
                    for ff in self.filter_flip:
                        input_val = self.random_stream.random(in_shape).astype(
                            config.floatX
                        )
                        filter_val = self.random_stream.random(fltr_shape).astype(
                            config.floatX
                        )
                        out_grad_shape = abstract_conv.get_conv_output_shape(
                            image_shape=in_shape,
                            kernel_shape=fltr_shape,
                            border_mode=bm,
                            subsample=ss,
                        )
                        out_grad_val = self.random_stream.random(out_grad_shape).astype(
                            config.floatX
                        )
                        # Reference: grad of the forward conv wrt the image,
                        # with a known output gradient.
                        conv_out = abstract_conv.conv2d(
                            self.x,
                            filters=self.w,
                            border_mode=bm,
                            subsample=ss,
                            input_shape=in_shape,
                            filter_shape=fltr_shape,
                            filter_flip=ff,
                        )
                        conv_grad = pytensor.grad(
                            conv_out.sum(),
                            wrt=self.x,
                            known_grads={conv_out: self.output_grad},
                        )
                        f_old = pytensor.function(
                            [self.x, self.w, self.output_grad], conv_grad
                        )
                        # Candidate: direct grad-wrt-inputs helper.
                        conv_wrt_i_out = abstract_conv.conv2d_grad_wrt_inputs(
                            output_grad=self.output_grad_wrt,
                            filters=self.w,
                            border_mode=bm,
                            subsample=ss,
                            input_shape=in_shape,
                            filter_shape=fltr_shape,
                            filter_flip=ff,
                        )
                        f_new = pytensor.function(
                            [self.w, self.output_grad_wrt], conv_wrt_i_out
                        )
                        # check that they're equal
                        utt.assert_allclose(
                            f_new(filter_val, out_grad_val),
                            f_old(input_val, filter_val, out_grad_val),
                        )
    def test_conv2d_grad_wrt_weights(self):
        # Compares calculated abstract grads wrt weights with the fwd grads
        # This method checks the outputs of `conv2d_grad_wrt_weights` against
        # the outputs of `pytensor.tensor.conv` forward grads to make sure the
        # results are the same.
        for in_shape, fltr_shape in zip(
            self.inputs_shapes, self.filters_shapes, strict=True
        ):
            for bm in self.border_modes:
                for ss in self.subsamples:
                    for ff in self.filter_flip:
                        input_val = self.random_stream.random(in_shape).astype(
                            config.floatX
                        )
                        filter_val = self.random_stream.random(fltr_shape).astype(
                            config.floatX
                        )
                        out_grad_shape = abstract_conv.get_conv_output_shape(
                            image_shape=in_shape,
                            kernel_shape=fltr_shape,
                            border_mode=bm,
                            subsample=ss,
                        )
                        out_grad_val = self.random_stream.random(out_grad_shape).astype(
                            config.floatX
                        )
                        # Reference: grad of the forward conv wrt the weights.
                        conv_out = abstract_conv.conv2d(
                            self.x,
                            filters=self.w,
                            border_mode=bm,
                            subsample=ss,
                            input_shape=in_shape,
                            filter_shape=fltr_shape,
                            filter_flip=ff,
                        )
                        conv_grad = pytensor.grad(
                            conv_out.sum(),
                            wrt=self.w,
                            known_grads={conv_out: self.output_grad},
                        )
                        f_old = pytensor.function(
                            [self.x, self.w, self.output_grad], conv_grad
                        )
                        # Candidate: direct grad-wrt-weights helper.
                        conv_wrt_w_out = abstract_conv.conv2d_grad_wrt_weights(
                            self.x,
                            output_grad=self.output_grad_wrt,
                            border_mode=bm,
                            subsample=ss,
                            input_shape=in_shape,
                            filter_shape=fltr_shape,
                            filter_flip=ff,
                        )
                        f_new = pytensor.function(
                            [self.x, self.output_grad_wrt], conv_wrt_w_out
                        )
                        utt.assert_allclose(
                            f_new(input_val, out_grad_val),
                            f_old(input_val, filter_val, out_grad_val),
                        )
@pytest.mark.skipif(config.cxx == "", reason="cxx needed")
@pytest.mark.xfail(
    condition=isinstance(get_default_mode().linker, NumbaLinker),
    reason="Involves Ops with no Python implementation for numba to use as fallback",
)
class TestGroupedConvNoOptim:
    """Check grouped 2D convolution (num_groups > 1) against a reference
    built by splitting images/kernels per group and running an ungrouped
    correlation on each split, then concatenating the results.

    Subclasses (e.g. the 3D variant) override the Op aliases and fixtures.
    """

    # Ops under test and the Op classes expected in the compiled graph.
    conv = abstract_conv.AbstractConv2d
    conv_gradw = abstract_conv.AbstractConv2d_gradWeights
    conv_gradi = abstract_conv.AbstractConv2d_gradInputs
    conv_op = abstract_conv.AbstractConv2d
    conv_gradw_op = abstract_conv.AbstractConv2d_gradWeights
    conv_gradi_op = abstract_conv.AbstractConv2d_gradInputs
    # No graph rewrites: the abstract Ops must remain in the graph.
    mode = Mode(optimizer=None)
    is_dnn = False
    def setup_method(self):
        # Per-case group counts, matched elementwise with the shape lists.
        self.num_groups = [3, 2, 4, 4]
        self.border_mode = "valid"
        self.subsample = (1, 1)
        self.img_shape = [(5, 6, 5, 5), (4, 4, 7, 5), (3, 8, 5, 3), (2, 4, 7, 7)]
        self.kern_shape = [(6, 2, 3, 3), (6, 2, 5, 3), (4, 2, 3, 3), (4, 1, 3, 5)]
        self.top_shape = [(5, 6, 3, 3), (4, 6, 3, 3), (3, 4, 3, 1), (2, 4, 5, 3)]
        self.filter_dilation = (1, 1)
        self.ref_mode = "FAST_RUN"
        self.convdim = 2
        # Reference correlation helpers (forward / grad-weights / grad-inputs).
        self.corr_fwd = conv2d_corr
        self.corr_gradw = conv2d_corr_gw
        self.corr_gradi = conv2d_corr_gi
    def test_fwd(self):
        # Forward grouped conv vs. per-group ungrouped reference.
        if self.convdim == 2:
            img_sym = tensor4("img")
            kern_sym = tensor4("kern")
        else:
            img_sym = tensor5("img")
            kern_sym = tensor5("kern")
        for imshp, kshp, groups in zip(
            self.img_shape, self.kern_shape, self.num_groups, strict=True
        ):
            img = np.random.random(imshp).astype(config.floatX)
            kern = np.random.random(kshp).astype(config.floatX)
            # Split channels (axis 1) of the image and output channels
            # (axis 0) of the kernels into per-group chunks.
            split_imgs = np.split(img, groups, axis=1)
            split_kern = np.split(kern, groups, axis=0)
            grouped_conv_op = self.conv(
                border_mode=self.border_mode,
                subsample=self.subsample,
                filter_dilation=self.filter_dilation,
                num_groups=groups,
            )
            grouped_conv_output = grouped_conv_op(img_sym, kern_sym)
            grouped_func = pytensor.function(
                [img_sym, kern_sym], grouped_conv_output, mode=self.mode
            )
            # The abstract Op must survive compilation (no optimizer).
            assert any(
                isinstance(node.op, self.conv_op)
                for node in grouped_func.maker.fgraph.toposort()
            )
            grouped_output = grouped_func(img, kern)
            ref_conv_op = self.corr_fwd(
                img_sym,
                kern_sym,
                border_mode=self.border_mode,
                subsample=self.subsample,
                filter_dilation=self.filter_dilation,
            )
            ref_func = pytensor.function(
                [img_sym, kern_sym], ref_conv_op, mode=self.ref_mode
            )
            ref_concat_output = [
                ref_func(img_arr, kern_arr)
                for img_arr, kern_arr in zip(split_imgs, split_kern, strict=True)
            ]
            # Group outputs are concatenated along the channel axis.
            ref_concat_output = np.concatenate(ref_concat_output, axis=1)
            utt.assert_allclose(grouped_output, ref_concat_output)
            utt.verify_grad(grouped_conv_op, [img, kern], mode=self.mode, eps=1)
    def test_gradweights(self):
        # Grad-wrt-weights of grouped conv vs. per-group reference.
        if self.convdim == 2:
            img_sym = tensor4("img")
            top_sym = tensor4("kern")
        else:
            img_sym = tensor5("img")
            top_sym = tensor5("kern")
        for imshp, kshp, tshp, groups in zip(
            self.img_shape,
            self.kern_shape,
            self.top_shape,
            self.num_groups,
            strict=True,
        ):
            img = np.random.random(imshp).astype(config.floatX)
            top = np.random.random(tshp).astype(config.floatX)
            split_imgs = np.split(img, groups, axis=1)
            split_top = np.split(top, groups, axis=1)
            grouped_convgrad_op = self.conv_gradw(
                border_mode=self.border_mode,
                subsample=self.subsample,
                filter_dilation=self.filter_dilation,
                num_groups=groups,
            )
            # The gradWeights Op also takes the spatial kernel shape.
            grouped_conv_output = grouped_convgrad_op(
                img_sym, top_sym, pt.as_tensor_variable(kshp[-self.convdim :])
            )
            grouped_func = pytensor.function(
                [img_sym, top_sym], grouped_conv_output, mode=self.mode
            )
            assert any(
                isinstance(node.op, self.conv_gradw_op)
                for node in grouped_func.maker.fgraph.toposort()
            )
            grouped_output = grouped_func(img, top)
            ref_conv_op = self.corr_gradw(
                img_sym,
                top_sym,
                kshp,
                border_mode=self.border_mode,
                subsample=self.subsample,
                filter_dilation=self.filter_dilation,
            )
            ref_func = pytensor.function(
                [img_sym, top_sym], ref_conv_op, mode=self.ref_mode
            )
            ref_concat_output = [
                ref_func(img_arr, top_arr)
                for img_arr, top_arr in zip(split_imgs, split_top, strict=True)
            ]
            # Weight gradients concatenate along the output-channel axis.
            ref_concat_output = np.concatenate(ref_concat_output, axis=0)
            utt.assert_allclose(grouped_output, ref_concat_output)
            def conv_gradweight(inputs_val, output_val):
                # Wrapper fixing the shape argument for verify_grad.
                return grouped_convgrad_op(
                    inputs_val,
                    output_val,
                    pt.as_tensor_variable(kshp[-self.convdim :]),
                )
            utt.verify_grad(conv_gradweight, [img, top], mode=self.mode, eps=1)
    def test_gradinputs(self):
        # Grad-wrt-inputs of grouped conv vs. per-group reference.
        if self.convdim == 2:
            kern_sym = tensor4("kern")
            top_sym = tensor4("top")
        else:
            kern_sym = tensor5("kern")
            top_sym = tensor5("top")
        for imshp, kshp, tshp, groups in zip(
            self.img_shape,
            self.kern_shape,
            self.top_shape,
            self.num_groups,
            strict=True,
        ):
            kern = np.random.random(kshp).astype(config.floatX)
            top = np.random.random(tshp).astype(config.floatX)
            split_kerns = np.split(kern, groups, axis=0)
            split_top = np.split(top, groups, axis=1)
            grouped_convgrad_op = self.conv_gradi(
                border_mode=self.border_mode,
                subsample=self.subsample,
                filter_dilation=self.filter_dilation,
                num_groups=groups,
            )
            # The gradInputs Op also takes the spatial image shape.
            grouped_conv_output = grouped_convgrad_op(
                kern_sym, top_sym, pt.as_tensor_variable(imshp[-self.convdim :])
            )
            grouped_func = pytensor.function(
                [kern_sym, top_sym], grouped_conv_output, mode=self.mode
            )
            assert any(
                isinstance(node.op, self.conv_gradi_op)
                for node in grouped_func.maker.fgraph.toposort()
            )
            grouped_output = grouped_func(kern, top)
            ref_conv_op = self.corr_gradi(
                kern_sym,
                top_sym,
                imshp,
                border_mode=self.border_mode,
                subsample=self.subsample,
                filter_dilation=self.filter_dilation,
            )
            ref_func = pytensor.function(
                [kern_sym, top_sym], ref_conv_op, mode=self.ref_mode
            )
            ref_concat_output = [
                ref_func(kern_arr, top_arr)
                for kern_arr, top_arr in zip(split_kerns, split_top, strict=True)
            ]
            # Input gradients concatenate along the channel axis.
            ref_concat_output = np.concatenate(ref_concat_output, axis=1)
            utt.assert_allclose(grouped_output, ref_concat_output)
            def conv_gradinputs(filters_val, output_val):
                # Wrapper fixing the shape argument for verify_grad.
                return grouped_convgrad_op(
                    filters_val,
                    output_val,
                    pt.as_tensor_variable(imshp[-self.convdim :]),
                )
            utt.verify_grad(conv_gradinputs, [kern, top], mode=self.mode, eps=1)
@pytest.mark.skipif(config.cxx == "", reason="cxx needed")
@pytest.mark.xfail(
    condition=isinstance(get_default_mode().linker, NumbaLinker),
    reason="Involves Ops with no Python implementation for numba to use as fallback",
)
class TestGroupedConv3dNoOptim(TestGroupedConvNoOptim):
    """3D variant of the grouped-convolution tests: swaps in the
    AbstractConv3d Ops and 5D fixtures; the test methods are inherited.
    """

    conv = abstract_conv.AbstractConv3d
    conv_gradw = abstract_conv.AbstractConv3d_gradWeights
    conv_gradi = abstract_conv.AbstractConv3d_gradInputs
    conv_op = abstract_conv.AbstractConv3d
    conv_gradw_op = abstract_conv.AbstractConv3d_gradWeights
    conv_gradi_op = abstract_conv.AbstractConv3d_gradInputs
    mode = Mode(optimizer=None)
    def setup_method(self):
        # 5D (batch, channels, depth, rows, cols) fixtures, matched
        # elementwise with num_groups.
        self.num_groups = [3, 2, 4, 4]
        self.border_mode = "valid"
        self.subsample = (1, 1, 1)
        self.img_shape = [
            (2, 6, 5, 5, 5),
            (1, 4, 7, 5, 7),
            (1, 8, 5, 3, 5),
            (2, 4, 7, 7, 7),
        ]
        self.kern_shape = [
            (3, 2, 3, 3, 3),
            (6, 2, 5, 3, 5),
            (4, 2, 3, 3, 3),
            (4, 1, 3, 5, 3),
        ]
        self.top_shape = [
            (2, 3, 3, 3, 3),
            (1, 6, 3, 3, 3),
            (1, 4, 3, 1, 3),
            (2, 4, 5, 3, 5),
        ]
        self.filter_dilation = (1, 1, 1)
        self.ref_mode = "FAST_RUN"
        self.convdim = 3
        # 3D reference correlation helpers.
        self.corr_fwd = conv3d_corr
        self.corr_gradw = conv3d_corr_gw
        self.corr_gradi = conv3d_corr_gi
class TestSeparableConv:
    """Tests for ``separable_conv2d`` / ``separable_conv3d`` against
    hand-precomputed outputs for a fixed depthwise + pointwise filter pair.
    """

    def setup_method(self):
        # Input image: (1 batch, 2 channels, 5, 5).
        self.x = np.array(
            [
                [
                    [
                        [1, 2, 3, 4, 5],
                        [3, 2, 1, 4, 5],
                        [3, 3, 1, 3, 6],
                        [5, 3, 2, 1, 1],
                        [4, 7, 1, 2, 1],
                    ],
                    [
                        [3, 3, 1, 2, 6],
                        [6, 5, 4, 3, 1],
                        [3, 4, 5, 2, 3],
                        [6, 4, 1, 3, 4],
                        [2, 3, 4, 2, 5],
                    ],
                ]
            ]
        ).astype(config.floatX)
        # Depthwise filters: (4 output maps, 1, 3, 3).
        self.depthwise_filter = np.array(
            [
                [[[3, 2, 1], [5, 3, 2], [6, 4, 2]]],
                [[[5, 5, 2], [3, 7, 4], [3, 5, 4]]],
                [[[7, 4, 7], [5, 3, 3], [1, 3, 1]]],
                [[[4, 4, 4], [2, 4, 6], [0, 0, 7]]],
            ]
        ).astype(config.floatX)
        # Pointwise (1x1) filters mixing the 4 depthwise maps into 2 outputs.
        self.pointwise_filter = np.array(
            [[[[4]], [[1]], [[3]], [[5]]], [[[2]], [[1]], [[2]], [[8]]]]
        ).astype(config.floatX)
        # Precomputed expected outputs for "valid" and "full" border modes.
        self.precomp_output_valid = np.array(
            [
                [
                    [[1385, 1333, 1339], [1382, 1243, 1291], [1303, 1120, 1228]],
                    [[1532, 1410, 1259], [1522, 1346, 1314], [1379, 1192, 1286]],
                ]
            ]
        ).astype(config.floatX)
        self.precomp_output_full = np.array(
            [
                [
                    [
                        [140, 266, 343, 206, 59],
                        [395, 697, 979, 585, 245],
                        [429, 863, 1385, 919, 453],
                        [243, 499, 864, 627, 371],
                        [90, 183, 291, 254, 202],
                    ],
                    [
                        [149, 289, 359, 213, 58],
                        [400, 750, 1076, 662, 266],
                        [387, 854, 1532, 1091, 540],
                        [174, 411, 971, 786, 518],
                        [51, 110, 286, 299, 298],
                    ],
                ]
            ]
        ).astype(config.floatX)
    @pytest.mark.skipif(config.cxx == "", reason="test needs cxx")
    def test_interface2d(self):
        # 2D separable conv: default args, shape inference, subsample
        # and border_mode variants.
        x_sym = tensor4("x")
        dfilter_sym = tensor4("d")
        pfilter_sym = tensor4("p")
        sep_op = separable_conv2d(x_sym, dfilter_sym, pfilter_sym, self.x.shape[1])
        fun = pytensor.function(
            [x_sym, dfilter_sym, pfilter_sym], sep_op, mode="FAST_RUN"
        )
        # test for square matrix
        top = fun(self.x, self.depthwise_filter, self.pointwise_filter)
        utt.assert_allclose(top, self.precomp_output_valid)
        # test for non-square matrix
        top = fun(self.x[:, :, :3, :], self.depthwise_filter, self.pointwise_filter)
        utt.assert_allclose(top, self.precomp_output_valid[:, :, :1, :])
        # test if it infers shape
        sep_op = separable_conv2d(
            x_sym,
            dfilter_sym,
            pfilter_sym,
            self.x.shape[1],
            input_shape=self.x.shape,
            depthwise_filter_shape=self.depthwise_filter.shape,
            pointwise_filter_shape=self.pointwise_filter.shape,
        )
        fun = pytensor.function(
            [x_sym, dfilter_sym, pfilter_sym], sep_op, mode="FAST_RUN"
        )
        top = fun(self.x, self.depthwise_filter, self.pointwise_filter)
        utt.assert_allclose(top, self.precomp_output_valid)
        # test non-default subsample
        sep_op = separable_conv2d(
            x_sym, dfilter_sym, pfilter_sym, self.x.shape[1], subsample=(2, 2)
        )
        fun = pytensor.function(
            [x_sym, dfilter_sym, pfilter_sym], sep_op, mode="FAST_RUN"
        )
        top = fun(self.x, self.depthwise_filter, self.pointwise_filter)
        # Subsampling by 2 keeps every other row/col of the valid output.
        utt.assert_allclose(
            top, np.delete(np.delete(self.precomp_output_valid, 1, axis=3), 1, axis=2)
        )
        # test non-default border_mode
        sep_op = separable_conv2d(
            x_sym, dfilter_sym, pfilter_sym, self.x.shape[1], border_mode="full"
        )
        fun = pytensor.function(
            [x_sym, dfilter_sym, pfilter_sym], sep_op, mode="FAST_RUN"
        )
        top = fun(self.x[:, :, :3, :3], self.depthwise_filter, self.pointwise_filter)
        utt.assert_allclose(top, self.precomp_output_full)
    @pytest.mark.skipif(config.cxx == "", reason="test needs cxx")
    def test_interface3d(self):
        # 3D separable conv, built by tiling the 2D fixtures along depth.
        # Expand the filter along the depth
        x = np.tile(np.expand_dims(self.x, axis=2), (1, 1, 5, 1, 1))
        depthwise_filter = np.tile(
            np.expand_dims(self.depthwise_filter, axis=2), (1, 1, 3, 1, 1)
        )
        pointwise_filter = np.expand_dims(self.pointwise_filter, axis=2)
        # Depth-3 filter over a constant depth axis scales the 2D result by 3.
        precomp_output = (
            np.tile(np.expand_dims(self.precomp_output_valid, axis=2), (1, 1, 3, 1, 1))
            * 3
        )
        x_sym = tensor5("x")
        dfilter_sym = tensor5("d")
        pfilter_sym = tensor5("p")
        sep_op = separable_conv3d(x_sym, dfilter_sym, pfilter_sym, x.shape[1])
        fun = pytensor.function(
            [x_sym, dfilter_sym, pfilter_sym], sep_op, mode="FAST_RUN"
        )
        # test for square matrix
        top = fun(x, depthwise_filter, pointwise_filter)
        utt.assert_allclose(top, precomp_output)
        # test for non-square matrix
        top = fun(x[:, :, :3, :, :3], depthwise_filter, pointwise_filter)
        utt.assert_allclose(top, precomp_output[:, :, :1, :, :1])
        # test if it infers shape
        sep_op = separable_conv3d(
            x_sym,
            dfilter_sym,
            pfilter_sym,
            x.shape[1],
            input_shape=x.shape,
            depthwise_filter_shape=depthwise_filter.shape,
            pointwise_filter_shape=pointwise_filter.shape,
        )
        fun = pytensor.function(
            [x_sym, dfilter_sym, pfilter_sym], sep_op, mode="FAST_RUN"
        )
        top = fun(x, depthwise_filter, pointwise_filter)
        utt.assert_allclose(top, precomp_output)
        # test non-default subsample
        sep_op = separable_conv3d(
            x_sym, dfilter_sym, pfilter_sym, x.shape[1], subsample=(2, 2, 2)
        )
        fun = pytensor.function(
            [x_sym, dfilter_sym, pfilter_sym], sep_op, mode="FAST_RUN"
        )
        top = fun(x, depthwise_filter, pointwise_filter)
        utt.assert_allclose(
            top,
            np.delete(
                np.delete(np.delete(precomp_output, 1, axis=4), 1, axis=3), 1, axis=2
            ),
        )
        # test non-default border_mode
        # "full" over the tiled depth weights slices by overlap count 1,2,3,2,1.
        precomp_output = np.tile(
            np.expand_dims(self.precomp_output_full, axis=2), (1, 1, 5, 1, 1)
        ) * np.array([[[[[1]], [[2]], [[3]], [[2]], [[1]]]]])
        sep_op = separable_conv3d(
            x_sym, dfilter_sym, pfilter_sym, x.shape[1], border_mode="full"
        )
        fun = pytensor.function(
            [x_sym, dfilter_sym, pfilter_sym], sep_op, mode="FAST_RUN"
        )
        top = fun(x[:, :, :3, :3, :3], depthwise_filter, pointwise_filter)
        utt.assert_allclose(top, precomp_output)
@pytest.mark.skipif(
    config.cxx == "",
    reason="SciPy and cxx needed",
)
class TestUnsharedConv:
    """Check unshared 2D convolution (a distinct filter per output position,
    6D kernels) against repeated shared-filter convolutions: for each output
    position the unshared result must match the shared conv run with that
    position's filter slice.
    """

    # Ops under test and the Op classes expected in the compiled graph.
    conv2d = abstract_conv.AbstractConv2d
    conv2d_gradw = abstract_conv.AbstractConv2d_gradWeights
    conv2d_gradi = abstract_conv.AbstractConv2d_gradInputs
    conv2d_op = abstract_conv.AbstractConv2d
    conv2d_gradw_op = abstract_conv.AbstractConv2d_gradWeights
    conv2d_gradi_op = abstract_conv.AbstractConv2d_gradInputs
    # No graph rewrites: the abstract Ops must remain in the graph.
    mode = Mode(optimizer="None")
    def setup_method(self):
        self.img_shape = [(2, 2, 4, 4), (3, 2, 4, 2), (3, 3, 5, 3), (3, 4, 4, 4)]
        # 6D unshared kernels: (out_chan, out_rows, out_cols, in_chan, kr, kc).
        self.kern_shape = [
            (2, 2, 2, 2, 3, 3),
            (2, 4, 2, 2, 4, 2),
            (3, 2, 1, 1, 3, 3),
            (4, 3, 3, 2, 4, 2),
        ]
        self.topgrad_shape = [(2, 2, 2, 2), (3, 2, 4, 2), (3, 3, 2, 1), (3, 4, 3, 3)]
        self.border_mode = ["valid", "full", "valid", "full"]
        self.subsample = [(1, 1), (2, 2), (2, 1), (3, 2)]
        self.filter_dilation = (1, 1)
        self.num_groups = [1, 1, 3, 2]
        # self.verify_flags = np.random.choice([True, False], 4, [0.5, 0.5])
        # Above line can be used instead if speed is a concern
        self.verify_flags = [True] * 4
        self.ref_mode = "FAST_RUN"
    def test_fwd(self):
        # Forward unshared conv vs. per-position shared reference.
        tensor6 = TensorType(config.floatX, shape=(None,) * 6)
        img_sym = tensor4("img")
        kern_sym = tensor6("kern")
        ref_kern_sym = tensor4("ref_kern")
        for imshp, kshp, mode, sub, groups, verify in zip(
            self.img_shape,
            self.kern_shape,
            self.border_mode,
            self.subsample,
            self.num_groups,
            self.verify_flags,
            strict=True,
        ):
            img = np.random.random(imshp).astype(config.floatX)
            kern = np.random.random(kshp).astype(config.floatX)
            unshared_conv_op = self.conv2d(
                border_mode=mode,
                subsample=sub,
                filter_dilation=self.filter_dilation,
                num_groups=groups,
                unshared=True,
            )
            unshared_out_sym = unshared_conv_op(img_sym, kern_sym)
            unshared_func = pytensor.function(
                [img_sym, kern_sym], unshared_out_sym, mode=self.mode
            )
            # The abstract Op must survive compilation (no optimizer).
            assert any(
                isinstance(node.op, self.conv2d_op)
                for node in unshared_func.maker.fgraph.toposort()
            )
            unshared_output = unshared_func(img, kern)
            # Shape of one position's shared (4D) kernel slice.
            single_kshp = kshp[:1] + kshp[3:]
            ref_conv_op = self.conv2d(
                border_mode=mode,
                subsample=sub,
                filter_dilation=self.filter_dilation,
                num_groups=groups,
                unshared=False,
            )
            ref_out_sym = ref_conv_op(img_sym, ref_kern_sym)
            ref_func = pytensor.function(
                [img_sym, ref_kern_sym], ref_out_sym, mode=self.mode
            )
            # Compare each output position against the shared conv with the
            # filter assigned to that position.
            for i in range(0, kshp[1]):
                for j in range(0, kshp[2]):
                    single_kern = kern[:, i, j, ...].reshape(single_kshp)
                    ref_val = ref_func(img, single_kern)
                    utt.assert_allclose(
                        ref_val[:, :, i, j], unshared_output[:, :, i, j]
                    )
            if verify:
                utt.verify_grad(unshared_conv_op, [img, kern], mode=self.mode, eps=1)
    def test_gradweight(self):
        # Grad-wrt-weights of unshared conv vs. per-position reference.
        img_sym = tensor4("img")
        top_sym = tensor4("top")
        for imshp, kshp, topshp, mode, sub, groups, verify in zip(
            self.img_shape,
            self.kern_shape,
            self.topgrad_shape,
            self.border_mode,
            self.subsample,
            self.num_groups,
            self.verify_flags,
            strict=True,
        ):
            img = np.random.random(imshp).astype(config.floatX)
            top = np.random.random(topshp).astype(config.floatX)
            unshared_conv_op = self.conv2d_gradw(
                border_mode=mode,
                subsample=sub,
                filter_dilation=self.filter_dilation,
                num_groups=groups,
                unshared=True,
            )
            unshared_out_sym = unshared_conv_op(
                img_sym, top_sym, pt.as_tensor_variable(kshp[-2:])
            )
            unshared_func = pytensor.function(
                [img_sym, top_sym], unshared_out_sym, mode=self.mode
            )
            assert any(
                isinstance(node.op, self.conv2d_gradw_op)
                for node in unshared_func.maker.fgraph.toposort()
            )
            unshared_output = unshared_func(img, top)
            single_kshp = kshp[:1] + kshp[3:]
            ref_conv_op = self.conv2d_gradw(
                border_mode=mode,
                subsample=sub,
                filter_dilation=self.filter_dilation,
                num_groups=groups,
                unshared=False,
            )
            ref_out_sym = ref_conv_op(
                img_sym, top_sym, pt.as_tensor_variable(single_kshp[-2:])
            )
            ref_func = pytensor.function(
                [img_sym, top_sym], ref_out_sym, mode=self.mode
            )
            # Masking the topgrad to one position isolates that position's
            # weight gradient in the shared reference.
            for i in range(0, topshp[2]):
                for j in range(0, topshp[3]):
                    top_single = np.zeros_like(top)
                    top_single[:, :, i, j] = top[:, :, i, j]
                    ref_output = ref_func(img, top_single)
                    utt.assert_allclose(unshared_output[:, i, j, ...], ref_output)
            def conv_gradweight(inputs_val, output_val):
                # Wrapper fixing the shape argument for verify_grad.
                return unshared_conv_op(
                    inputs_val, output_val, pt.as_tensor_variable(kshp[-2:])
                )
            if verify:
                utt.verify_grad(conv_gradweight, [img, top], mode=self.mode, eps=1)
    def test_gradinput(self):
        # Grad-wrt-inputs of unshared conv vs. accumulated per-position
        # shared references.
        tensor6 = TensorType(config.floatX, shape=(None,) * 6)
        kern_sym = tensor6("kern")
        top_sym = tensor4("top")
        ref_kern_sym = tensor4("ref_kern")
        for imshp, kshp, topshp, mode, sub, groups, verify in zip(
            self.img_shape,
            self.kern_shape,
            self.topgrad_shape,
            self.border_mode,
            self.subsample,
            self.num_groups,
            self.verify_flags,
            strict=True,
        ):
            single_kshp = kshp[:1] + kshp[3:]
            kern = np.random.random(kshp).astype(config.floatX)
            top = np.random.random(topshp).astype(config.floatX)
            unshared_conv_op = self.conv2d_gradi(
                border_mode=mode,
                subsample=sub,
                filter_dilation=self.filter_dilation,
                num_groups=groups,
                unshared=True,
            )
            unshared_out_sym = unshared_conv_op(
                kern_sym, top_sym, pt.as_tensor_variable(imshp[-2:])
            )
            unshared_func = pytensor.function(
                [kern_sym, top_sym], unshared_out_sym, mode=self.mode
            )
            assert any(
                isinstance(node.op, self.conv2d_gradi_op)
                for node in unshared_func.maker.fgraph.toposort()
            )
            unshared_output = unshared_func(kern, top)
            ref_conv_op = self.conv2d_gradi(
                border_mode=mode,
                subsample=sub,
                filter_dilation=self.filter_dilation,
                num_groups=groups,
                unshared=False,
            )
            ref_out_sym = ref_conv_op(
                ref_kern_sym, top_sym, pt.as_tensor_variable(imshp[-2:])
            )
            ref_func = pytensor.function(
                [ref_kern_sym, top_sym], ref_out_sym, mode=self.mode
            )
            # The input gradient is the sum of each position's contribution.
            ref_output = np.zeros(imshp)
            for i in range(0, topshp[2]):
                for j in range(0, topshp[3]):
                    single_kern = kern[:, i, j, ...].reshape(single_kshp)
                    top_single = np.zeros_like(top)
                    top_single[:, :, i, j] = top[:, :, i, j]
                    ref_output += ref_func(single_kern, top_single)
            utt.assert_allclose(ref_output, unshared_output)
            def conv_gradinputs(filters_val, output_val):
                # Wrapper fixing the shape argument for verify_grad.
                return unshared_conv_op(
                    filters_val, output_val, pt.as_tensor_variable(imshp[-2:])
                )
            if verify:
                utt.verify_grad(conv_gradinputs, [kern, top], mode=self.mode, eps=1)
class TestAsymmetricPadding:
    """Check conv Ops with asymmetric per-edge padding against the same Ops
    run in "valid" mode on an explicitly zero-padded image.
    """

    # Ops under test and the Op classes expected in the compiled graph.
    conv2d = abstract_conv.AbstractConv2d
    conv2d_gradw = abstract_conv.AbstractConv2d_gradWeights
    conv2d_gradi = abstract_conv.AbstractConv2d_gradInputs
    conv2d_op = abstract_conv.AbstractConv2d
    conv2d_gradw_op = abstract_conv.AbstractConv2d_gradWeights
    conv2d_gradi_op = abstract_conv.AbstractConv2d_gradInputs
    # No graph rewrites: the abstract Ops must remain in the graph.
    mode = Mode(optimizer="None")
    img_shape = [(2, 2, 4, 4), (3, 2, 4, 2), (3, 3, 5, 3)]
    kern_shape = [(4, 2, 2, 2), (2, 2, 4, 2), (2, 3, 3, 3)]
    topgrad_shape = [(2, 4, 6, 6), (3, 2, 3, 4), (3, 2, 6, 1)]
    # Per-case ((top, bottom), (left, right)) padding amounts.
    border_mode = [((1, 2), (2, 1)), ((1, 1), (0, 3)), ((2, 1), (0, 0))]
    @pytest.mark.skipif(
        config.cxx == "",
        reason="SciPy and cxx needed",
    )
    def test_fwd(self):
        # Forward conv with asymmetric padding vs. "valid" conv on an
        # explicitly zero-padded image.
        img_sym = tensor4("img")
        kern_sym = tensor4("kern")
        for imshp, kshp, pad in zip(
            self.img_shape, self.kern_shape, self.border_mode, strict=True
        ):
            img = np.random.random(imshp).astype(config.floatX)
            kern = np.random.random(kshp).astype(config.floatX)
            asymmetric_conv_op = self.conv2d(
                border_mode=pad, subsample=(1, 1), filter_dilation=(1, 1)
            )
            asymmetric_out_sym = asymmetric_conv_op(img_sym, kern_sym)
            asymmetric_func = pytensor.function(
                [img_sym, kern_sym], asymmetric_out_sym, mode=self.mode
            )
            # The abstract Op must survive compilation (no optimizer).
            assert any(
                isinstance(node.op, self.conv2d_op)
                for node in asymmetric_func.maker.fgraph.toposort()
            )
            asymmetric_output = asymmetric_func(img, kern)
            ref_conv_op = self.conv2d(
                border_mode="valid", subsample=(1, 1), filter_dilation=(1, 1)
            )
            ref_out_sym = ref_conv_op(img_sym, kern_sym)
            ref_func = pytensor.function(
                [img_sym, kern_sym], ref_out_sym, mode=self.mode
            )
            # Build the zero-padded image matching pad = ((top, bottom),
            # (left, right)) and place the original image inside it.
            exp_imshp = (
                imshp[0],
                imshp[1],
                imshp[2] + pad[0][0] + pad[0][1],
                imshp[3] + pad[1][0] + pad[1][1],
            )
            exp_img = np.zeros(exp_imshp, dtype=config.floatX)
            exp_img[
                :, :, pad[0][0] : imshp[2] + pad[0][0], pad[1][0] : imshp[3] + pad[1][0]
            ] = img
            ref_output = ref_func(exp_img, kern)
            utt.assert_allclose(asymmetric_output, ref_output)
            utt.verify_grad(asymmetric_conv_op, [img, kern], mode=self.mode, eps=1)
@pytest.mark.skipif(
config.cxx == "",
reason="SciPy and cxx needed",
)
def test_gradweight(self):
img_sym = tensor4("img")
top_sym = tensor4("top")
for imshp, kshp, topshp, pad in zip(
self.img_shape,
self.kern_shape,
self.topgrad_shape,
self.border_mode,
strict=True,
):
img = np.random.random(imshp).astype(config.floatX)
top = np.random.random(topshp).astype(config.floatX)
asymmetric_conv_op = self.conv2d_gradw(
border_mode=pad, subsample=(1, 1), filter_dilation=(1, 1)
)
asymmetric_out_sym = asymmetric_conv_op(img_sym, top_sym, kshp[-2:])
asymmetric_func = pytensor.function(
[img_sym, top_sym], asymmetric_out_sym, mode=self.mode
)
assert any(
isinstance(node.op, self.conv2d_gradw_op)
for node in asymmetric_func.maker.fgraph.toposort()
)
asymmetric_output = asymmetric_func(img, top)
ref_conv_op = self.conv2d_gradw(
border_mode="valid", subsample=(1, 1), filter_dilation=(1, 1)
)
ref_out_sym = ref_conv_op(img_sym, top_sym, kshp[-2:])
ref_func = pytensor.function(
[img_sym, top_sym], ref_out_sym, mode=self.mode
)
exp_imshp = (
imshp[0],
imshp[1],
imshp[2] + pad[0][0] + pad[0][1],
imshp[3] + pad[1][0] + pad[1][1],
)
exp_img = np.zeros(exp_imshp, dtype=config.floatX)
exp_img[
:, :, pad[0][0] : imshp[2] + pad[0][0], pad[1][0] : imshp[3] + pad[1][0]
] = img
ref_output = ref_func(exp_img, top)
utt.assert_allclose(asymmetric_output, ref_output)
def conv_gradweight(inputs_val, output_val):
return asymmetric_conv_op(
inputs_val, output_val, pt.as_tensor_variable(kshp[-2:])
)
utt.verify_grad(conv_gradweight, [img, top], mode=self.mode, eps=1)
@pytest.mark.skipif(
config.cxx == "",
reason="SciPy and cxx needed",
)
def test_gradinput(self):
kern_sym = tensor4("kern")
top_sym = tensor4("top")
for imshp, kshp, topshp, pad in zip(
self.img_shape,
self.kern_shape,
self.topgrad_shape,
self.border_mode,
strict=True,
):
kern = np.random.random(kshp).astype(config.floatX)
top = np.random.random(topshp).astype(config.floatX)
asymmetric_conv_op = self.conv2d_gradi(
border_mode=pad, subsample=(1, 1), filter_dilation=(1, 1)
)
asymmetric_out_sym = asymmetric_conv_op(kern_sym, top_sym, imshp[-2:])
asymmetric_func = pytensor.function(
[kern_sym, top_sym], asymmetric_out_sym, mode=self.mode
)
assert any(
isinstance(node.op, self.conv2d_gradi_op)
for node in asymmetric_func.maker.fgraph.toposort()
)
asymmetric_output = asymmetric_func(kern, top)
ref_conv_op = self.conv2d_gradi(
border_mode="valid", subsample=(1, 1), filter_dilation=(1, 1)
)
exp_imshp = [
imshp[2] + pad[0][0] + pad[0][1],
imshp[3] + pad[1][0] + pad[1][1],
]
ref_out_sym = ref_conv_op(kern_sym, top_sym, exp_imshp)
ref_func = pytensor.function(
[kern_sym, top_sym], ref_out_sym, mode=self.mode
)
ref_output = ref_func(kern, top)
ref_output = ref_output[
:, :, pad[0][0] : imshp[2] + pad[0][0], pad[1][0] : imshp[3] + pad[1][0]
]
utt.assert_allclose(asymmetric_output, ref_output)
def conv_gradinputs(filters_val, output_val):
return asymmetric_conv_op(
filters_val, output_val, pt.as_tensor_variable(imshp[-2:])
)
utt.verify_grad(conv_gradinputs, [kern, top], mode=self.mode, eps=1)
class TestCausalConv:
    """Causal 1d convolution: forward output is checked against hand-computed
    values, and the gradient is verified numerically."""

    # Compile without graph rewrites.
    mode = Mode(optimizer="None")

    # Input: 3 samples x 2 channels x 5 time steps.
    img = np.array(
        [
            [[2, 4, 9, 5, 8], [0, 0, 4, 0, 5]],
            [[2, 5, 8, 5, 5], [1, 3, 0, 7, 9]],
            [[7, 0, 7, 1, 0], [0, 1, 4, 7, 2]],
        ]
    ).astype(config.floatX)
    # Filters: 2 output channels x 2 input channels x width 3.
    kern = np.array([[[5, 3, 1], [3, 1, 0]], [[6, 4, 9], [2, 2, 7]]]).astype(
        config.floatX
    )
    dilation = 2
    # Expected forward result of convolving ``img`` with ``kern`` at the
    # dilation above, precomputed by hand.
    precomp_top = np.array(
        [
            [[10, 20, 63, 37, 88], [12, 24, 70, 46, 120]],
            [[13, 34, 47, 64, 78], [14, 36, 58, 70, 105]],
            [[35, 3, 68, 27, 38], [42, 2, 78, 22, 103]],
        ]
    ).astype(config.floatX)

    @pytest.mark.skipif(
        config.cxx == "",
        reason="SciPy and cxx needed",
    )
    def test_interface(self):
        """Compare ``causal_conv1d`` against the precomputed reference output,
        then numerically verify its gradient."""
        inp = tensor3("img")
        flt = tensor3("kern")
        graph_out = causal_conv1d(
            inp, flt, self.kern.shape, filter_dilation=self.dilation
        )
        compiled = pytensor.function([inp, flt], graph_out, mode=self.mode)
        result = compiled(self.img, self.kern)
        utt.assert_allclose(result, self.precomp_top)

        def _undilated_conv(inputs_val, filters_val):
            # Gradient is checked at dilation 1.
            return causal_conv1d(
                inputs_val, filters_val, self.kern.shape, filter_dilation=1
            )

        utt.verify_grad(
            _undilated_conv, [self.img, self.kern], mode=self.mode, eps=1
        )
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论