提交 02974b9d authored 作者: Maxim Kochurov's avatar Maxim Kochurov 提交者: Maxim Kochurov

remove deprecated pytensor.tensor.nnet

上级 15637d23
...@@ -77,7 +77,7 @@ jobs: ...@@ -77,7 +77,7 @@ jobs:
- "tests/tensor tests/sparse --ignore=tests/tensor/test_basic.py --ignore=tests/tensor/test_math.py --ignore=tests/tensor/test_math_scipy.py --ignore=tests/tensor/test_inplace.py --ignore=tests/tensor/test_elemwise.py --ignore=tests/tensor/rewriting/test_basic.py --ignore=tests/tensor/rewriting/test_math.py --ignore=tests/tensor/nnet --ignore=tests/tensor/signal" - "tests/tensor tests/sparse --ignore=tests/tensor/test_basic.py --ignore=tests/tensor/test_math.py --ignore=tests/tensor/test_math_scipy.py --ignore=tests/tensor/test_inplace.py --ignore=tests/tensor/test_elemwise.py --ignore=tests/tensor/rewriting/test_basic.py --ignore=tests/tensor/rewriting/test_math.py --ignore=tests/tensor/nnet --ignore=tests/tensor/signal"
- "tests/tensor/test_basic.py tests/tensor/test_math.py tests/tensor/test_math_scipy.py tests/tensor/test_inplace.py" - "tests/tensor/test_basic.py tests/tensor/test_math.py tests/tensor/test_math_scipy.py tests/tensor/test_inplace.py"
- "tests/tensor/test_elemwise.py tests/tensor/rewriting/test_basic.py tests/tensor/rewriting/test_math.py" - "tests/tensor/test_elemwise.py tests/tensor/rewriting/test_basic.py tests/tensor/rewriting/test_math.py"
- "tests/tensor/nnet/test_conv.py" - "tests/tensor/conv/test_abstract_conv.py"
include: include:
- python-version: "3.7" - python-version: "3.7"
fast-compile: 1 fast-compile: 1
......
=========================================
:mod:`tensor.conv` -- Tensor Convolutions
=========================================
.. module:: tensor.conv
:platform: Unix, Windows
:synopsis: Tensor Convolutions
.. moduleauthor:: LISA, PyMC Developers, PyTensor Developers
.. automodule:: pytensor.tensor.conv
:members:
\ No newline at end of file
...@@ -26,5 +26,6 @@ They are grouped into the following sections: ...@@ -26,5 +26,6 @@ They are grouped into the following sections:
slinalg slinalg
nlinalg nlinalg
fft fft
conv
math_opt math_opt
basic_opt basic_opt
import warnings
warnings.warn(
"The module `pytensor.scalar.basic_scipy` is deprecated "
"and has been renamed to `pytensor.scalar.math`",
DeprecationWarning,
stacklevel=2,
)
from .abstract_conv import (
bilinear_upsampling,
causal_conv1d,
conv2d,
conv2d_transpose,
conv3d,
frac_bilinear_upsampling,
separable_conv2d,
separable_conv3d,
)
...@@ -5,14 +5,8 @@ Abstract conv interface ...@@ -5,14 +5,8 @@ Abstract conv interface
import logging import logging
import sys import sys
try:
from math import gcd
except ImportError:
from fractions import gcd
import warnings import warnings
from math import gcd
import numpy as np import numpy as np
...@@ -35,8 +29,7 @@ from pytensor.tensor.exceptions import NotScalarConstantError ...@@ -35,8 +29,7 @@ from pytensor.tensor.exceptions import NotScalarConstantError
from pytensor.tensor.var import TensorConstant, TensorVariable from pytensor.tensor.var import TensorConstant, TensorVariable
__docformat__ = "restructuredtext en" _logger = logging.getLogger(__name__)
_logger = logging.getLogger("pytensor.tensor.nnet.abstract_conv")
def get_conv_output_shape( def get_conv_output_shape(
...@@ -678,7 +671,7 @@ def abstract_conv2d( ...@@ -678,7 +671,7 @@ def abstract_conv2d(
stack of 2D inputs with a set of 2D filters. The implementation is modelled stack of 2D inputs with a set of 2D filters. The implementation is modelled
after Convolutional Neural Networks (CNN). after Convolutional Neural Networks (CNN).
Refer to :func:`nnet.conv2d <pytensor.tensor.nnet.conv2d>` for a more detailed documentation. Refer to :func:`nnet.conv2d <pytensor.tensor.conv.conv2d>` for a more detailed documentation.
""" """
input = as_tensor_variable(input) input = as_tensor_variable(input)
...@@ -2430,7 +2423,7 @@ class BaseAbstractConv(Op): ...@@ -2430,7 +2423,7 @@ class BaseAbstractConv(Op):
class AbstractConv(BaseAbstractConv): class AbstractConv(BaseAbstractConv):
"""Abstract Op for the forward convolution. """Abstract Op for the forward convolution.
Refer to :func:`BaseAbstractConv <pytensor.tensor.nnet.abstract_conv.BaseAbstractConv>` Refer to :func:`BaseAbstractConv <pytensor.tensor.conv.abstract_conv.BaseAbstractConv>`
for a more detailed documentation. for a more detailed documentation.
""" """
...@@ -2646,7 +2639,7 @@ class AbstractConv(BaseAbstractConv): ...@@ -2646,7 +2639,7 @@ class AbstractConv(BaseAbstractConv):
class AbstractConv2d(AbstractConv): class AbstractConv2d(AbstractConv):
"""Abstract Op for the forward convolution. """Abstract Op for the forward convolution.
Refer to :func:`BaseAbstractConv <pytensor.tensor.nnet.abstract_conv.BaseAbstractConv>` Refer to :func:`BaseAbstractConv <pytensor.tensor.conv.abstract_conv.BaseAbstractConv>`
for a more detailed documentation. for a more detailed documentation.
""" """
...@@ -2708,7 +2701,7 @@ class AbstractConv2d(AbstractConv): ...@@ -2708,7 +2701,7 @@ class AbstractConv2d(AbstractConv):
class AbstractConv3d(AbstractConv): class AbstractConv3d(AbstractConv):
"""Abstract Op for the forward convolution. """Abstract Op for the forward convolution.
Refer to :func:`BaseAbstractConv <pytensor.tensor.nnet.abstract_conv.BaseAbstractConv>` Refer to :func:`BaseAbstractConv <pytensor.tensor.conv.abstract_conv.BaseAbstractConv>`
for a more detailed documentation. for a more detailed documentation.
""" """
...@@ -3489,11 +3482,9 @@ def conv2d( ...@@ -3489,11 +3482,9 @@ def conv2d(
border_mode="valid", border_mode="valid",
subsample=(1, 1), subsample=(1, 1),
filter_flip=True, filter_flip=True,
image_shape=None,
filter_dilation=(1, 1), filter_dilation=(1, 1),
num_groups=1, num_groups=1,
unshared=False, unshared=False,
**kwargs,
): ):
""" """
This function will build the symbolic graph for convolving a mini-batch of a This function will build the symbolic graph for convolving a mini-batch of a
...@@ -3584,36 +3575,6 @@ def conv2d( ...@@ -3584,36 +3575,6 @@ def conv2d(
of shape (batch size, output channels, output rows, output columns) of shape (batch size, output channels, output rows, output columns)
""" """
if "imshp_logical" in kwargs or "kshp_logical" in kwargs:
raise ValueError(
"Keyword arguments 'imshp_logical' and 'kshp_logical' for conv2d "
"are not supported anymore (and have not been a reliable way to "
"perform upsampling). That feature is still available by calling "
"pytensor.tensor.nnet.conv.conv2d() for the time being."
)
if len(kwargs.keys()) > 0:
warnings.warn(
str(kwargs.keys()) + " are now deprecated in "
"`tensor.nnet.abstract_conv.conv2d` interface"
" and will be ignored.",
stacklevel=2,
)
if image_shape is not None:
warnings.warn(
"The `image_shape` keyword argument to "
"`tensor.nnet.conv2d` is deprecated, it has been "
"renamed to `input_shape`.",
stacklevel=2,
)
if input_shape is None:
input_shape = image_shape
else:
raise ValueError(
"input_shape and image_shape should not"
" be provided at the same time."
)
return abstract_conv2d( return abstract_conv2d(
input, input,
filters, filters,
......
import warnings
warnings.warn(
"The module `pytensor.tensor.nnet` is deprecated and will "
"be removed from PyTensor in version 2.9.0",
DeprecationWarning,
stacklevel=2,
)
import pytensor.tensor.nnet.rewriting
from pytensor.tensor.nnet.abstract_conv import (
abstract_conv2d,
conv2d,
conv2d_grad_wrt_inputs,
conv2d_transpose,
conv3d,
separable_conv2d,
)
from pytensor.tensor.nnet.basic import (
binary_crossentropy,
categorical_crossentropy,
confusion_matrix,
crossentropy_categorical_1hot,
crossentropy_categorical_1hot_grad,
crossentropy_softmax_1hot,
crossentropy_softmax_1hot_with_bias,
crossentropy_softmax_1hot_with_bias_dx,
crossentropy_softmax_argmax_1hot_with_bias,
crossentropy_softmax_max_and_argmax_1hot,
crossentropy_softmax_max_and_argmax_1hot_with_bias,
crossentropy_to_crossentropy_with_softmax,
crossentropy_to_crossentropy_with_softmax_with_bias,
elu,
graph_merge_softmax_with_crossentropy_softmax,
h_softmax,
logsoftmax,
prepend_0_to_each_row,
prepend_1_to_each_row,
prepend_scalar_to_each_row,
relu,
selu,
sigmoid_binary_crossentropy,
softmax,
softmax_grad_legacy,
softmax_legacy,
softmax_simplifier,
softmax_with_bias,
softsign,
)
from pytensor.tensor.nnet.batchnorm import batch_normalization
from pytensor.tensor.nnet.sigm import hard_sigmoid, ultra_fast_sigmoid
"""
Provides neural-network specific Ops.
Notes
-----
TODO: factor this out into a neural-network toolbox.
"""
import numpy as np
import pytensor
from pytensor import scalar as aes
from pytensor.compile import optdb
from pytensor.gradient import DisconnectedType, grad_not_implemented
from pytensor.graph.basic import Apply
from pytensor.graph.op import Op
from pytensor.graph.rewriting.basic import (
copy_stack_trace,
graph_rewriter,
node_rewriter,
)
from pytensor.link.c.op import COp
from pytensor.raise_op import Assert
from pytensor.scalar import UnaryScalarOp
from pytensor.tensor import basic as at
from pytensor.tensor.basic import ARange
from pytensor.tensor.elemwise import DimShuffle, Elemwise
from pytensor.tensor.exceptions import NotScalarConstantError
from pytensor.tensor.extra_ops import Unique
from pytensor.tensor.math import (
MaxAndArgmax,
Sum,
add,
dot,
eq,
exp,
expm1,
log,
max_and_argmax,
mul,
neg,
or_,
sigmoid,
softplus,
)
from pytensor.tensor.math import sum as at_sum
from pytensor.tensor.math import tanh, tensordot, true_div
from pytensor.tensor.nnet.blocksparse import sparse_block_dot
from pytensor.tensor.rewriting.basic import (
register_canonicalize,
register_specialize,
register_stabilize,
)
from pytensor.tensor.rewriting.math import local_mul_canonizer
from pytensor.tensor.shape import Shape, shape_padleft
from pytensor.tensor.special import Softmax, SoftmaxGrad, log_softmax, softmax
from pytensor.tensor.subtensor import AdvancedIncSubtensor, AdvancedSubtensor
from pytensor.tensor.type import (
TensorType,
discrete_dtypes,
float_dtypes,
integer_dtypes,
)
class SoftmaxWithBias(COp):
    """
    An L{Op} for the output of neural-net multiclass classifiers.

    Attributes
    ----------
    x : a matrix of floats (32 or 64)
    b : a [row] vector of floats (32 or 64), length is number of cols in x

    This L{Op}'s output is softmax(x+b).
    softmax(x[i]) is the i'th distribution over len(x[i]) options.
    """

    nin = 2  # inputs: x, b
    nout = 1  # output: softmax(x + b)
    __props__ = ()

    def make_node(self, x, b):
        # Validate ranks/dtypes; the output variable shares x's (2-d float) type.
        x = at.as_tensor_variable(x)
        b = at.as_tensor_variable(b)
        if x.type.ndim != 2 or x.type.dtype not in float_dtypes:
            raise ValueError("x must be 2-d tensor of floats")
        if b.type.ndim != 1 or b.type.dtype not in float_dtypes:
            raise ValueError("b must be 1-d tensor of floats")
        sm = x.type()
        return Apply(self, [x, b], [sm])

    def perform(self, node, input_storage, output_storage):
        # Numerically-stable row-wise softmax of (x + b).
        x, b = input_storage
        if b.shape[0] != x.shape[1]:
            raise ValueError("b must have same number of columns as x")

        # Reference (per-row) implementation kept for documentation:
        # sm = numpy.zeros_like(x)
        # for i in range(sm.shape[0]):
        #     row = x[i] + b
        #     sm[i] = numpy.exp(row - numpy.max(row))
        #     sm[i] *= 1.0 / numpy.sum(sm[i])
        # output_storage[0][0] = sm

        if x.size == 0:
            # Numpy doesn't like the max of a zero-sized object.
            output_storage[0][0] = np.zeros(x.shape, dtype=x.dtype)
            return

        x_dtype = x.dtype
        # Perform computations in float32 otherwise the result is too imprecise
        if x.dtype == "float16":
            x = x.astype("float32")

        x_plus_b = x + b[None, :]
        # Subtract the row max before exponentiating for numerical stability.
        e_x = np.exp(x_plus_b - x_plus_b.max(axis=1)[:, None])
        e_x *= 1.0 / e_x.sum(axis=1)[:, None]
        # default for copy is True and we don't need a copy if the
        # data type matches.
        output_storage[0][0] = e_x.astype(x_dtype, copy=False)

    def L_op(self, inp, outputs, grads):
        # dx comes from the legacy softmax gradient; db is the column-sum of dx.
        x, b = inp
        (g_sm,) = grads

        if isinstance(g_sm.type, DisconnectedType):
            return [DisconnectedType()(), DisconnectedType()()]

        dx = softmax_grad_legacy(g_sm, outputs[0])
        db = at_sum(dx, axis=0)
        return dx, db

    def infer_shape(self, fgraph, node, shape):
        # Output has the same shape as x.
        return [shape[0]]

    def c_headers(self, **kwargs):
        return ["<iostream>", "<cmath>"]

    @staticmethod
    def c_code_template(dtype):
        # Returns four C fragments (declarations, row-loop header, row body,
        # row-loop footer) that callers concatenate and %-substitute.
        # this implementation was lifted from
        # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
        # TODO: put this into a templated function, in the support code
        # TODO: declare the max of each row as an Op output
        # TODO: set error messages for failures in this code
        # TODO: use this to accept float32 and int32:
        # node.inputs[0].type.dtype_specs()[1]
        init_decl = """
        npy_intp* Nx = PyArray_DIMS(%(x)s);
        npy_intp Sx = 0;
        npy_intp Sb = 0;
        npy_intp Ssm = 0;
        if (PyArray_NDIM(%(x)s) != 2)
        {
            PyErr_SetString(PyExc_ValueError, "not a 2d tensor");
            %(fail)s;
        }
        if (PyArray_NDIM(%(b)s) != 1)
        {
            PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
            %(fail)s;
        }
        if ((PyArray_TYPE(%(x)s) != NPY_DOUBLE) &&
            (PyArray_TYPE(%(x)s) != NPY_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError, "not a float");
            %(fail)s;
        }
        if ((PyArray_TYPE(%(b)s) != NPY_DOUBLE) &&
            (PyArray_TYPE(%(b)s) != NPY_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError, "b not float");
            %(fail)s;
        }
        if ((PyArray_DIMS(%(x)s)[1] != PyArray_DIMS(%(b)s)[0]))
        {
            PyErr_Format(PyExc_ValueError,
                         "number of columns in x (%%ld) does not match length of b (%%ld)",
                         (long int)PyArray_DIMS(%(x)s)[1], (long int)PyArray_DIMS(%(b)s)[0]);
            %(fail)s;
        }
        if ((NULL == %(sm)s)
            || (PyArray_DIMS(%(sm)s)[0] != PyArray_DIMS(%(x)s)[0])
            || (PyArray_DIMS(%(sm)s)[1] != PyArray_DIMS(%(x)s)[1]))
        {
            if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
            %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s),
                                                       PyArray_TYPE(%(x)s));
            if(!%(sm)s) {
                PyErr_SetString(PyExc_MemoryError,
                     "failed to alloc sm output");
                %(fail)s
            }
        }
        Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
        Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
        Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
        """

        begin_row_loop = """
        for (size_t i = 0; i < Nx[0]; ++i)
        {
            size_t j;
            double sum = 0.0;
            const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
            const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(PyArray_BYTES(%(b)s));
            dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
            npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
            npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
            npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
            size_t row_max_j=0;
            dtype_%(sm)s row_max = x_i[0] + b_i[0];
            //std::cout << "0 " << row_max << "\\n";
            // Get the maximum value of the row
            for (j = 1; j < Nx[1]; ++j)
            {
                dtype_%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
                //std::cout << "1 " << row_ij << "\\n";
                row_max_j = (row_ij > row_max) ? j : row_max_j;
                row_max = (row_ij > row_max) ? row_ij : row_max;
            }
        """

        inside_row_loop = """
            for (j = 0; j < Nx[1]; ++j)
            {
                dtype_%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
                //std::cout << "2 " << j << " " << row_ij << " " << row_max << "\\n";
                dtype_%(sm)s sm_ij = exp(row_ij - row_max);
                //std::cout << "3 " << j << " " << sm_ij << "\\n";
                sum += sm_ij;
                sm_i[j * Ssm] = sm_ij;
            }
            //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
            double sum_inv = 1.0 / sum;
            for (j = 0; j < Nx[1]; ++j)
            {
                sm_i[j * Ssm] *= sum_inv;
            }
        """

        # Get the vectorized version of exp if it exist
        try:
            vec_exp = pytensor.scalar.exp.c_code_contiguous_raw(
                dtype, "Nx[1]", "sm_i", "sm_i"
            )
            inside_row_loop_contig = (
                """
            for (j = 0; j < Nx[1]; ++j)
            {
                dtype_%%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
                //std::cout << "2 " << j << " " << row_ij << " " << row_max << "\\n";
                dtype_%%(sm)s sm_ij = row_ij - row_max;
                //std::cout << "3 " << j << " " << sm_ij << "\\n";
                sm_i[j * Ssm] = sm_ij;
            }
            %(vec_exp)s;
            for (j = 0; j < Nx[1]; ++j)
            {
                sum += sm_i[j * Ssm];
            }
            //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
            double sum_inv = 1.0 / sum;
            for (j = 0; j < Nx[1]; ++j)
            {
                sm_i[j * Ssm] *= sum_inv;
            }
            """
                % locals()
            )
            # Use the contiguous fast path only when the output stride is 1.
            inside_row_loop = (
                """
            if(Ssm == 1){
                %(inside_row_loop_contig)s
            }else{
                %(inside_row_loop)s
            }
            """
                % locals()
            )
        except pytensor.graph.utils.MethodNotDefined:
            pass

        end_row_loop = """
        }
        """
        return (init_decl, begin_row_loop, inside_row_loop, end_row_loop)

    def c_code(self, node, name, inp, out, sub):
        x, b = inp
        (sm,) = out
        code_template = "".join(
            self.c_code_template(node.inputs[0].type.dtype_specs()[1])
        )
        return code_template % dict(locals(), **sub)

    @staticmethod
    def c_code_cache_version():
        return (8,)
# Module-level singleton instances used by the rewriters below.
softmax_with_bias = SoftmaxWithBias()
# Legacy softmax/gradient ops fixed to the last axis.
softmax_grad_legacy = SoftmaxGrad(axis=-1)
softmax_legacy = Softmax(axis=-1)
@register_specialize("fast_compile")
@node_rewriter([softmax_legacy])
def local_softmax_with_bias(fgraph, node):
    """
    Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias).

    Splits the inputs of the `add` feeding a 2-d legacy softmax into
    broadcasted-row "vector" terms and everything else, then rebuilds the
    node as ``softmax_with_bias(sum(non_vectors), sum(vectors))``.
    """
    if node.op == softmax_legacy and node.outputs[0].ndim == 2:
        (x,) = node.inputs
        if x.owner and x.owner.op == add:
            vectors = []
            non_vectors = []
            for x_in in x.owner.inputs:
                if list(x_in.type.broadcastable) == [True, False]:
                    # print isinstance(x_in.owner.op,
                    # DimShuffle) since specialization comes
                    # relatively late in optimization, we don't want to
                    # put in extra DimShuffles un-necessarily.
                    if (
                        x_in.owner
                        and isinstance(x_in.owner.op, DimShuffle)
                        and list(x_in.owner.inputs[0].type.broadcastable) == [False]
                    ):
                        # cut out the DimShuffle that was broadcasting a vector
                        vectors.append(x_in.owner.inputs[0])
                    else:
                        # insert an extra DimShuffle to correct the old one
                        vectors.append(DimShuffle((True, False), (1,))(x_in))
                else:
                    non_vectors.append(x_in)
            # If all the inputs were vectors or broadcasted vectors,
            # we broadcast one of them to be used as a matrix
            if len(non_vectors) == 0:
                assert len(vectors) > 0  # we should have at least 1 input...
                promoted_vector = vectors.pop()
                non_vectors.append(shape_padleft(promoted_vector))
            assert non_vectors  # not empty
            if vectors:
                # we're in business...
                if len(vectors) > 1:
                    vector_sum = add(*vectors)
                    copy_stack_trace(x_in, vector_sum)
                else:
                    vector_sum = vectors[0]
                if len(non_vectors) > 1:
                    non_vector_sum = add(*non_vectors)
                    copy_stack_trace(x_in, non_vector_sum)
                else:
                    non_vector_sum = non_vectors[0]
                try:
                    sm_bias = softmax_with_bias(non_vector_sum, vector_sum)
                    copy_stack_trace(node.outputs[0], sm_bias)
                except Exception:
                    # if our arguments have the wrong types, then
                    # forget about it
                    return
                # Only substitute when the replacement has the exact same type.
                out_type = node.outputs[0].type
                if (
                    out_type.dtype == sm_bias.type.dtype
                    and out_type.broadcastable == sm_bias.type.broadcastable
                ):
                    # This condition is not always true. See the test
                    # nnet/tests/test_basic.py:T_SoftmaxWithBias.test_broadcast
                    return [sm_bias]
def softmax_simplifier(numerators, denominators):
    """
    Replace a matching ``exp(x) / sum(exp(x))`` pair in a multiplication's
    numerator/denominator lists with a single ``Softmax`` node.

    Called by `local_mul_canonizer` (see registration below); returns the
    possibly-rewritten ``(numerators, denominators)`` lists.
    """
    for numerator in list(numerators):
        if not numerator.type.dtype.startswith("float"):
            continue

        if not (numerator.owner and numerator.owner.op == exp):
            continue

        matching_denom = None

        for denominator in denominators:
            # Division with dimshuffle
            if denominator.owner and isinstance(denominator.owner.op, DimShuffle):
                ds_order = denominator.owner.op.new_order
                # Check that at most only one dimension is being reintroduced by
                # a dimshuffle. The cases where all dimensions are reintroduced
                # after a complete sum reduction end up in the else branch
                if ds_order.count("x") != 1:
                    continue
                # Check that dimshuffle does not change order of original dims
                ds_order_without_x = tuple(dim for dim in ds_order if dim != "x")
                if tuple(sorted(ds_order_without_x)) != ds_order_without_x:
                    continue
                new_dim = ds_order.index("x")
                z = denominator.owner.inputs[0]
                if z.owner and isinstance(z.owner.op, Sum):
                    sum_axis = z.owner.op.axis
                    # Check that reintroduced dim was the one reduced
                    if (
                        (sum_axis is not None)
                        and (len(sum_axis) == 1)
                        and (sum_axis[0] == new_dim)
                    ):
                        if z.owner.inputs[0] is numerator:
                            (sum_axis,) = sum_axis
                            matching_denom = denominator
                            break
            # Division without dimshuffle
            else:
                z = denominator
                if z.owner and isinstance(z.owner.op, Sum):
                    sum_axis = z.owner.op.axis
                    # Filter out partial summations over more than one axis
                    # The cases where all axis of summation are explicitly given
                    # as in `sum(matrix, axis=(0, 1))` are eventually rewritten
                    # to `sum(matrix)` and this branch is not a blocker
                    if sum_axis is not None and len(sum_axis) != 1:
                        continue
                    if z.owner.inputs[0] is numerator:
                        if sum_axis is not None:
                            (sum_axis,) = sum_axis
                        matching_denom = denominator
                        break

        if matching_denom:
            # Replace the exp/sum pair by a single Softmax on the reduced axis.
            softmax = Softmax(axis=sum_axis)(numerator.owner.inputs[0])
            copy_stack_trace(numerator, softmax)
            numerators.remove(numerator)
            denominators.remove(matching_denom)
            numerators.append(softmax)

    return numerators, denominators
local_mul_canonizer.add_simplifier(softmax_simplifier, "softmax_simplifier")
class CrossentropySoftmaxArgmax1HotWithBias(COp):
    """
    A special compound L{Op} for the output of neural-net classifiers.

    Parameters
    ----------
    x : a matrix of floats (32 or 64)
    b : a [row] vector of floats (32 or 64), length is number of cols in x
    y_idx : a [column] vector of int (32 or 64), length is number of rows in x

    Returns
    -------
    object
        row-wise NLL, softmax(x+b), row-wise argmax of (x+b).

    @precondition: every entry in y_idx is a valid (non-negative)
    column index into x

    This L{Op} has three outputs:
    - KL(softmax(x+b), y)
    - softmax(x+b)
    - argmax(x+b)

    softmax(x[i]) is the i'th distribution over len(x[i]) options
    argmax(x) is the index of x's greatest element
    y_idx[i] is an integer index, encoding a 1-hot distribution.

    In practice, when we are trying to do classification, we have one row in x
    and y_idx per example, and y[i] is the index of the (correct) class of the
    i'th example.
    """

    nin = 3  # inputs: x, b, y_idx
    nout = 3  # outputs: nll, softmax, argmax
    __props__ = ()

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def make_node(self, x, b, y_idx):
        # Validate ranks/dtypes and build the three output variables.
        x = at.as_tensor_variable(x)
        b = at.as_tensor_variable(b)
        y_idx = at.as_tensor_variable(y_idx)
        if x.type.ndim != 2 or x.type.dtype not in float_dtypes:
            raise ValueError("x must be 2-d tensor of floats", x.type)
        if b.type.ndim != 1 or x.type.dtype not in float_dtypes:
            raise ValueError("b must be 1-d tensor of floats", b.type)
        if y_idx.type.ndim != 1 or y_idx.type.dtype not in discrete_dtypes:
            raise ValueError("y_idx must be 1-d tensor of [u]ints", y_idx.type)

        # TODO: Is this correct? It used to be y, not y_idx
        out_shape = tuple(1 if s == 1 else None for s in y_idx.type.shape)
        nll = TensorType(x.type.dtype, shape=out_shape).make_variable()
        sm = x.type()
        am = y_idx.type()
        return Apply(self, [x, b, y_idx], [nll, sm, am])

    def perform(self, node, input_storage, output_storage):
        """
        The math, where x is an input vector, and t is a target index:

            softmax(x)[i] = exp(x[i]) / sum_j(exp(x[j]))
            nll(x,t) = -log(softmax(x)[t])

        We compute this by subtracting off the max of x. This avoids
        numerical instability.

            m = max_j x[j]
            softmax(x)[i] = exp(x[i] -m) / sum_j(exp(x[j] - m))
            nll = -log(exp(x[t] -m) / sum_j(exp(x[j] - m)))
                = -x[t] + m + log( sum_j(exp(x[j] - m)))
        """
        x, b, y_idx = input_storage
        if b.shape[0] != x.shape[1]:
            raise ValueError("b must have same number of columns as x")
        if y_idx.shape[0] != x.shape[0]:
            raise ValueError("y_idx must have same number of rows as x")
        if any(y_idx < 0):
            raise ValueError("y_i value out of bounds")
        sm = np.zeros_like(x)  # softmax
        nll = np.zeros(
            x.shape[0], dtype=node.outputs[0].type.dtype
        )  # nll(y | softmax(x))
        am = np.zeros_like(y_idx)
        for i in range(sm.shape[0]):
            # add the bias vector to the i'th row of x
            row = x[i] + b
            # get the maximum value of i'th row for numerically safe
            # softmax / nll
            am[i] = np.argmax(row)
            m = row[am[i]]
            # compute the unnormalized softmax, and normalization constant
            sm[i] = np.exp(row - m)
            sum_j = np.sum(sm[i])  # sum_j(exp(x[j] - m))
            # normalized our softmax
            sm[i] *= 1.0 / sum_j
            # store the nll
            nll[i] = -row[y_idx[i]] + m + np.log(sum_j)
        output_storage[0][0] = nll
        output_storage[1][0] = sm
        output_storage[2][0] = am

    def infer_shape(self, fgraph, node, shapes):
        # nll is one value per row of x; sm matches x; am matches y_idx.
        x_shp, b_shp, idx_shp = shapes
        nll_shp = (x_shp[0],)
        sm_shp = x_shp
        am_shp = idx_shp
        return [nll_shp, sm_shp, am_shp]

    def connection_pattern(self, node):
        # Rows are inputs (x, b, y_idx); columns are outputs (nll, sm, am).
        return [
            [True, True, True],  # x
            [True, True, True],  # b
            [False, False, True],
        ]  # y_idx

    def grad(self, inp, grads):
        x, b, y_idx = inp
        g_nll, g_sm, g_am = grads

        # Accumulate gradient contributions per connected output, then sum.
        dx_terms = []
        db_terms = []
        d_idx_terms = []

        if not isinstance(g_nll.type, DisconnectedType):
            nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
            dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, sm, y_idx)
            db = at_sum(dx, axis=[0])
            dx_terms.append(dx)
            db_terms.append(db)

        if not isinstance(g_sm.type, DisconnectedType):
            dx, db = softmax_with_bias.L_op((x, b), [softmax_with_bias(x, b)], (g_sm,))
            dx_terms.append(dx)
            db_terms.append(db)

        if not isinstance(g_am.type, DisconnectedType):
            # argmax contributes a zero gradient everywhere.
            dx_terms.append(x.zeros_like())
            db_terms.append(b.zeros_like())
            d_idx_terms.append(y_idx.zeros_like())

        def fancy_sum(terms):
            # Sum a (possibly empty) list of gradient terms; empty means
            # the corresponding input is disconnected.
            if len(terms) == 0:
                return DisconnectedType()()
            rval = terms[0]
            for term in terms[1:]:
                rval = rval + term
            return rval

        return [fancy_sum(terms) for terms in [dx_terms, db_terms, d_idx_terms]]

    def c_headers(self, **kwargs):
        return ["<iostream>", "<cmath>"]

    @staticmethod
    def c_code_template(dtype):
        # Reuses SoftmaxWithBias's fragments, interleaving the extra
        # allocation/validation of nll/am and the per-row NLL computation.
        # this implementation was lifted from
        # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
        # TODO: put this into a templated function, in the support code
        # TODO: declare the max of each row as an Op output
        # TODO: set error messages for failures in this code
        # TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
        (
            init_decl,
            begin_row_loop,
            inside_row_loop,
            end_row_loop,
        ) = SoftmaxWithBias.c_code_template(dtype)
        return (
            init_decl,
            """
        if (PyArray_NDIM(%(y_idx)s) != 1)
        {
            PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
            %(fail)s;
        }
        if (PyArray_DIMS(%(x)s)[0] != PyArray_DIMS(%(y_idx)s)[0])
        {
            PyErr_Format(PyExc_ValueError,
                "number of rows in x (%%ld) does not match length of y (%%ld)",
                (long int)PyArray_DIMS(%(x)s)[0],
                (long int)PyArray_DIMS(%(y_idx)s)[0]);
            %(fail)s;
        }
        if ((NULL == %(nll)s) //initial condition
            || (PyArray_DIMS(%(nll)s)[0] != PyArray_DIMS(%(y_idx)s)[0]))
        {
            if (NULL != %(nll)s) Py_XDECREF(%(nll)s);
            %(nll)s = (PyArrayObject*)PyArray_SimpleNew(1,
                PyArray_DIMS(%(y_idx)s), PyArray_TYPE(%(x)s));
            if(!%(nll)s)
            {
                PyErr_SetString(PyExc_MemoryError,
                     "failed to alloc nll output");
                %(fail)s;
            }
        }
        if ((NULL == %(am)s)
            || (PyArray_DIMS(%(am)s)[0] != PyArray_DIMS(%(y_idx)s)[0]))
        {
            Py_XDECREF(%(am)s);
            %(am)s = (PyArrayObject*) PyArray_SimpleNew(1,
                PyArray_DIMS(%(y_idx)s), PyArray_TYPE(%(y_idx)s));
            if(!%(am)s)
            {
                PyErr_SetString(PyExc_MemoryError,
                     "failed to alloc am output");
                %(fail)s;
            }
        }
                """,
            begin_row_loop,
            """
            const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(PyArray_BYTES(%(y_idx)s) + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];
            dtype_%(nll) s* __restrict__ nll_i = (dtype_%(nll)s*)(PyArray_BYTES(%(nll)s) + PyArray_STRIDES(%(nll)s)[0] * i);
            %(am_type)s* __restrict__ am_i = (%(am_type)s*) (PyArray_BYTES(%(am)s) + PyArray_STRIDES(%(am)s)[0] * i);
                """,
            inside_row_loop,
            """
            if ((y_i >= PyArray_DIMS(%(x)s)[1]) || (y_i < 0))
            {
                PyErr_SetString(PyExc_ValueError, "y_i value out of bounds");
                %(fail)s;
            }
            nll_i[0] = - x_i[y_i*Sx]
                       - b_i[y_i*Sb]
                       + row_max
                       + log(sum);
            am_i[0] = row_max_j;
                """,
            end_row_loop,
        )

    def c_code_cache_version(self):
        return (5,) + SoftmaxWithBias.c_code_cache_version()

    def c_code(self, node, name, inp, out, sub):
        x, b, y_idx = inp
        nll, sm, am = out
        y_idx_type = node.inputs[2].type.dtype_specs()[1]
        am_type = y_idx_type
        dtype = node.inputs[0].type.dtype_specs()[1]
        code_template = "".join(self.c_code_template(dtype))
        return code_template % dict(locals(), **sub)
class CrossentropySoftmax1HotWithBiasDx(COp):
    """
    Gradient wrt x of the CrossentropySoftmaxArgmax1HotWithBias Op.
    """

    nin = 3  # inputs: dy, sm, y_idx
    nout = 1  # output: dx
    __props__ = ()

    def make_node(self, dy, sm, y_idx, **kwargs):
        # dy may be a scalar (broadcast over rows) or a vector of per-row
        # gradients; sm is the softmax output; y_idx the target indices.
        dy = at.as_tensor_variable(dy)
        sm = at.as_tensor_variable(sm)
        y_idx = at.as_tensor_variable(y_idx)
        if dy.type.ndim > 1 or dy.type.dtype not in float_dtypes:
            raise ValueError("dy must be {0,1}-d tensor of floats", dy.type)
        if sm.type.ndim != 2 or sm.type.dtype not in float_dtypes:
            raise ValueError("sm must be 2-d tensor of floats", sm.type)
        if y_idx.type.ndim != 1 or y_idx.type.dtype not in discrete_dtypes:
            raise ValueError("y_idx must be 1-d tensor of [u]ints", y_idx.type)
        return Apply(self, [dy, sm, y_idx], [sm.type()])

    def perform(self, node, input_storage, output_storage):
        dy, sm, y_idx = input_storage
        if any(y_idx < 0):
            raise ValueError("y_i value out of bounds")
        dx = np.zeros_like(sm)
        if dy.ndim == 0:
            dy = dy[None]
        # incr is 0 when dy has a single element (broadcast), 1 otherwise.
        incr = int(dy.shape[0] > 1)
        for i in range(sm.shape[0]):
            dy_i = dy[i * incr]
            dx[i] = dy_i * sm[i]  # vector scale
            dx[i, y_idx[i]] -= dy_i  # scalar decrement
        output_storage[0][0] = dx

    def infer_shape(self, fgraph, node, shapes):
        # dx has the same shape as sm.
        return [shapes[1]]

    def grad(self, inp, grads):
        dy, sm, y_idx = inp
        (g_dx,) = grads
        # TODO: currently we do not compute the gradient w.r.t. dy, because
        # advanced indexing is not working yet. When it works, do it to avoid
        # potentially misleading behavior in gradient computations! (although
        # typically we should not need the gradient w.r.t. dy).
        y_idx_range = at.arange(y_idx.shape[0])
        g_dy = at_sum(
            g_dx * AdvancedIncSubtensor()(sm, at.fill(dy, -1), y_idx_range, y_idx),
            axis=1,
        )
        g_sm = dy.dimshuffle(0, "x") * g_dx
        g_y_idx = grad_not_implemented(self, 2, y_idx)
        return [g_dy, g_sm, g_y_idx]

    def c_code_cache_version(self):
        return (6,)

    def c_code(self, node, name, inp, out, sub):
        dnll, sm, y_idx = inp
        (dx,) = out
        y_idx_type = node.inputs[2].type.dtype_specs()[1]
        return """
        if ((PyArray_TYPE(%(dnll)s) != NPY_DOUBLE) &&
            (PyArray_TYPE(%(dnll)s) != NPY_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError,
                 "dnll type should be float32 or float64");
            %(fail)s;
        }
        if ((PyArray_TYPE(%(sm)s) != NPY_DOUBLE) &&
            (PyArray_TYPE(%(sm)s) != NPY_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError,
                 "sm type should be float32 or float64");
            %(fail)s;
        }

        // new scope because of variable declaration
        // TODO: proper indentation, but the diff will get messy
        {
        // Get `dnll.shape[0]` or set it to zero if `dnll` is a scalar.
        const npy_intp %(dnll)s_dims0 = (PyArray_NDIM(%(dnll)s) > 0 ?
                                         PyArray_DIMS(%(dnll)s)[0] :
                                         (npy_intp) 0);

        // Get `dnll.strides[0]` and set it to zero if `dnll` is a scalar
        // or a vector with just one element.
        const npy_intp %(dnll)s_strides0 = (%(dnll)s_dims0 > 1 ?
                                            PyArray_STRIDES(%(dnll)s)[0] :
                                            (npy_intp) 0);

        if ((PyArray_NDIM(%(dnll)s) > 1)
            || (PyArray_NDIM(%(sm)s) != 2)
            || (PyArray_NDIM(%(y_idx)s) != 1))
        {
            PyErr_SetString(PyExc_ValueError, "rank error");
            %(fail)s;
        }
        if (%(dnll)s_dims0 != PyArray_DIMS(%(sm)s)[0] && %(dnll)s_dims0 > 1)
        {
            PyErr_Format(PyExc_ValueError,
                         "dnll.shape[0] (%%ld) != sm.shape[0] (%%ld)",
                         (long int)%(dnll)s_dims0,
                         (long int)PyArray_DIMS(%(sm)s)[0]);
            %(fail)s;
        }
        if (%(dnll)s_dims0 != PyArray_DIMS(%(y_idx)s)[0] && %(dnll)s_dims0 > 1)
        {
            PyErr_Format(PyExc_ValueError,
                         "dnll.shape[0] (%%ld) != y_idx.shape[0] (%%ld)",
                         (long int)%(dnll)s_dims0,
                         (long int)PyArray_DIMS(%(y_idx)s)[0]);
            %(fail)s;
        }
        if (PyArray_DIMS(%(sm)s)[0] !=
            PyArray_DIMS(%(y_idx)s)[0])
        {
            PyErr_SetString(PyExc_ValueError,
                            "sm.shape[0] != y_idx.shape[0]");
            %(fail)s;
        }
        if ((NULL == %(dx)s)
            || (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0])
            || (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1]))
        {
            if (NULL != %(dx)s) Py_XDECREF(%(dx)s);
            %(dx)s = (PyArrayObject*) PyArray_SimpleNew(2,
                                                        PyArray_DIMS(%(sm)s),
                                                        PyArray_TYPE(%(sm)s));
            if(!%(dx)s) {
                PyErr_SetString(PyExc_MemoryError,
                     "failed to alloc dx output");
                %(fail)s
            }
        }

        for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i)
        {
            const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(PyArray_BYTES(%(dnll)s) + %(dnll)s_strides0 * i))[0];

            const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(PyArray_BYTES(%(y_idx)s) + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];

            const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
            npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);

            dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*)(PyArray_BYTES(%(dx)s) + PyArray_STRIDES(%(dx)s)[0] * i);
            npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s);

            for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
            {
                dx_i[j * Sdx] = dnll_i * sm_i[j * Ssm];
            }
            if (y_i >= PyArray_DIMS(%(dx)s)[1] || (y_i < 0))
            {
                PyErr_SetString(PyExc_ValueError, "y_i >= dx dimensions[1] or y_i < 0.");
                %(fail)s;
            }
            dx_i[y_i * Sdx] -= dnll_i;
        }
        }
        """ % dict(
            locals(), **sub
        )
# Singleton instances of the compound cross-entropy ops defined above.
crossentropy_softmax_argmax_1hot_with_bias = CrossentropySoftmaxArgmax1HotWithBias()
crossentropy_softmax_1hot_with_bias_dx = CrossentropySoftmax1HotWithBiasDx()
def crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs):
    """Return (cross-entropy, softmax) for ``x`` with bias ``b`` and integer targets ``y_idx``.

    Thin wrapper over `crossentropy_softmax_argmax_1hot_with_bias` that
    discards its third (argmax) output.
    """
    all_outputs = crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx, **kwargs)
    return all_outputs[:2]
def crossentropy_softmax_1hot(x, y_idx, **kwargs):
    """Cross-entropy and softmax of ``x`` with an implicit all-zero bias."""
    zero_bias = at.zeros_like(x[0, :])
    return crossentropy_softmax_1hot_with_bias(x, zero_bias, y_idx, **kwargs)
def crossentropy_softmax_max_and_argmax_1hot_with_bias(x, b, y_idx, **kwargs):
    """
    Returns
    -------
    object
        The cross-entropy, the softmax output, the max probability,
        and the argmax index.

    TODO: Since we are recomputing the argmax,
           we might as well assert that it is correct.

    TODO: Make this entire function is
           unnecessary? e.g. CrossentropySoftmaxArgmax1HotWithBias should return
           the appropriate information (i.e. the max probability)?

    """
    nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
    # Recompute the best probability / index from the softmax output.
    best_pr, best_idx = max_and_argmax(sm, axis=-1)
    return (nll, sm, best_pr, best_idx)
def crossentropy_softmax_max_and_argmax_1hot(x, y_idx, **kwargs):
    """Same as the `_with_bias` variant, with an implicit all-zero bias."""
    zero_bias = at.zeros_like(x[0, :])
    return crossentropy_softmax_max_and_argmax_1hot_with_bias(
        x, zero_bias, y_idx, **kwargs
    )
class CrossentropyCategorical1HotGrad(Op):
    """Gradient of `CrossentropyCategorical1Hot` w.r.t. the coding distribution.

    For ``y[i] = -log(coding_dist[i, true_one_of_n[i]])`` the only nonzero
    entry of the gradient in row ``i`` is at column ``true_one_of_n[i]``,
    with value ``-g_y[i] / coding_dist[i, true_one_of_n[i]]``.
    """

    __props__ = ()

    def make_node(self, g_y, coding_dist, true_one_of_n):
        return Apply(self, [g_y, coding_dist, true_one_of_n], [coding_dist.type()])

    def perform(self, node, inp, out):
        g_y, coding_dist, true_one_of_n = inp
        (g_coding_strg,) = out
        g_coding = np.zeros_like(coding_dist)
        # Vectorized form of the per-row update (each row index appears
        # exactly once, so fancy-index assignment matches the old loop):
        # g_coding[i, t[i]] = -g_y[i] / coding_dist[i, t[i]]
        rows = np.arange(len(g_y))
        g_coding[rows, true_one_of_n] = -g_y / coding_dist[rows, true_one_of_n]
        g_coding_strg[0] = g_coding

    def infer_shape(self, fgraph, node, in_shapes):
        # Gradient has the shape of the coding distribution (input 1).
        return [in_shapes[1]]
# Singleton instance used by `CrossentropyCategorical1Hot.grad` below.
crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad()
class CrossentropyCategorical1Hot(Op):
    r"""
    Compute the cross entropy between a coding distribution and
    a true distribution of the form [0, 0, ... 0, 1, 0, ..., 0].

    .. math::

        y[i] = - \log(coding\_dist[i, one\_of\_n[i]])

    Notes
    -----
    In the case that the coding distribution is the output of a
    softmax, an application of this Op will probably be optimized
    away in favour of one with a C implementation.

    """

    __props__ = ()

    def make_node(self, coding_dist, true_one_of_n):
        """
        Parameters
        ----------
        coding_dist : dense matrix
        true_one_of_n : lvector

        Returns
        -------
        dvector

        """
        _coding_dist = at.as_tensor_variable(coding_dist)
        _true_one_of_n = at.as_tensor_variable(true_one_of_n)
        if _coding_dist.type.ndim != 2:
            raise TypeError("Matrix required for argument `coding_dist`")
        if not (
            _true_one_of_n.type.ndim == 1
            and _true_one_of_n.type.dtype in integer_dtypes
        ):
            raise TypeError("Integer vector required for argument `true_one_of_n`")

        return Apply(
            self,
            [_coding_dist, _true_one_of_n],
            [TensorType(dtype=_coding_dist.dtype, shape=(None,))()],
        )

    def perform(self, node, inp, out):
        coding, one_of_n = inp
        (y_out,) = out
        # Vectorized gather of the target-class probabilities; same values
        # and dtype as the previous per-row Python loop.
        rows = np.arange(coding.shape[0])
        y_out[0] = -np.log(coding[rows, one_of_n])

    def infer_shape(self, fgraph, node, in_shapes):
        # One cross-entropy value per row of the coding distribution.
        return [(in_shapes[0][0],)]

    def grad(self, inp, grads):
        coding, one_of_n = inp
        (g_y,) = grads
        return [
            crossentropy_categorical_1hot_grad(g_y, coding, one_of_n),
            # No gradient w.r.t. the integer labels.
            grad_not_implemented(self, 1, one_of_n),
        ]
# Singleton instance; the graph rewrites below match against this exact object.
crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
@register_stabilize("fast_compile")
@register_specialize("fast_compile")
@graph_rewriter
def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
    """Fuse ``crossentropy_categorical_1hot(softmax_with_bias(x, b), y)``
    into a single `crossentropy_softmax_argmax_1hot_with_bias` application.

    Runs to a fixed point: each successful substitution restarts the
    toposort scan, until no matching node remains.
    """

    def search_make_one_sub():
        # Perform at most one substitution per scan; return whether one
        # was made so the caller can loop to a fixed point.
        for node in fgraph.toposort():
            if node.op == crossentropy_categorical_1hot:
                (nll,) = node.outputs
                sm, one_of_n = node.inputs
                if sm.owner and sm.owner.op == softmax_with_bias:
                    x, b = sm.owner.inputs
                    (
                        new_nll,
                        new_sm,
                        new_am,
                    ) = crossentropy_softmax_argmax_1hot_with_bias(x, b, one_of_n)
                    # Replace both the NLL and the softmax output so other
                    # consumers of `sm` use the fused op's softmax.
                    fgraph.replace_all_validate(
                        [(nll, new_nll), (sm, new_sm)],
                        reason="crossentropy_to_crossentropy_with_softmax_with_bias",
                    )
                    return True

        return False

    while search_make_one_sub():
        pass
    return
@graph_rewriter
def crossentropy_to_crossentropy_with_softmax(fgraph):
    """
    This is a stabilization rewrite that is more general than
    `crossentropy_to_crossentropy_with_softmax_with_bias`.

    It fuses ``crossentropy_categorical_1hot`` applied to either a plain
    2-d legacy softmax (a zero bias is synthesized) or a
    ``softmax_with_bias`` into `crossentropy_softmax_argmax_1hot_with_bias`.

    Notes
    -----
    It must be executed after `local_softmax_with_bias` during the
    specialization passes.

    """

    def search_make_one_sub():
        # One substitution per scan; loop below runs this to a fixed point.
        for node in fgraph.toposort():
            if node.op == crossentropy_categorical_1hot:
                (nll,) = node.outputs
                sm, one_of_n = node.inputs
                if sm.owner and sm.owner.op == softmax_legacy and sm.ndim == 2:
                    (x,) = sm.owner.inputs
                    (
                        new_nll,
                        new_sm,
                        new_am,
                    ) = crossentropy_softmax_argmax_1hot_with_bias(
                        x, at.zeros_like(x[0]), one_of_n
                    )
                    fgraph.replace_all_validate(
                        [(nll, new_nll), (sm, new_sm)],
                        reason="crossentropy_to_crossentropy_with_softmax",
                    )
                    return True
                if sm.owner and sm.owner.op == softmax_with_bias:
                    x, b = sm.owner.inputs
                    (
                        new_nll,
                        new_sm,
                        new_am,
                    ) = crossentropy_softmax_argmax_1hot_with_bias(x, b, one_of_n)
                    fgraph.replace_all_validate(
                        [(nll, new_nll), (sm, new_sm)],
                        reason="crossentropy_to_crossentropy_with_softmax",
                    )
                    return True

        return False

    while search_make_one_sub():
        pass
    return
# Registered directly with `optdb` (rather than via a decorator) so an
# explicit position (2.01) can be given: per its docstring, this rewrite must
# run after `local_softmax_with_bias` during the specialization passes.
optdb.register(
    "crossentropy_to_crossentropy_with_softmax",
    crossentropy_to_crossentropy_with_softmax,
    "fast_run",
    "xent",
    "fast_compile",
    position=2.01,
)
@register_specialize(
    "fast_compile", "local_crossentropy_to_crossentropy_with_softmax_grad"
)  # old name
@node_rewriter([softmax_grad_legacy])
def local_softmax_grad_to_crossentropy_with_softmax_grad(fgraph, node):
    """Rewrite ``softmax_grad(crossentropy_categorical_1hot_grad(...), sm)``
    into the fused `crossentropy_softmax_1hot_with_bias_dx` op (2-d case only).
    """
    if node.op == softmax_grad_legacy and node.inputs[1].ndim == 2:
        g_coding_dist, coding_dist = node.inputs
        if (
            g_coding_dist.owner
            and g_coding_dist.owner.op == crossentropy_categorical_1hot_grad
        ):
            g_nll, coding_dist, true_one_of_n = g_coding_dist.owner.inputs
            dx = crossentropy_softmax_1hot_with_bias_dx(
                g_nll, coding_dist, true_one_of_n
            )
            copy_stack_trace(node.outputs[0], dx)
            return [dx]
@register_specialize("fast_compile")
@node_rewriter([MaxAndArgmax])
def local_argmax_pushdown(fgraph, node):
    """Push a `MaxAndArgmax` through a monotonic elemwise op (or a softmax).

    Only applies when the max-value output has no clients (i.e. only the
    argmax is used): a monotonic transform preserves the argmax location,
    so computing it on the pre-activation input is equivalent and cheaper.
    """
    if (
        isinstance(node.op, MaxAndArgmax)
        and node.inputs[0].owner
        and len(fgraph.clients[node.outputs[0]]) == 0
    ):
        x_max, x_argmax = node.outputs
        x = node.inputs[0]
        axis = node.op.get_params(node)
        # TODO: Make a list/set of monotonic ops...
        if x.owner and (
            x.owner.op
            in (
                softplus,
                exp,
                log,
                tanh,
                sigmoid,
            )
            or isinstance(x.owner.op, Softmax)
        ):
            (pre_x,) = x.owner.inputs
            ret = max_and_argmax(pre_x, axis)
            copy_stack_trace(x_max, ret)
            return ret
        if x.owner and x.owner.op == softmax_with_bias:
            pre_x, pre_bias = x.owner.inputs
            # The bias must be added back (broadcast over rows) before the
            # argmax, since it shifts each column differently.
            ret = max_and_argmax(
                pre_x + DimShuffle(pre_bias.broadcastable, ("x", 0))(pre_bias),
                axis,
            )
            # copy both stack traces
            copy_stack_trace(x_max, ret)
            return ret
def _check_rows_is_arange_len_labels(fgraph, rows, labels):
    """Check that `rows` is the same node as `at.arange(labels.shape[0])`.

    Also considers the case where `labels.shape[0]` is constant and equal to 1,
    and `at.arange(labels.shape[0])` has been constant-folded into
    0.

    NOTE(review): falls through (implicitly returning ``None``, i.e. falsy)
    whenever no case matches — callers treat this the same as ``False``.
    """
    shape_of = None
    if hasattr(fgraph, "shape_feature"):
        shape_of = fgraph.shape_feature.shape_of
        # TODO: consider cases where shape_of[labels] is constant, and
        # has a value different from 1.
        # This case is harder, as _is_const only accepts a scalar value
        # as second argument, so checking for
        # _is_const(rows, numpy.arange(...)) does not work for the moment.
        if len(shape_of[labels]) == 1 and _is_const(shape_of[labels][0], 1):
            return _is_const(rows, 0)

    if rows.owner and isinstance(rows.owner.op, ARange):
        start, stop, step = rows.owner.inputs
        if getattr(start, "data", None) != 0:  # constants will have data
            return False
        if getattr(step, "data", None) != 1:  # constant step will have data
            return False
        if not stop.owner:
            return False

        # Not sure if that case happens any more after the introduction of
        # ShapeOptimizer, but we keep it if ShapeOptimizer is not present
        if isinstance(stop.owner.op, DimShuffle) and stop.owner.op.new_order == ():
            shape_var = stop.owner.inputs[0]
            if shape_var.owner and isinstance(shape_var.owner.op, Shape):
                # `stop` is a 0-d view of `labels.shape[0]` taken directly
                # from a `Shape` node.
                return shape_var.owner.inputs[0] is labels
        elif shape_of:
            shape_of = fgraph.shape_feature.shape_of
            # `stop` is the symbolic shape entry tracked by the shape feature.
            return shape_of[labels][0] is stop
def _is_const(z, val, approx=False):
    """Return whether `z` is a scalar constant (approximately) equal to `val`."""
    try:
        constant_value = at.get_scalar_constant_value(z)
    except NotScalarConstantError:
        # Not a scalar constant at all.
        return False
    if approx:
        return np.allclose(constant_value, val)
    return np.all(constant_value == val)
@register_specialize("fast_compile")
@node_rewriter([AdvancedSubtensor, log])
def local_advanced_indexing_crossentropy_onehot(fgraph, node):
    """Rewrite one-hot cross-entropy expressed via advanced indexing into
    the fused (numerically stable) `crossentropy_softmax_argmax_1hot_with_bias`.

    Matches either ``log(softmax(x))[arange(n), y]`` or
    ``log(softmax(x)[arange(n), y])`` and replaces it with the negation of
    the fused op's NLL output.
    """
    log_op = None
    sm = None

    # First case: log(softmax(x))[rows, labels]
    if isinstance(node.op, AdvancedSubtensor):
        try:
            log_op, rows, labels = node.inputs
        except Exception:
            pass

        if log_op and log_op.owner and log_op.owner.op == log:
            sm = log_op.owner.inputs[0]

    # Second case: log(softmax(x)[rows, labels])
    elif node.op == log:
        pre_log = node.inputs[0].owner
        if pre_log and isinstance(pre_log.op, AdvancedSubtensor):
            try:
                sm, rows, labels = pre_log.inputs
            except Exception:
                pass

    if (
        sm is not None
        and sm.owner
        and sm.owner.op in (softmax_legacy, softmax_with_bias)
        and sm.ndim == 2
    ):
        # Normalize to the `softmax_with_bias` form, synthesizing a zero
        # bias when the legacy softmax cannot be converted.
        sm_w_bias = local_softmax_with_bias.transform(fgraph, sm.owner)
        if sm_w_bias:
            assert sm_w_bias[0].owner.op == softmax_with_bias
            x_var, b_var = sm_w_bias[0].owner.inputs
        else:
            x_var = sm.owner.inputs[0]
            b_var = at.zeros_like(x_var[0])

        # Check that rows == arange(labels.shape[0])
        if _check_rows_is_arange_len_labels(fgraph, rows, labels):
            if labels.ndim == 1 and x_var.ndim == 2:
                minus_ret = crossentropy_softmax_argmax_1hot_with_bias(
                    x_var, b_var, labels
                )[0]
                # The fused op computes the NLL; the matched expression is
                # the log-probability, hence the negation.
                ret = -minus_ret
                copy_stack_trace(node.outputs[0], [minus_ret, ret])
                return [ret]
@register_specialize("fast_compile")
@node_rewriter([softmax_grad_legacy])
def local_advanced_indexing_crossentropy_onehot_grad(fgraph, node):
    """Rewrite the gradient graph of advanced-indexing one-hot cross-entropy
    into the fused `crossentropy_softmax_1hot_with_bias_dx` op.

    See the long comment below for the two graph shapes that are matched.
    """
    if not (node.op == softmax_grad_legacy and node.inputs[1].ndim == 2):
        return

    sm = None
    try:
        d_sm, sm = node.inputs
    except Exception:
        return

    if (
        (sm is not None)
        and sm.owner
        and (sm.owner.op in (softmax_legacy, softmax_with_bias))
        and sm.ndim == 2
    ):
        # Normalize to `softmax_with_bias` when possible; `x_var` is the
        # softmax's pre-activation input (used later for shape/fill).
        sm_w_bias = local_softmax_with_bias.transform(fgraph, sm.owner)
        if sm_w_bias:
            assert sm_w_bias[0].owner.op == softmax_with_bias
            x_var, b_var = sm_w_bias[0].owner.inputs
        else:
            x_var = sm.owner.inputs[0]
    else:
        return

    # Two cases are supported:
    # 1. AdvancedIncSubtensor(
    #        zeros_like(softmax(x)),
    #        -out_grad / AdvancedSubtensor(softmax(x), arange(y.shape[0]), y),
    #        arange(y.shape[0]),
    #        y)
    #    which arises from the gradient of log(softmax(x)[arange(y.shape[0]), y])
    #
    # 2. AdvancedIncSubtensor(
    #        zeros_like(log(softmax(x))),
    #        -out_grad,
    #        arange(y.shape[0]),
    #        y)
    #    / softmax(x)
    #    which arises from the gradient of log(softmax(x))[arange(y.shape[0]), y]
    #
    # out_grad represents the gradient of the (final) cost wrt the output.
    #
    # N.B. Regarding clients -- This substitution is important for numerical stability, so we
    # perform the substitution even when intermediate values have multiple clients.
    #
    # First case.
    # After the check for AdvancedIncSubtensor, if anything does not fit with
    # the formula above, there's no way to fit it with the the second case,
    # so we return immediately.
    if d_sm.owner and isinstance(d_sm.owner.op, AdvancedIncSubtensor):
        try:
            z, incr, rows, labels = d_sm.owner.inputs
        except Exception:
            return
        # Check that z == zeros_like(softmax(x))
        # We know z has the right size because z has the same size as d_sm,
        # and d_sm and sm are both inputs of softmax_grad (so they have
        # the same size).
        if not _is_const(z, 0):
            return

        # In the base case (output gradient = 1), incr is -1./sm[arange(len(y)), y]
        # Here, we are looking for the AdvancedSubtensor term (sm[arange(len(y)), y]),
        # and constructing out_grad by incorporating the other terms.
        # out_grad will be constructed in 3 steps as follow:
        # out_grad = +/- 1. (according to sign)
        # out_grad *= -numerator
        # out_grad /= denominator
        # Then, if out_grad is a scalar, it will be allocated as a vector
        adv_subtensor = None
        out_grad = 1.0

        # If there's a 'minus' sign before the whole expression, put it in
        # out_grad and iterate
        if incr.owner and incr.owner.op == neg:
            out_grad = -out_grad
            incr = incr.owner.inputs[0]

        if incr.owner and incr.owner.op == true_div:
            num, denom = incr.owner.inputs

            # set out_grad according to the numerator, it may be divided later
            # num should be a vector or a scalar
            if num.ndim == 1 or all(num.broadcastable):
                out_grad *= -num
            else:
                return

            if not denom.owner:
                return

            if isinstance(denom.owner.op, AdvancedSubtensor):
                # Base case
                adv_subtensor = denom
                # out_grad /= 1.
            elif denom.owner.op == mul:
                # Try to find the AdvancedSubtensor node mentioned above,
                # and the output gradient
                for i, input in enumerate(denom.owner.inputs):
                    if input.owner and isinstance(input.owner.op, AdvancedSubtensor):
                        other_inputs = [
                            in_ for (j, in_) in enumerate(denom.owner.inputs) if j != i
                        ]
                        if len(other_inputs) == 1:
                            rest = other_inputs[0]
                        else:
                            # NOTE(review): `mul(*[other_inputs])` passes the
                            # whole list as a single argument (same as
                            # `mul(other_inputs)`); `mul(*other_inputs)` may
                            # have been intended — confirm before relying on
                            # the >2-factor path.
                            rest = mul(*[other_inputs])

                        # Check that rest is a vector or a scalar
                        if rest.ndim == 1 or all(rest.broadcastable):
                            adv_subtensor = input
                            out_grad /= rest
                            break
            else:
                return

            # The output gradient needs to be a vector
            out_grad = at.fill(x_var[:, 0], out_grad)

            if adv_subtensor is not None:
                try:
                    maybe_sm, maybe_rows, maybe_labels = adv_subtensor.owner.inputs
                except Exception:
                    return

                if not (
                    maybe_sm is sm and maybe_rows is rows and maybe_labels is labels
                ):
                    return
                # else: OK
            else:
                return
        else:
            return

        # Check that rows is arange(labels.shape[0])
        if not _check_rows_is_arange_len_labels(fgraph, rows, labels):
            return
        # else, arguments of AdvancedIncSubtensor are OK,
        # it was really case 1.

    # Second case
    elif d_sm.owner and d_sm.owner.op == true_div:
        # we're looking for
        # AdvIncSubtensor(zeros, grad_nll, arange(len(y)), y) / softmax
        try:
            num, denom = d_sm.owner.inputs
        except Exception:
            return

        if denom != sm:
            return

        # Check the numerator (AdvancedIncSubtensor)
        if num.owner and isinstance(num.owner.op, AdvancedIncSubtensor):
            try:
                z, incr, rows, labels = num.owner.inputs
            except Exception:
                return

            # Check z is zeros_like(log(sm))
            if not _is_const(z, 0):
                return
            if z.broadcastable not in [(False, False), (True, False)]:
                return
            # here we know that we are incrementing a matrix of zeros
            # (or a broadcasted vector).
            # Since d_sm and sm are the inputs of softmax_grad,
            # if the graph is valid, they have the same shape, so we
            # also know that z has the right shape.

            if incr.ndim != 1 or incr.dtype not in float_dtypes:
                return

            # here we know that we are incrementing some part of
            # matrix z by a vector

            # unless the user has taken care to mark that the data and
            # labels have the same number of rows, we cannot be sure
            # here that len(y) == len(z) However, in the common case
            # that these are predictions and labels it is true. We
            # leave it to the Op to crash (and the user to complain)
            # if this assumption is ever not true.

            out_grad = -incr

            # Check that rows is arange(labels.shape[0])
            if not _check_rows_is_arange_len_labels(fgraph, rows, labels):
                return
            # else, arguments of AdvancedIncSubtensor are OK
        else:
            return

        # numerator and denominator are OK,
        # it was really case 2.

    else:
        return

    # Dimension check before substitution
    if labels.ndim == 1 and x_var.ndim == 2:
        ret = crossentropy_softmax_1hot_with_bias_dx(out_grad, sm, labels)
        # The stack trace is not added to output_grad, sm and labels at
        # the moment but may need to be added at a future point
        copy_stack_trace(node.outputs[0], ret)
        return [ret]
    else:
        return
@register_specialize("fast_compile")
@node_rewriter([softmax_with_bias])
def graph_merge_softmax_with_crossentropy_softmax(fgraph, node):
    """Replace a standalone `softmax_with_bias` with the softmax output of an
    existing `crossentropy_softmax_argmax_1hot_with_bias` node that consumes
    the same ``x`` and ``b``, avoiding a duplicate softmax computation.
    """
    if node.op == softmax_with_bias:
        x, b = node.inputs
        for x_client in fgraph.clients[x]:
            if x_client[0].op == crossentropy_softmax_argmax_1hot_with_bias:
                big_client = x_client[0]
                # Require the candidate node to also consume `b`.
                if big_client in [b_client[0] for b_client in fgraph.clients[b]]:
                    xx, bb, ll = big_client.inputs
                    mergeable_client = big_client.op(x, b, ll)
                    # Output 1 of the fused op is the softmax.
                    copy_stack_trace(node.outputs[0], mergeable_client[1])
                    return [mergeable_client[1]]
@register_specialize
@register_stabilize
@register_canonicalize
@node_rewriter([CrossentropySoftmax1HotWithBiasDx])
def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(fgraph, node):
    """
    Replace a CrossentropySoftmax1HotWithBiasDx op, whose incoming gradient is
    an `alloc` of a scalar variable or one that has either broadcastable or
    matching dimensions with the output variable, by one that skips the
    intermediate `alloc`.

    """
    if isinstance(node.op, CrossentropySoftmax1HotWithBiasDx):
        dy, sm, y_idx = node.inputs

        # Those cases are directly handled by the internal broadcasting of the
        # `CrossentropySoftmax1HotWithBiasDx` op.
        if dy.ndim == 0:
            return False
        if dy.ndim == 1 and dy.broadcastable[0]:
            return False

        assert dy.ndim == 1

        if dy.owner is not None and isinstance(dy.owner.op, at.Alloc):
            # dz is the input of the Alloc op, i.e. at.alloc(dz, <shape>)
            dz = dy.owner.inputs[0]

            try:
                shape_feature = fgraph.shape_feature
            except AttributeError:
                # The shape feature may not be available in some mode, but we
                # need it for this optimization, so don't continue.
                return False

            shape_of = shape_feature.shape_of
            same_shape = shape_feature.same_shape

            # Build `dz_broad` explicitly to include extra implicit dimensions.
            dz_broad = (True,) * (dy.ndim - dz.ndim) + dz.broadcastable

            # If we can infer statically that the shape of `sm` and
            # `dy` are the same in dimension `k` or the shape of `dy` is equal
            # to 1 (which triggers the internal broadcasting in
            # `CrossentropySoftmax1HotWithBiasDx`) we do not need to
            # check it at runtime.
            if (
                dz_broad[0]
                and not same_shape(sm, dy, dim_x=0, dim_y=0)
                and shape_of[dy][0] != 1
            ):
                # If `dz` is broadcastable, we need to check whether the shapes
                # of `dy` and `sm` are the same or whether the shape of `dy` is
                # equal to 1.
                cond = or_(eq(dy.shape[0], 1), eq(dy.shape[0], sm.shape[0]))
                msg = "`sm` and `dy` do not have the same shape."
                dz = Assert(msg)(dz, cond)

            ret = node.op(dz, sm, y_idx)
            copy_stack_trace(node.outputs[0], ret)
            return [ret]
def binary_crossentropy(output, target):
    """
    Compute the crossentropy of binary random variables.

    Output and target are each expectations of binary random
    variables; target may be exactly 0 or 1 but output must
    lie strictly between 0 and 1.

    Notes
    -----
    We could use the x log y op to support output=0 and output=1.
    The gradient would still be undefined though.

    We do not sum, crossentropy is computed by component.
    TODO : Rewrite as a scalar, and then broadcast to tensor.

    """
    positive_term = target * log(output)
    negative_term = (1.0 - target) * log(1.0 - output)
    return -(positive_term + negative_term)
def sigmoid_binary_crossentropy(output, target):
    """
    Compute the cross-entropy of binary random variables.

    `output` should be real-valued (range (-inf, +inf)); `sigmoid` will be
    applied to produce a (0, 1) valued input.

    `target` is assumed to be probabilities in [0, 1].

    Notes
    -----
    Mathematically equivalent to `binary_crossentropy(sigmoid(output), target)`,
    but with more efficient and numerically stable computation.

    """

    def grad(inputs, out_grads):
        # Analytic gradient override: d/dlogit = sigmoid(logit) - target.
        logit, label = inputs
        (out_grad,) = out_grads
        return [
            out_grad * (sigmoid(logit) - label),
            out_grad * (-logit),
        ]

    # Stable formulation: softplus(-|x|) + x * (1[x > 0] - t).
    stable_xent = softplus(-abs(output)) + output * ((output > 0) - target)
    fused_op = pytensor.compile.builders.OpFromGraph(
        [output, target],
        [stable_xent],
        grad_overrides=grad,
        inline=True,
        name="sigmoid_binary_crossentropy",
    )
    return fused_op(output, target)
def categorical_crossentropy(coding_dist, true_dist):
    r"""
    Return the cross-entropy between an approximating distribution and a true
    distribution.

    .. warning:: THIS FUNCTION IS UNNECESSARILY POLYMORPHIC.
    We ultimately don't want the polymorphism, and will move this function
    to pylearn.algorithms.cost. The 1hot version will be removed.
    The length of the documentation here is a form of code smell.

    The cross entropy between two probability distributions measures the average
    number of bits needed to identify an event from a set of possibilities, if a
    coding scheme is used based on a given probability distribution q, rather
    than the "true" distribution p.

    Mathematically it is defined as follows:

    .. math::

        H(p,q) = - \sum_x p(x) \log(q(x))

    Parameters
    ----------
    coding_dist : a dense matrix
        Each slice along axis represents one distribution.
    true_dist : a dense matrix or sparse matrix or integer vector
        In the case of a matrix argument, each slice along axis represents one
        distribution. In the case of an integer vector argument, each element
        represents the position of the '1' in a 1-of-N encoding.

    Returns
    -------
    tensor of rank one-less-than `coding_dist`
        The cross entropy between each coding and true distribution.

    Notes
    -----
    axis : int
        The dimension over which each distribution runs
        (1 for row distributions, 0 for column distributions).

    """
    # Integer-label form: delegate to the dedicated 1-hot Op.
    if true_dist.ndim == coding_dist.ndim - 1:
        return crossentropy_categorical_1hot(coding_dist, true_dist)
    # Dense-distribution form: -sum(p * log(q)) along the last axis.
    if true_dist.ndim == coding_dist.ndim:
        return -at_sum(true_dist * log(coding_dist), axis=coding_dist.ndim - 1)
    raise TypeError("rank mismatch between coding and true distributions")
class Prepend_scalar_constant_to_each_row(Op):
    """Prepend a fixed scalar `val` as the first column of each row of a matrix.

    The scalar is fixed at Op-construction time; see
    `Prepend_scalar_to_each_row` for a runtime-scalar variant.
    """

    # NOTE(review): `self.val` is not part of `__props__`, so two instances
    # with different `val` compare equal — confirm whether that is intended.
    __props__ = ()

    def __init__(self, val=0):
        if isinstance(val, float):
            val = aes.constant(val)
        self.val = val

    def __str__(self):
        return f"{self.__class__.__name__}{{{self.val}}}"

    def make_node(self, mat):
        # check type of input
        x = at.as_tensor_variable(mat)
        if mat.type.broadcastable != (False, False):
            raise TypeError("Expected a matrix as input")
        y = at.as_tensor_variable(self.val)
        assert y.ndim == 0
        if x.type.dtype != y.type.dtype:
            # BUG FIX: this exception was previously constructed but never
            # raised, silently allowing a dtype mismatch.
            raise TypeError(
                "the value to prepend don't have the same type as the matrix"
            )

        node = Apply(op=self, inputs=[mat], outputs=[mat.type()])
        return node

    def perform(self, node, inp, out):
        (mat,) = inp
        (output,) = out
        new_shape = (mat.shape[0], mat.shape[1] + 1)
        if output[0] is None:
            output[0] = np.empty(new_shape, dtype=mat.dtype)
            out = output[0]
        else:
            # Try to reuse the existing output buffer, resizing in place.
            if output[0].shape != new_shape:
                try:
                    output[0].resize(new_shape)
                except Exception:
                    output[0] = np.empty(new_shape, dtype=mat.dtype)
            out = output[0]

        out[:, 0].fill(self.val.data)
        out[:, 1:] = mat

    def infer_shape(self, fgraph, node, in_shapes):
        # One extra column is added.
        shp = (in_shapes[0][0], in_shapes[0][1] + 1)
        return [shp]

    def grad(self, inp, grads):
        (mat,) = inp
        (goutput,) = grads
        # The prepended constant column receives no gradient; drop it.
        return goutput[:, 1:]
class Prepend_scalar_to_each_row(Op):
    """Prepend a runtime scalar `val` as the first column of each row of a matrix."""

    __props__ = ()

    def make_node(self, val, mat):
        # check type of input
        x = at.as_tensor_variable(mat)
        if isinstance(val, float):
            val = aes.constant(val)
        if mat.type.broadcastable != (False, False):
            raise TypeError("Expected a matrix as input")
        y = at.as_tensor_variable(val)
        assert y.ndim == 0
        if x.type.dtype != y.type.dtype:
            # BUG FIX: this exception was previously constructed but never
            # raised, silently allowing a dtype mismatch.
            raise TypeError(
                "the value to prepend don't have the same type as the matrix"
            )

        node = Apply(op=self, inputs=[val, mat], outputs=[mat.type()])
        return node

    def perform(self, node, inp, out):
        val, mat = inp
        (output,) = out
        new_shape = (mat.shape[0], mat.shape[1] + 1)
        if output[0] is None:
            output[0] = np.empty(new_shape, dtype=mat.dtype)
            out = output[0]
        else:
            # Try to reuse the existing output buffer, resizing in place.
            if output[0].shape != new_shape:
                try:
                    output[0].resize(new_shape)
                except Exception:
                    output[0] = np.empty(new_shape, dtype=mat.dtype)
            out = output[0]
        out[:, 0].fill(val)
        out[:, 1:] = mat

    def infer_shape(self, fgraph, node, in_shapes):
        # Output shape follows the matrix (input 1) with one extra column.
        shp = (in_shapes[1][0], in_shapes[1][1] + 1)
        return [shp]

    def grad(self, inp, grads):
        val, mat = inp
        (goutput,) = grads
        # Gradient of the scalar is the first column; the rest goes to `mat`.
        return goutput[:, 0], goutput[:, 1:]
# Ready-to-use instances: prepend a runtime scalar, a constant 0, or a
# constant 1 column to each row of a matrix.
prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
prepend_0_to_each_row = Prepend_scalar_constant_to_each_row(0.0)
prepend_1_to_each_row = Prepend_scalar_constant_to_each_row(1.0)
def relu(x, alpha=0):
    """
    Compute the element-wise rectified linear activation function.

    .. versionadded:: 0.7.1

    Parameters
    ----------
    x : symbolic tensor
        Tensor to compute the activation function for.
    alpha : `scalar or tensor, optional`
        Slope for negative input, usually between 0 and 1. The default value
        of 0 will lead to the standard rectifier, 1 will lead to
        a linear activation function, and any value in between will give a
        leaky rectifier. A shared variable (broadcastable against `x`) will
        result in a parameterized rectifier with learnable slope(s).

    Returns
    -------
    symbolic tensor
        Element-wise rectifier applied to `x`.

    Notes
    -----
    This is numerically equivalent to ``switch(x > 0, x, alpha * x)``
    (or ``maximum(x, alpha * x)`` for ``alpha < 1``), but uses a faster
    formulation or an optimized Op, so we encourage to use this function.

    """
    # This is probably the fastest implementation for GPUs. Both the forward
    # pass and the gradient get compiled into a single GpuElemwise call.
    # TODO: Check if it's optimal for CPU as well; add an "if" clause if not.
    # TODO: Check if there's a faster way for the gradient; create an Op if so.
    if alpha == 0:
        # max(x, 0) expressed as 0.5 * (x + |x|).
        return 0.5 * (x + abs(x))
    # We can't use 0.5 and 1 for one and half. as if alpha is a
    # numpy dtype, they will be considered as float64, so would
    # cause upcast to float64.
    alpha = at.as_tensor_variable(alpha)
    half_sum = 0.5 * (1 + alpha)
    half_diff = 0.5 * (1 - alpha)
    return half_sum * x + half_diff * abs(x)
def h_softmax(
    x,
    batch_size,
    n_outputs,
    n_classes,
    n_outputs_per_class,
    W1,
    b1,
    W2,
    b2,
    target=None,
):
    """Two-level hierarchical softmax.

    This function implements a two-layer hierarchical softmax. It is commonly
    used as an alternative of the softmax when the number of outputs is
    important (it is common to use it for millions of outputs). See
    reference [1]_ for more information about the computational gains.

    The `n_outputs` outputs are organized in `n_classes` classes, each class
    containing the same number `n_outputs_per_class` of outputs.
    For an input `x` (last hidden activation), the first softmax layer predicts
    its class and the second softmax layer predicts its output among its class.

    If `target` is specified, it will only compute the outputs of the
    corresponding targets. Otherwise, if `target` is `None`, it will compute
    all the outputs.

    The outputs are grouped in classes in the same order as they are initially
    defined: if `n_outputs=10` and `n_classes=2`, then the first class is
    composed of the outputs labeled `{0,1,2,3,4}` while the second class is
    composed of `{5,6,7,8,9}`. If you need to change the classes, you have to
    re-label your outputs.

    .. versionadded:: 0.7.1

    Parameters
    ----------
    x: tensor of shape (batch_size, number of features)
        the minibatch input of the two-layer hierarchical softmax.
    batch_size: int
        the size of the minibatch input x.
    n_outputs: int
        the number of outputs.
    n_classes: int
        the number of classes of the two-layer hierarchical softmax. It
        corresponds to the number of outputs of the first softmax. See note at
        the end.
    n_outputs_per_class: int
        the number of outputs per class. See note at the end.
    W1: tensor of shape (number of features of the input x, n_classes)
        the weight matrix of the first softmax, which maps the input x to the
        probabilities of the classes.
    b1: tensor of shape (n_classes,)
        the bias vector of the first softmax layer.
    W2: tensor of shape (n_classes, number of features of the input x,
            n_outputs_per_class)
        the weight matrix of the second softmax, which maps the input x to
        the probabilities of the outputs.
    b2: tensor of shape (n_classes, n_outputs_per_class)
        the bias vector of the second softmax layer.
    target: tensor of shape either (batch_size,) or (batch_size, 1)
        (optional, default None)
        contains the indices of the targets for the minibatch
        input x. For each input, the function computes the output for its
        corresponding target. If target is None, then all the outputs are
        computed for each input.

    Returns
    -------
    tensor of shape (`batch_size`, `n_outputs`) or (`batch_size`, 1)
        Output tensor of the two-layer hierarchical softmax for input `x`.
        Depending on argument `target`, it can have two different shapes.
        If `target` is not specified (`None`), then all the outputs are
        computed and the returned tensor has shape (`batch_size`, `n_outputs`).
        Otherwise, when `target` is specified, only the corresponding outputs
        are computed and the returned tensor has thus shape (`batch_size`, 1).

    Notes
    -----
    The product of `n_outputs_per_class` and `n_classes` has to be greater or
    equal to `n_outputs`. If it is strictly greater, then the irrelevant
    outputs will be ignored.
    `n_outputs_per_class` and `n_classes` have to be the same as the
    corresponding dimensions of the tensors of `W1`, `b1`, `W2` and `b2`.
    The most computational efficient configuration is when
    `n_outputs_per_class` and `n_classes` are equal to the square root of
    `n_outputs`.

    Examples
    --------
    The following example builds a simple hierarchical softmax layer.

    >>> import numpy as np
    >>> import pytensor
    >>> import pytensor.tensor as at
    >>> from pytensor.tensor.nnet import h_softmax
    >>>
    >>> # Parameters
    >>> batch_size = 32
    >>> n_outputs = 100
    >>> dim_x = 10  # dimension of the input
    >>> n_classes = int(np.ceil(np.sqrt(n_outputs)))
    >>> n_outputs_per_class = n_classes
    >>> output_size = n_outputs_per_class * n_outputs_per_class
    >>>
    >>> # First level of h_softmax
    >>> floatX = pytensor.config.floatX
    >>> W1 = pytensor.shared(
    ...     np.random.normal(0, 0.001, (dim_x, n_classes)).astype(floatX))
    >>> b1 = pytensor.shared(np.zeros((n_classes,), floatX))
    >>>
    >>> # Second level of h_softmax
    >>> W2 = np.random.normal(0, 0.001,
    ...     size=(n_classes, dim_x, n_outputs_per_class)).astype(floatX)
    >>> W2 = pytensor.shared(W2)
    >>> b2 = pytensor.shared(np.zeros((n_classes, n_outputs_per_class), floatX))
    >>>
    >>> # We can now build the graph to compute a loss function, typically the
    >>> # negative log-likelihood:
    >>>
    >>> x = at.imatrix('x')
    >>> target = at.imatrix('target')
    >>>
    >>> # This only computes the output corresponding to the target.
    >>> # The complexity is O(n_classes + n_outputs_per_class).
    >>> y_hat_tg = h_softmax(x, batch_size, output_size, n_classes,
    ...                      n_outputs_per_class, W1, b1, W2, b2, target)
    >>>
    >>> negll = -at.mean(at.log(y_hat_tg))
    >>>
    >>> # We may need to compute all the outputs (at test time usually):
    >>>
    >>> # This computes all the outputs.
    >>> # The complexity is O(n_classes * n_outputs_per_class).
    >>> output = h_softmax(x, batch_size, output_size, n_classes,
    ...                    n_outputs_per_class, W1, b1, W2, b2)

    References
    ----------
    .. [1] J. Goodman, "Classes for Fast Maximum Entropy Training,"
        ICASSP, 2001, <http://arxiv.org/abs/cs/0108006>`.
    """
    # First softmax that computes the probabilities of belonging to each class
    class_probs = softmax(dot(x, W1) + b1)

    if target is None:  # Computes the probabilities of all the outputs
        # Second softmax that computes the output probabilities
        activations = tensordot(x, W2, (1, 1)) + b2
        output_probs = softmax(activations.reshape((-1, n_outputs_per_class)))
        output_probs = output_probs.reshape((batch_size, n_classes, -1))
        # P(output) = P(class) * P(output | class), broadcast over outputs.
        output_probs = class_probs.dimshuffle(0, 1, "x") * output_probs
        output_probs = output_probs.reshape((batch_size, -1))
        # output_probs.shape[1] is n_classes * n_outputs_per_class, which might
        # be greater than n_outputs, so we ignore the potential irrelevant
        # outputs with the next line:
        output_probs = output_probs[:, :n_outputs]

    else:  # Computes the probabilities of the outputs specified by the targets
        target = target.flatten()

        # Classes to which belong each target
        target_classes = target // n_outputs_per_class

        # Outputs to which belong each target inside a class
        target_outputs_in_class = target % n_outputs_per_class

        # Second softmax that computes the output probabilities
        activations = sparse_block_dot(
            W2.dimshuffle("x", 0, 1, 2),
            x.dimshuffle(0, "x", 1),
            at.zeros((batch_size, 1), dtype="int32"),
            b2,
            target_classes.dimshuffle(0, "x"),
        )

        output_probs = softmax(activations.dimshuffle(0, 2))
        # Joint probability of the target: P(class) * P(output | class).
        target_class_probs = class_probs[at.arange(batch_size), target_classes]
        output_probs = output_probs[at.arange(batch_size), target_outputs_in_class]
        output_probs = target_class_probs * output_probs

    return output_probs
def elu(x, alpha=1):
    """
    Compute the element-wise exponential linear activation function [2]_.

    .. versionadded:: 0.8.0

    Parameters
    ----------
    x : symbolic tensor
        Tensor to compute the activation function for.
    alpha : scalar

    Returns
    -------
    symbolic tensor
        Element-wise exponential linear activation function applied to `x`.

    References
    -----
    .. [2] Djork-Arne Clevert, Thomas Unterthiner, Sepp Hochreiter
        "Fast and Accurate Deep Network Learning by
        Exponential Linear Units (ELUs)" <http://arxiv.org/abs/1511.07289>`.

    """
    # Identity for positive inputs; alpha * (exp(x) - 1) otherwise.
    negative_branch = alpha * expm1(x)
    return at.switch(x > 0, x, negative_branch)
def selu(x):
    """Compute the element-wise Scaled Exponential Linear unit [3]_.

    .. versionadded:: 0.9.0

    Parameters
    ----------
    x : symbolic tensor
        Tensor to compute the activation function for.

    Returns
    -------
    symbolic tensor
        Element-wise scaled exponential linear activation function applied to `x`.

    References
    ----------
    .. [3] Klambauer G, Unterthiner T, Mayr A, Hochreiter S.
        "Self-Normalizing Neural Networks" <https://arxiv.org/abs/1706.02515>
    """
    # Fixed constants from Klambauer et al. (2017): a SELU is an ELU with
    # this particular alpha, rescaled by this particular factor.
    selu_alpha = 1.6732632423543772848170429916717
    selu_scale = 1.0507009873554804934193349852946
    return selu_scale * elu(x, selu_alpha)
class ScalarSoftsign(UnaryScalarOp):
    """
    Softsign activation function
    :math:`\\varphi(\\mathbf{x}) = \\frac{x}{1+|x|}`
    """

    @staticmethod
    def static_impl(x):
        # Reference (Python-level) implementation of the softsign.
        return x / (1.0 + abs(x))

    def impl(self, x):
        return ScalarSoftsign.static_impl(x)

    def grad(self, inp, grads):
        (x,) = inp
        (gz,) = grads
        if "float" in x.type.dtype:
            # d/dx [x / (1 + |x|)] = 1 / (1 + |x|)**2
            d = 1.0 + abs(x)
            return [gz / (d * d)]
        else:
            # Gradient is only defined for floating-point inputs.
            return NotImplemented

    def c_code(self, node, name, inp, out, sub):
        (x,) = inp
        (z,) = out
        # C implementation is only provided for float32/float64.
        if node.inputs[0].type in [aes.float32, aes.float64]:
            return f"{z} = {x} / (1.0+fabs({x}));"
        raise NotImplementedError("only floating point x is implemented")
# Scalar op instance and its element-wise counterpart; `softsign` is the
# public, tensor-level symbol.
scalar_softsign = ScalarSoftsign(aes.upgrade_to_float, name="scalar_softsign")
softsign = Elemwise(scalar_softsign, name="softsign")
def confusion_matrix(actual, pred):
    """
    Computes the confusion matrix of given vectors containing
    actual observations and predicted observations.

    Parameters
    ----------
    actual : 1-d tensor variable
    pred : 1-d tensor variable

    Returns
    -------
    conf_mat : Confusion matrix of actual and predictions observations as shown below.

               | Predicted
    ___________|___________
       Actual  |
               |

    order : 1-d array of order of entries in rows and columns

    Examples
    --------
    >>> import pytensor
    >>> import pytensor.tensor as at
    >>> from pytensor.tensor.nnet import confusion_matrix
    >>> x = at.vector()
    >>> y = at.vector()
    >>> f = pytensor.function([x, y], confusion_matrix(x, y))
    >>> y_true = [2, 0, 2, 2, 0, 1]
    >>> y_pred = [0, 0, 2, 2, 0, 2]
    >>> print(f(y_true, y_pred))
    [array([[2, 0, 0],
           [0, 0, 1],
           [1, 0, 2]]), array([ 0.,  1.,  2.])]
    """
    if actual.ndim != 1:
        raise ValueError("actual must be 1-d tensor variable")
    if pred.ndim != 1:
        raise ValueError("pred must be 1-d tensor variable")

    # Distinct labels occurring in either vector; defines row/column order.
    order = Unique(False, False, False)(at.concatenate([actual, pred]))

    # One-hot encode each vector against the label set by broadcasting a
    # column of observations against the row of labels.
    actual_one_hot = eq(actual.dimshuffle(0, "x"), order).astype("int64")
    pred_one_hot = eq(pred.dimshuffle(0, "x"), order).astype("int64")

    # Cross-tabulate: entry (i, j) counts samples with actual label i
    # predicted as label j.
    return [dot(actual_one_hot.T, pred_one_hot), order]
# Triples of (old_name, deprecation_message, replacement_object), consumed
# lazily by the module-level ``__getattr__`` below, which emits a
# ``DeprecationWarning`` on access.
DEPRECATED_NAMES = [
    (
        "softmax",
        "`pytensor.tensor.nnet.basic.softmax` has been moved to `pytensor.tensor.special.softmax`.",
        softmax,
    ),
    (
        "logsoftmax",
        "`pytensor.tensor.nnet.basic.logsoftmax` has been moved to `pytensor.tensor.special.log_softmax`.",
        log_softmax,
    ),
]
def __getattr__(name):
    """Intercept module-level attribute access of deprecated symbols.

    Adapted from https://stackoverflow.com/a/55139609/3006474.
    """
    from warnings import warn

    for deprecated_name, message, replacement in DEPRECATED_NAMES:
        if name != deprecated_name:
            continue
        warn(message, DeprecationWarning, stacklevel=2)
        return replacement

    raise AttributeError(f"module {__name__} has no attribute {name}")
import numpy as np
import pytensor
from pytensor.configdefaults import config
from pytensor.graph.basic import Apply
from pytensor.graph.op import Op
from pytensor.graph.rewriting.basic import copy_stack_trace, node_rewriter
from pytensor.scalar import Composite, add, as_common_dtype, mul, sub, true_div
from pytensor.tensor import basic as at
from pytensor.tensor.basic import as_tensor_variable
from pytensor.tensor.elemwise import Elemwise
from pytensor.tensor.math import mean, prod, reciprocal, sqrt
from pytensor.tensor.math import sum as at_sum
from pytensor.tensor.rewriting.basic import register_specialize_device
from pytensor.tensor.shape import specify_broadcastable
from pytensor.tensor.type import TensorType
class BNComposite(Composite):
    """Scalar composite computing ``(x - mean) / std * gamma + beta``.

    Used by `batch_normalization(mode="low_mem")` so the whole expression
    fuses into a single `Elemwise` loop instead of several intermediates.
    """

    # Parameter used to reconstruct the composite (its dtype).
    init_param = ("dtype",)

    @config.change_flags(compute_test_value="off")
    def __init__(self, dtype):
        self.dtype = dtype
        # Five scalar inputs of the same dtype feed the fused expression.
        x = pytensor.scalar.ScalarType(dtype=dtype).make_variable()
        mean = pytensor.scalar.ScalarType(dtype=dtype).make_variable()
        std = pytensor.scalar.ScalarType(dtype=dtype).make_variable()
        gamma = pytensor.scalar.ScalarType(dtype=dtype).make_variable()
        beta = pytensor.scalar.ScalarType(dtype=dtype).make_variable()
        o = add(mul(true_div(sub(x, mean), std), gamma), beta)
        inputs = [x, mean, std, gamma, beta]
        outputs = [o]
        super().__init__(inputs, outputs)

    def grad(self, inps, grads):
        # Hand-written gradients of the composite w.r.t. all five inputs.
        x, mean, std, gamma, beta = inps
        (top,) = grads
        top_gamma = top * gamma
        x_mean = x - mean
        dx = top_gamma / std
        dmean = -dx
        dstd = -(top_gamma * x_mean) / (std * std)
        dgamma = top * x_mean / std
        # Gradient w.r.t. beta is the incoming gradient itself.
        return [dx, dmean, dstd, dgamma, top]
def batch_normalization(inputs, gamma, beta, mean, std, mode="low_mem"):
    """
    This function will build the symbolic graph for applying batch normalization
    to a set of activations.

    .. versionadded:: 0.7.1

    Parameters
    ----------
    inputs : symbolic tensor
        Mini-batch of activations
    gamma: symbolic tensor
        BN scale parameter, must be of same dimensionality as
        inputs and broadcastable against it
    beta: symbolic tensor
        BN shift parameter, must be of same dimensionality as
        inputs and broadcastable against it
    mean: symbolic tensor
        inputs means, must be of same dimensionality as
        inputs and broadcastable against it
    std: symbolic tensor
        inputs standard deviation, must be of same dimensionality as
        inputs and broadcastable against it
    mode: 'low_mem' or 'high_mem'
        Specify which batch_normalization implementation that will be
        used.
        As no intermediate representations are stored for the back-propagation,
        'low_mem' implementation lower the memory usage, however,
        it is 5-10% slower than 'high_mem' implementation. Note that 5-10% computation
        time difference compare the batch_normalization operation only, time difference
        between implementation is likely to be less important on the full model fprop/bprop.
    """
    if mode == "low_mem":
        # Fuse the full expression into one Elemwise over a scalar composite.
        fused = Elemwise(scalar_op=BNComposite(dtype=inputs.dtype))
        return fused(inputs, mean, std, gamma, beta)
    if mode == "high_mem":
        # Plain tensor expression; keeps intermediates for back-propagation.
        return (inputs - mean) * (gamma / std) + beta
    raise ValueError('mode must be either "low_mem", "high_mem"')
def _prepare_batch_normalization_axes(axes, ndim):
if axes == "per-activation":
axes = (0,)
elif axes == "spatial":
axes = (0,) + tuple(range(2, ndim))
elif isinstance(axes, (tuple, list, np.ndarray)):
axes = tuple(int(a) for a in axes)
else:
raise ValueError(f"invalid axes: {axes}")
axes = tuple(sorted(axes))
if len(axes) == 0:
raise ValueError("there should be at least one normalization axis")
if min(axes) < 0 or max(axes) >= ndim:
raise ValueError(
f"axes should be less than ndim (<{int(ndim)}), but {axes} given"
)
non_bc_axes = tuple(i for i in range(ndim) if i not in axes)
return axes, non_bc_axes
def batch_normalization_train(
    inputs,
    gamma,
    beta,
    axes="per-activation",
    epsilon=1e-4,
    running_average_factor=0.1,
    running_mean=None,
    running_var=None,
):
    """
    Performs batch normalization of the given inputs, using the mean and
    variance of the inputs.

    Parameters
    ----------
    inputs : tensor
        Mini-batch of activations to normalize.
    axes : 'per-activation', 'spatial' or a tuple of ints
        The axes along which the input should be normalized. ``'per-activation'``
        normalizes per activation and is equal to ``axes=(0,)``.
        ``'spatial'`` shares normalization factors across spatial dimensions
        (i.e., all dimensions past the second), which for 4D inputs would be
        equal to ``axes=(0, 2, 3)``.
    gamma : tensor
        Learnable scale factors. The shape must match the shape of `inputs`,
        except for the axes in `axes`. These axes should be set to 1 or be
        skipped altogether (such that `gamma.ndim == inputs.ndim - len(axes)`).
    beta : tensor
        Learnable biases. Must match the tensor layout of `gamma`.
    epsilon : float
        Epsilon value used in the batch normalization formula. Minimum allowed
        value is 1e-5 (imposed by cuDNN).
    running_average_factor : float
        Factor for updating the values or `running_mean` and `running_var`.
        If the factor is close to one, the running averages will update quickly,
        if the factor is close to zero it will update slowly.
    running_mean : tensor or None
        Previous value of the running mean. If this is given, the new value
        ``running_mean * (1 - r_a_factor) + batch mean * r_a_factor``
        will be returned as one of the outputs of this function.
        `running_mean` and `running_var` should either both be given or
        both be None. The shape should match that of `gamma` and `beta`.
    running_var : tensor or None
        Previous value of the running variance. If this is given, the new value
        ``running_var * (1 - r_a_factor) + (m / (m - 1)) * batch var * r_a_factor``
        will be returned as one of the outputs of this function,
        where `m` is the product of lengths of the averaged-over dimensions.
        `running_mean` and `running_var` should either both be given or
        both be None. The shape should match that of `gamma` and `beta`.

    Returns
    -------
    out : tensor
        Batch-normalized inputs.
    mean : tensor
        Means of `inputs` across the normalization axes.
    invstd : tensor
        Inverse standard deviations of `inputs` across the normalization axes.
    new_running_mean : tensor
        New value of the running mean (only if both `running_mean` and
        `running_var` were given).
    new_running_var : tensor
        New value of the running variance (only if both `running_var` and
        `running_mean` were given).

    Notes
    -----
    If per-activation or spatial normalization is selected, this operation
    will use the cuDNN implementation. (This requires cuDNN 5 or newer.)

    The returned values are equivalent to:

    .. code-block:: python

        # for per-activation normalization
        axes = (0,)
        # for spatial normalization
        axes = (0,) + tuple(range(2, inputs.ndim))
        mean = inputs.mean(axes, keepdims=True)
        var = inputs.var(axes, keepdims=True)
        invstd = at.reciprocal(at.sqrt(var + epsilon))
        out = (inputs - mean) * gamma * invstd + beta

        m = at.cast(at.prod(inputs.shape) / at.prod(mean.shape), 'float32')
        running_mean = running_mean * (1 - running_average_factor) + \\
                       mean * running_average_factor
        running_var = running_var * (1 - running_average_factor) + \\
                      (m / (m - 1)) * var * running_average_factor
    """
    ndim = inputs.ndim
    axes, non_bc_axes = _prepare_batch_normalization_axes(axes, ndim)

    # have the parameter tensors been broadcasted yet?
    if gamma.ndim == ndim:
        params_ndim = ndim
    else:
        # Parameters were given without the normalized axes; build the
        # dimshuffle pattern that re-inserts them as broadcastable dims.
        params_ndim = len(non_bc_axes)
        params_dimshuffle_pattern = ["x"] * ndim
        for i, axis in enumerate(non_bc_axes):
            params_dimshuffle_pattern[axis] = i

    if gamma.ndim != params_ndim or beta.ndim != params_ndim:
        raise ValueError(
            "gamma and beta dimensionality must match the "
            "number of non-normalized axes, or have the "
            "same number of dimensions as the inputs; "
            f"got {int(gamma.ndim)} and {int(beta.ndim)} instead of {int(params_ndim)}"
        )
    if (running_mean is None) != (running_var is None):
        raise ValueError(
            "running_mean and running_var must either both be given or both be None"
        )
    if running_mean is not None and running_mean.ndim != params_ndim:
        raise ValueError(
            "running_mean must be of the same dimensionality "
            f"as gamma and beta; got {int(running_mean.ndim)} instead of {int(params_ndim)}"
        )
    if running_var is not None and running_var.ndim != params_ndim:
        raise ValueError(
            "running_var must be of the same dimensionality "
            f"as gamma and beta; got {int(running_var.ndim)} instead of {int(params_ndim)}"
        )

    # epsilon will be converted to floatX later. we need to check
    # for rounding errors now, since numpy.float32(1e-5) < 1e-5.
    epsilon = np.cast[config.floatX](epsilon)
    if epsilon < 1e-5:
        raise ValueError(f"epsilon must be at least 1e-5, got {epsilon}")

    inputs = as_tensor_variable(inputs)
    gamma = as_tensor_variable(gamma)
    beta = as_tensor_variable(beta)

    if params_ndim != ndim:
        gamma = gamma.dimshuffle(params_dimshuffle_pattern)
        beta = beta.dimshuffle(params_dimshuffle_pattern)
    else:
        gamma = specify_broadcastable(gamma, *axes)
        beta = specify_broadcastable(beta, *axes)

    batchnorm_op = AbstractBatchNormTrain(axes=axes)
    if running_mean is not None and running_var is not None:
        running_mean = as_tensor_variable(running_mean)
        running_var = as_tensor_variable(running_var)
        if params_ndim != ndim:
            running_mean = running_mean.dimshuffle(params_dimshuffle_pattern)
            running_var = running_var.dimshuffle(params_dimshuffle_pattern)
        else:
            running_mean = specify_broadcastable(running_mean, *axes)
            running_var = specify_broadcastable(running_var, *axes)
        out, mean, invstd, new_running_mean, new_running_var = batchnorm_op(
            inputs,
            gamma,
            beta,
            epsilon=epsilon,
            running_average_factor=running_average_factor,
            running_mean=running_mean,
            running_var=running_var,
        )
        # The Op may lose the broadcastable pattern of the running stats;
        # restore it so the outputs can replace the originals in updates.
        if new_running_mean.broadcastable != running_mean.broadcastable:
            new_running_mean = specify_broadcastable(
                new_running_mean,
                *(ax for (ax, b) in enumerate(running_mean.type.broadcastable) if b),
            )
        if new_running_var.broadcastable != running_var.broadcastable:
            new_running_var = specify_broadcastable(
                new_running_var,
                *(ax for (ax, b) in enumerate(running_var.type.broadcastable) if b),
            )
        results = (out, mean, invstd, new_running_mean, new_running_var)
    else:
        results = batchnorm_op(inputs, gamma, beta, epsilon=epsilon)

    if params_ndim != ndim:
        # remove the broadcasted dimensions (except from the output)
        results = [results[0]] + [r.dimshuffle(non_bc_axes) for r in results[1:]]
    return tuple(results)
def batch_normalization_test(
    inputs, gamma, beta, mean, var, axes="per-activation", epsilon=1e-4
):
    """
    Performs batch normalization of the given inputs, using the given mean and
    variance.

    Parameters
    ----------
    axes : 'per-activation', 'spatial' or a tuple of ints
        The axes along which the input should be normalized. ``'per-activation'``
        normalizes per activation and is equal to ``axes=(0,)``.
        ``'spatial'`` shares normalization factors across spatial dimensions
        (i.e., all dimensions past the second), which for 4D inputs would be
        equal to ``axes=(0, 2, 3)``.
    gamma : tensor
        Scale factors. The shape must match the shape of `inputs`,
        except for the axes in `axes`. These axes should be set to 1 or be
        skipped altogether (such that `gamma.ndim == inputs.ndim - len(axes)`).
    beta : tensor
        Biases. Must match the tensor layout of `gamma`.
    mean : tensor
        Means. Usually these are running averages computed during training.
        Must match the tensor layout of `gamma`.
    var : tensor
        Variances. Usually these are running averages computed during training.
        Must match the tensor layout of `gamma`.
    epsilon : float
        Epsilon value used in the batch normalization formula. Minimum allowed
        value is 1e-5 (imposed by cuDNN).

    Returns
    -------
    out : tensor
        Batch-normalized inputs.

    Notes
    -----
    If per-activation or spatial normalization is selected, this operation
    will use the cuDNN implementation. (This requires cuDNN 5 or newer.)

    The returned value is equivalent to:

    .. code-block:: python

        # for per-activation normalization
        axes = (0,)
        # for spatial normalization
        axes = (0,) + tuple(range(2, inputs.ndim))
        gamma, beta, mean, var = (at.specify_broadcastable(t, *axes)
                                  for t in (gamma, beta, mean, var))
        out = (inputs - mean) * gamma / at.sqrt(var + epsilon) + beta
    """
    ndim = inputs.ndim
    axes, non_bc_axes = _prepare_batch_normalization_axes(axes, ndim)

    # have the parameter tensors been broadcasted yet?
    if gamma.ndim == ndim:
        params_ndim = ndim
    else:
        # Parameters were given without the normalized axes; build the
        # dimshuffle pattern that re-inserts them as broadcastable dims.
        params_ndim = len(non_bc_axes)
        params_dimshuffle_pattern = ["x"] * ndim
        for i, axis in enumerate(non_bc_axes):
            params_dimshuffle_pattern[axis] = i

    if gamma.ndim != params_ndim or beta.ndim != params_ndim:
        raise ValueError(
            "gamma and beta dimensionality must match the "
            "number of non-normalized axes, or have the "
            "same number of dimensions as the inputs; "
            f"got {int(gamma.ndim)} and {int(beta.ndim)} instead of {int(params_ndim)}"
        )
    if mean.ndim != params_ndim or var.ndim != params_ndim:
        raise ValueError(
            "mean and var must be of the same dimensionality "
            f"as gamma and beta; got {int(mean.ndim)} and {int(var.ndim)} instead of {int(params_ndim)}"
        )

    # epsilon will be converted to floatX later. we need to check
    # for rounding errors now, since numpy.float32(1e-5) < 1e-5.
    epsilon = np.cast[config.floatX](epsilon)
    if epsilon < 1e-5:
        raise ValueError(f"epsilon must be at least 1e-5, got {epsilon}")

    gamma = as_tensor_variable(gamma)
    beta = as_tensor_variable(beta)
    mean = as_tensor_variable(mean)
    var = as_tensor_variable(var)

    if params_ndim != ndim:
        gamma = gamma.dimshuffle(params_dimshuffle_pattern)
        beta = beta.dimshuffle(params_dimshuffle_pattern)
        mean = mean.dimshuffle(params_dimshuffle_pattern)
        var = var.dimshuffle(params_dimshuffle_pattern)
    else:
        gamma = specify_broadcastable(gamma, *axes)
        beta = specify_broadcastable(beta, *axes)
        mean = specify_broadcastable(mean, *axes)
        var = specify_broadcastable(var, *axes)

    batchnorm_op = AbstractBatchNormInference(axes=axes)
    return batchnorm_op(inputs, gamma, beta, mean, var, epsilon=epsilon)
class AbstractBatchNormTrain(Op):
    """
    Abstract Op for Batch Normalization.

    Parameters
    ----------
    axes : a tuple of ints
        The axes along which the input should be normalized.
    x : tensor
        The input to be normalized along `axes`.
    scale : tensor
        `scale` should have the same number of dimensions as `x`.
        All dimensions listed in `axes` should have length 1.
    bias : tensor
        `bias` should have the same number of dimensions as `x`.
        All dimensions listed in `axes` should have length 1.
    epsilon
        Epsilon value used in the batch normalization formula. Minimum allowed
        value is 1e-5 (imposed by cuDNN).
    running_average_factor : float
        Factor for updating the values or `running_mean` and `running_var`.
        If the factor is close to one, the running averages will update quickly,
        if the factor is close to zero it will update slowly.
    running_mean : tensor or None
        Previous value of the running mean. If this is given, the new value
        ``running_mean * (1 - running_average_factor) + batch mean * running_average_factor``
        will be returned as one of the outputs of this function.
        `running_mean` and `running_var` should either both be given or
        both be None.
    running_var : tensor or None
        Previous value of the running variance. If this is given, the new value
        ``running_var * (1 - running_average_factor) + (m / (m - 1)) * batch var * running_average_factor``
        will be returned as one of the outputs of this function,
        where `m` is the product of lengths of the averaged-over dimensions.
        `running_mean` and `running_var` should either both be given or
        both be None.
    """

    __props__ = ("axes",)

    def __init__(self, axes=(0,)):
        assert isinstance(axes, (tuple, list))
        assert len(axes) > 0
        axes = tuple(int(a) for a in axes)
        self.axes = axes

    def infer_shape(self, fgraph, node, shape):
        # Output 0 matches x; mean/invstd (and running stats) match scale.
        return [shape[0]] + [shape[1]] * (len(node.outputs) - 1)

    def make_node(
        self,
        x,
        scale,
        bias,
        epsilon=1e-4,
        running_average_factor=0.1,
        running_mean=None,
        running_var=None,
    ):
        x = as_tensor_variable(x)
        scale = as_tensor_variable(scale)
        bias = as_tensor_variable(bias)
        epsilon = as_tensor_variable(epsilon)
        running_average_factor = as_tensor_variable(running_average_factor)
        if running_mean is not None:
            running_mean = as_tensor_variable(running_mean)
        if running_var is not None:
            running_var = as_tensor_variable(running_var)
        assert x.ndim == scale.ndim == bias.ndim
        assert (running_mean is None and running_var is None) or (
            running_mean is not None and running_var is not None
        )
        assert running_mean is None or running_mean.ndim == x.ndim
        assert running_var is None or running_var.ndim == x.ndim
        # Upcast to common dtype on the non-scalar
        # Keep as is dtype of scalar (epsilon and running_average_factor)
        # NOTE(review): this truth-tests a Variable; `running_mean is not None`
        # would be the safer spelling — confirm Variable.__bool__ semantics.
        if running_mean:
            x, scale, bias, running_mean, running_var = as_common_dtype(
                x, scale, bias, running_mean, running_var
            )
        else:
            x, scale, bias = as_common_dtype(x, scale, bias)
        inputs = [x, scale, bias, epsilon, running_average_factor]
        output_types = [x.type(), scale.type(), scale.type()]
        if running_mean is not None and running_var is not None:
            inputs.append(running_mean)
            inputs.append(running_var)
            output_types.append(scale.type())
            output_types.append(scale.type())
        return Apply(self, inputs, output_types)

    def L_op(self, inputs, outputs, grads):
        x, scale, bias, epsilon, running_average_factor = inputs[:5]
        dy = grads[0]
        _, x_mean, x_invstd = outputs[:3]
        disconnected_outputs = [
            pytensor.gradient.DisconnectedType()(),  # epsilon
            pytensor.gradient.DisconnectedType()(),
        ]  # running_average_factor
        # Optional running_mean and running_var.
        for i in range(5, len(inputs)):
            disconnected_outputs.append(pytensor.gradient.DisconnectedType()())
        return (
            AbstractBatchNormTrainGrad(self.axes)(
                x, dy, scale, x_mean, x_invstd, epsilon
            )
            + disconnected_outputs
        )

    def connection_pattern(self, node):
        # Specify that epsilon and running_average_factor are not connected to outputs.
        patterns = [
            [True, True, True],  # x
            [True, True, True],  # scale
            [True, True, True],  # bias
            [False, False, False],  # epsilon
            [False, False, False],
        ]  # running_average_factor
        # Optional running_mean and running_var are only
        # connected to their new values.
        for i in range(5, len(node.inputs)):
            patterns[0].append(True)
            for pattern in patterns[1:]:
                pattern.append(False)
            patterns.append([False] * (3 + i - 5) + [True])
        return patterns

    def perform(self, node, inputs, output_storage):
        # Reference NumPy implementation (used when no rewrite replaced the Op).
        x, scale, bias, epsilon, running_average_factor = inputs[:5]
        axes = self.axes
        if min(axes) < 0 or max(axes) >= x.ndim:
            raise ValueError(
                f"axes should be less than ndim (<{x.ndim}), but {axes} given"
            )

        mean = x.mean(axes, keepdims=True)
        var = x.var(axes, keepdims=True)
        invstd = 1.0 / np.sqrt(var + epsilon)
        out = (x - mean) * (scale * invstd) + bias

        output_storage[0][0] = out
        output_storage[1][0] = mean
        output_storage[2][0] = invstd

        if len(inputs) > 5:
            running_mean = inputs[5]
            running_mean = (
                running_mean * (1.0 - running_average_factor)
                + mean * running_average_factor
            )
            output_storage[3][0] = running_mean
        if len(inputs) > 6:
            # m / (m - 1) corrects the biased batch variance estimate.
            m = float(np.prod(x.shape) / np.prod(scale.shape))
            running_var = inputs[6]
            running_var = (
                running_var * (1.0 - running_average_factor)
                + (m / (m - 1)) * var * running_average_factor
            )
            output_storage[4][0] = running_var
class AbstractBatchNormInference(Op):
    """
    Abstract Op for Batch Normalization.

    Parameters
    ----------
    axes : a tuple of ints
        The axes along which the input is normalized.
    epsilon
        Epsilon value used in the batch normalization formula. Minimum allowed
        value is 1e-5 (imposed by cuDNN).
    """

    __props__ = ("axes",)

    def __init__(self, axes=(0,)):
        assert isinstance(axes, (tuple, list))
        assert len(axes) > 0
        axes = tuple(int(a) for a in axes)
        self.axes = axes

    def infer_shape(self, fgraph, node, shape):
        # Single output has the same shape as x.
        return [shape[0]]

    def make_node(
        self, x, scale, bias, estimated_mean, estimated_variance, epsilon=1e-4
    ):
        x = as_tensor_variable(x)
        scale = as_tensor_variable(scale)
        bias = as_tensor_variable(bias)
        estimated_mean = as_tensor_variable(estimated_mean)
        estimated_variance = as_tensor_variable(estimated_variance)
        epsilon = as_tensor_variable(epsilon)
        # Upcast to common dtype on the non-scalar
        # Keep as is dtype of scalar (epsilon)
        x, scale, bias, estimated_mean, estimated_variance = as_common_dtype(
            x, scale, bias, estimated_mean, estimated_variance
        )
        assert (
            x.ndim
            == scale.ndim
            == bias.ndim
            == estimated_mean.ndim
            == estimated_variance.ndim
        )
        return Apply(
            self,
            [x, scale, bias, estimated_mean, estimated_variance, epsilon],
            [x.type()],
        )

    def grad(self, inputs, grads):
        # Symbolic gradients of out = (x - mean) * scale / sqrt(var + eps) + bias
        # w.r.t. every input except epsilon (disconnected).
        x, scale, bias, est_mean, est_var, epsilon = inputs
        dy = grads[0]
        axes = self.axes
        if min(axes) < 0 or max(axes) >= x.ndim:
            raise ValueError(
                f"axes should be less than ndim (<{x.ndim}), but {axes} given"
            )

        scale, bias, est_mean, est_var = (
            specify_broadcastable(t, *axes) for t in (scale, bias, est_mean, est_var)
        )

        # define helper expressions
        est_var_eps = est_var + epsilon
        est_std = sqrt(est_var_eps)
        two = at.constant(2.0)

        # define and return gradients
        dx = dy * (scale / est_std)
        dscale = (dy * (x - est_mean)).sum(axes, keepdims=True) / est_std
        dbias = dy.sum(axes, keepdims=True)
        dmean = -dy.sum(axes, keepdims=True) * (scale / est_std)
        dvar = -(dy * (x - est_mean)).sum(axes, keepdims=True) * (
            scale / (two * est_var_eps * est_std)
        )
        return [dx, dscale, dbias, dmean, dvar, pytensor.gradient.DisconnectedType()()]

    def connection_pattern(self, node):
        # Specify that epsilon is not connected to outputs.
        return [[True], [True], [True], [True], [True], [False]]

    def perform(self, node, inputs, output_storage):
        # Reference NumPy implementation.
        x, scale, bias, estimated_mean, estimated_variance, epsilon = inputs
        out = (x - estimated_mean) * (
            scale / np.sqrt(estimated_variance + epsilon)
        ) + bias
        output_storage[0][0] = out
class AbstractBatchNormTrainGrad(Op):
    """Gradient Op for `AbstractBatchNormTrain`.

    Given the forward inputs (`x`, `scale`), the saved forward statistics
    (`x_mean`, `x_invstd`) and the output gradient `dy`, computes the
    gradients w.r.t. `x`, `scale` and `bias`.
    """

    __props__ = ("axes",)

    def __init__(self, axes=(0,)):
        assert isinstance(axes, (tuple, list))
        assert len(axes) > 0
        axes = tuple(int(a) for a in axes)
        self.axes = axes

    def make_node(self, x, dy, scale, x_mean, x_invstd, epsilon=1e-4):
        x = as_tensor_variable(x)
        dy = as_tensor_variable(dy)
        scale = as_tensor_variable(scale)
        x_mean = as_tensor_variable(x_mean)
        x_invstd = as_tensor_variable(x_invstd)
        epsilon = as_tensor_variable(epsilon)

        # Upcast to common dtype on the non-scalar
        # Keep as is dtype of scalar (epsilon)
        x, dy, scale, x_mean, x_invstd = as_common_dtype(x, dy, scale, x_mean, x_invstd)
        assert x.ndim == dy.ndim == scale.ndim == x_mean.ndim == x_invstd.ndim
        return Apply(
            self,
            [x, dy, scale, x_mean, x_invstd, epsilon],
            [x.type(), scale.type(), scale.type()],
        )

    def grad(self, inp, grads):
        # Second-order gradient (grad of the grad Op), needed e.g. for R-op /
        # double-backprop through batch normalization.
        x, dy, scale, x_mean, x_invstd, epsilon = inp
        ddinputs, ddscale, ddbias = grads

        x_diff = x - x_mean
        mean_dy_x_diff = mean(dy * x_diff, axis=self.axes, keepdims=True)

        # compute gradients given each of the output gradients
        g_wrt_x = 0
        g_wrt_dy = 0
        g_wrt_scale = 0
        g_wrt_x_mean = 0
        g_wrt_x_invstd = 0

        if not isinstance(ddinputs.type, pytensor.gradient.DisconnectedType):
            ccc = scale * (ddinputs - mean(ddinputs, axis=self.axes, keepdims=True))
            ddd = (x_invstd**3) * (
                ccc * mean(dy * x_diff, axis=self.axes, keepdims=True)
                + dy * mean(ccc * x_diff, axis=self.axes, keepdims=True)
            )

            g_wrt_x = g_wrt_x - ddd
            g_wrt_dy = g_wrt_dy + (
                (ccc * x_invstd)
                - (
                    (x_invstd**3)
                    * x_diff
                    * mean(ccc * x_diff, axis=self.axes, keepdims=True)
                )
            )

            eee = (dy * x_invstd) - ((x_invstd**3) * x_diff * mean_dy_x_diff)
            g_wrt_scale = g_wrt_scale + at_sum(
                ddinputs * (eee - mean(eee, axis=self.axes, keepdims=True)),
                axis=self.axes,
                keepdims=True,
            )

            g_wrt_x_mean = g_wrt_x_mean + at_sum(ddd, axis=self.axes, keepdims=True)
            g_wrt_x_invstd = g_wrt_x_invstd + at_sum(
                ccc * (dy - 3 * (x_invstd**2) * x_diff * mean_dy_x_diff),
                axis=self.axes,
                keepdims=True,
            )

        if not isinstance(ddscale.type, pytensor.gradient.DisconnectedType):
            g_wrt_x = g_wrt_x + (x_invstd * ddscale * dy)
            g_wrt_dy = g_wrt_dy + (x_invstd * ddscale * x_diff)
            g_wrt_x_mean = g_wrt_x_mean - (
                x_invstd * ddscale * at_sum(dy, axis=self.axes, keepdims=True)
            )
            g_wrt_x_invstd = g_wrt_x_invstd + (
                ddscale * at_sum(dy * x_diff, axis=self.axes, keepdims=True)
            )

        if not isinstance(ddbias.type, pytensor.gradient.DisconnectedType):
            g_wrt_dy = g_wrt_dy + at.fill(dy, ddbias)

        # depending on which output gradients are given,
        # some inputs should be disconnected
        results = [
            g_wrt_x,
            g_wrt_dy,
            g_wrt_scale,
            g_wrt_x_mean,
            g_wrt_x_invstd,
            pytensor.gradient.DisconnectedType()(),
        ]
        # Accumulators still equal to the integer 0 were never touched above,
        # so the corresponding input is disconnected from the gradients.
        return [
            pytensor.gradient.DisconnectedType()()
            if (isinstance(r, int) and r == 0)
            else r
            for r in results
        ]

    def connection_pattern(self, node):
        return [
            [True, True, False],  # x
            [True, True, True],  # dy
            [True, False, False],  # scale
            [True, True, False],  # x_mean
            [True, True, False],  # x_invstd
            [False, False, False],
        ]  # epsilon

    def infer_shape(self, fgraph, node, shape):
        # g_wrt_inputs matches x; g_wrt_scale and g_wrt_bias match scale.
        return [shape[0], shape[2], shape[2]]

    def perform(self, node, inputs, output_storage):
        # Reference NumPy implementation of the backward pass.
        x, dy, scale, x_mean, x_invstd, epsilon = inputs
        axes = self.axes
        if min(axes) < 0 or max(axes) >= x.ndim:
            raise ValueError(
                f"axes should be less than ndim (<{x.ndim}), but {axes} given"
            )

        x_diff = x - x_mean
        mean_dy_x_diff = np.mean(dy * x_diff, axis=axes, keepdims=True)
        c = (dy * x_invstd) - (x_diff * mean_dy_x_diff * (x_invstd**3))

        g_wrt_inputs = scale * (c - np.mean(c, axis=axes, keepdims=True))
        g_wrt_scale = np.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
        g_wrt_bias = np.sum(dy, axis=axes, keepdims=True)

        output_storage[0][0] = g_wrt_inputs
        output_storage[1][0] = g_wrt_scale
        output_storage[2][0] = g_wrt_bias
@node_rewriter([AbstractBatchNormTrain])
def local_abstract_batch_norm_train(fgraph, node):
    """Lower `AbstractBatchNormTrain` to plain tensor ops (CPU fallback)."""
    if not isinstance(node.op, AbstractBatchNormTrain):
        return None

    x, scale, bias, epsilon, running_average_factor = node.inputs[:5]
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None
    if (
        not isinstance(x.type, TensorType)
        or not isinstance(scale.type, TensorType)
        or not isinstance(bias.type, TensorType)
        or not isinstance(epsilon.type, TensorType)
        or not isinstance(running_average_factor.type, TensorType)
    ):
        return None
    # optional running_mean and running_var
    if len(node.inputs) > 5 and not isinstance(node.inputs[5].type, TensorType):
        return None
    if len(node.inputs) > 6 and not isinstance(node.inputs[6].type, TensorType):
        return None

    mean = x.mean(axes, keepdims=True)
    var = x.var(axes, keepdims=True)
    # The epsilon should not upcast the dtype.
    if var.dtype == "float32" and epsilon.dtype == "float64":
        epsilon = epsilon.astype("float32")

    invstd = reciprocal(sqrt(var + epsilon))
    out = (x - mean) * (scale * invstd) + bias
    results = [out, mean, invstd]

    if len(node.inputs) > 5:
        running_mean = node.inputs[5]
        running_mean = (
            running_mean * (1.0 - running_average_factor)
            + mean * running_average_factor
        )
        results.append(running_mean)
    if len(node.inputs) > 6:
        # m / (m - 1) corrects the biased batch variance estimate.
        m = at.cast(prod(x.shape) / prod(scale.shape), config.floatX)
        running_var = node.inputs[6]
        running_var = (
            running_var * (1.0 - running_average_factor)
            + (m / (m - 1)) * var * running_average_factor
        )
        results.append(running_var)

    # Fix: use a distinct loop variable so it does not shadow the `var`
    # graph variable computed above while propagating stack traces.
    for new_var in pytensor.graph.basic.vars_between(node.inputs, results):
        if new_var not in node.inputs:
            copy_stack_trace(node.outputs[0], new_var)

    return results
@node_rewriter([AbstractBatchNormTrainGrad])
def local_abstract_batch_norm_train_grad(fgraph, node):
    """Lower `AbstractBatchNormTrainGrad` to plain tensor ops (CPU fallback)."""
    if not isinstance(node.op, AbstractBatchNormTrainGrad):
        return None

    x, dy, scale, x_mean, x_invstd, epsilon = node.inputs
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None

    # Only rewrite when every input is a plain TensorType.
    if any(
        not isinstance(inp.type, TensorType)
        for inp in (x, dy, scale, x_mean, x_invstd, epsilon)
    ):
        return None

    # Same algebra as AbstractBatchNormTrainGrad.perform, built symbolically.
    x_diff = x - x_mean
    mean_dy_x_diff = mean(dy * x_diff, axis=axes, keepdims=True)
    c = (dy * x_invstd) - x_diff * (mean_dy_x_diff * (x_invstd**3))

    g_wrt_inputs = scale * (c - mean(c, axis=axes, keepdims=True))
    g_wrt_scale = at_sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
    g_wrt_bias = at_sum(dy, axis=axes, keepdims=True)
    results = [g_wrt_inputs, g_wrt_scale, g_wrt_bias]

    # Propagate stack traces onto every variable introduced by the rewrite.
    for new_var in pytensor.graph.basic.vars_between(node.inputs, results):
        if new_var not in node.inputs:
            copy_stack_trace(node.outputs[0], new_var)
    return results
@node_rewriter([AbstractBatchNormInference])
def local_abstract_batch_norm_inference(fgraph, node):
    """Lower `AbstractBatchNormInference` to plain tensor ops (CPU fallback)."""
    if not isinstance(node.op, AbstractBatchNormInference):
        return None

    x, scale, bias, estimated_mean, estimated_variance, epsilon = node.inputs

    # Only rewrite when every input is a plain TensorType.
    if any(
        not isinstance(inp.type, TensorType)
        for inp in (x, scale, bias, estimated_mean, estimated_variance, epsilon)
    ):
        return None

    # The epsilon should not upcast the dtype.
    if estimated_variance.dtype == "float32" and epsilon.dtype == "float64":
        epsilon = epsilon.astype("float32")

    result = (x - estimated_mean) * (scale / sqrt(estimated_variance + epsilon)) + bias

    # Propagate stack traces onto every variable introduced by the rewrite.
    for new_var in pytensor.graph.basic.vars_between(node.inputs, [result]):
        if new_var not in node.inputs:
            copy_stack_trace(node.outputs[0], new_var)
    return [result]
# Register the CPU lowerings: group the abstract batch-norm rewrites in one
# LocalGroupDB and hook it into the device-specialization phase so they run
# under both "fast_compile" and "fast_run".
bn_groupopt = pytensor.graph.rewriting.db.LocalGroupDB()
bn_groupopt.__name__ = "batchnorm_opts"
register_specialize_device(bn_groupopt, "fast_compile", "fast_run")

bn_groupopt.register(
    "local_abstract_batch_norm_train",
    local_abstract_batch_norm_train,
    "fast_compile",
    "fast_run",
    position=30,
)
bn_groupopt.register(
    "local_abstract_batch_norm_train_grad",
    local_abstract_batch_norm_train_grad,
    "fast_compile",
    "fast_run",
    position=30,
)
bn_groupopt.register(
    "local_abstract_batch_norm_inference",
    local_abstract_batch_norm_inference,
    "fast_compile",
    "fast_run",
    position=30,
)
from typing import List
import numpy as np
import pytensor
from pytensor.gradient import grad_undefined
from pytensor.graph.basic import Apply
from pytensor.graph.op import Op
from pytensor.tensor.type import discrete_dtypes
class SparseBlockGemv(Op):
    """
    This op computes the dot product of specified pieces of vectors
    and matrices, returning pieces of vectors::

        for b in range(batch_size):
            for j in range(o.shape[1]):
                for i in range(h.shape[1]):
                    o[b, j, :] += numpy.dot(h[b, i], W[iIdx[b, i], oIdx[b, j]])

    where b, h, W, o iIdx, oIdx are defined in the docstring of make_node.

    .. image:: ../../../images/blocksparse.png
        :scale: 50 %
    """

    __props__ = ("inplace",)
    registered_opts: List = []

    def __init__(self, inplace=False):
        self.inplace = inplace
        if self.inplace:
            # In-place variant overwrites its first input (the `o` buffer).
            self.destroy_map = {0: [0]}

    def make_node(self, o, W, h, inputIdx, outputIdx):
        """
        Compute the dot product of the specified pieces of vectors
        and matrices.

        The parameter types are actually their expected shapes
        relative to each other.

        Parameters
        ----------
        o : batch, oWin, oSize
            output vector
        W : iBlocks, oBlocks, iSize, oSize
            weight matrix
        h : batch, iWin, iSize
            input from lower layer (sparse)
        inputIdx : batch, iWin
            indexes of the input blocks
        outputIdx : batch, oWin
            indexes of the output blocks

        Returns
        -------
        (batch, oWin, oSize)
            dot(W[i, j], h[i]) + o[j]

        Notes
        -----
        - `batch` is the number of examples in a minibatch (batch size).
        - `iBlocks` is the total number of blocks in the input (from lower
          layer).
        - `iSize` is the size of each of these input blocks.
        - `iWin` is the number of blocks that will be used as inputs. Which
          blocks will be used is specified in `inputIdx`.
        - `oBlocks` is the number or possible output blocks.
        - `oSize` is the size of each of these output blocks.
        - `oWin` is the number of output blocks that will actually be computed.
          Which blocks will be computed is specified in `outputIdx`.

        """
        o = pytensor.tensor.as_tensor_variable(o)
        W = pytensor.tensor.as_tensor_variable(W)
        h = pytensor.tensor.as_tensor_variable(h)
        inputIdx = pytensor.tensor.as_tensor_variable(inputIdx)
        outputIdx = pytensor.tensor.as_tensor_variable(outputIdx)

        if o.ndim != 3:
            # Fixed error message: the check requires a 3D tensor but the
            # message previously said "2D".
            raise TypeError("The output o must be a 3D tensor")
        if W.ndim != 4:
            raise TypeError("The weight matrix W must be a 4D tensor")
        if h.ndim != 3:
            raise TypeError("The input h must be a 3D tensor")
        if inputIdx.ndim != 2:
            raise TypeError("The input indices inputIdx must be a 2D tensor")
        if outputIdx.ndim != 2:
            raise TypeError("The output indices outputIdx must be a 2D tensor")

        assert inputIdx.type.dtype in discrete_dtypes
        assert outputIdx.type.dtype in discrete_dtypes

        return Apply(self, [o, W, h, inputIdx, outputIdx], [o.type()])

    def perform(self, node, inp, out_):
        # Reference (pure NumPy) implementation of the block-sparse gemv.
        o, W, h, iIdx, oIdx = inp[:5]

        if not self.inplace:
            o = o.copy()

        for b in range(o.shape[0]):
            for j in range(o.shape[1]):
                outputIdx = oIdx[b, j]
                for i in range(h.shape[1]):
                    inputIdx = iIdx[b, i]
                    w = W[inputIdx, outputIdx]
                    # Accumulate the contribution of input block i into
                    # output block j.
                    o[b, j, :] += np.dot(h[b, i], w)
        out_[0][0] = o

    def infer_shape(self, fgraph, node, input_shapes):
        # Output has exactly the shape of the `o` input.
        return [input_shapes[0]]

    def grad(self, inputs, grads):
        o, W, h, inputIdx, outputIdx = inputs
        go = grads[0]

        outer_fun = SparseBlockOuter(self.inplace)
        gemv_fun = SparseBlockGemv(self.inplace)

        # grad wrt W: outer products of the used h blocks with the output
        # gradient, accumulated into a zero weight tensor.
        Wgrad = outer_fun(W.zeros_like(), h, go, inputIdx, outputIdx)
        # grad wrt h: a gemv with transposed block layout (swap the block
        # axes and the within-block axes of W, and swap the index roles).
        hgrad = gemv_fun(
            h.zeros_like(), W.dimshuffle((1, 0, 3, 2)), go, outputIdx, inputIdx
        )
        return [
            go,
            Wgrad,
            hgrad,
            grad_undefined(self, 3, inputIdx, "grad of inputIdx makes no sense"),
            grad_undefined(self, 4, outputIdx, "grad of outputIdx makes no sense"),
        ]
class SparseBlockOuter(Op):
    """
    This computes the outer product of two sets of pieces of vectors
    updating a full matrix with the results::

        for b in range(batch_size):
            o[xIdx[b, i], yIdx[b, j]] += (alpha * outer(x[b, i], y[b, j]))

    This op is involved in the gradient of SparseBlockGemv.
    """

    __props__ = ("inplace",)
    registered_opts: List = []

    def __init__(self, inplace=False):
        # When inplace, the first input (`o`) is overwritten by the output.
        self.inplace = inplace
        if self.inplace:
            self.destroy_map = {0: [0]}

    def make_node(self, o, x, y, xIdx, yIdx, alpha=None):
        """
        Compute the dot product of the specified pieces of vectors
        and matrices.

        The parameter types are actually their expected shapes
        relative to each other.

        Parameters
        ----------
        o : xBlocks, yBlocks, xSize, ySize
        x : batch, xWin, xSize
        y : batch, yWin, ySize
        xIdx : batch, iWin
            indexes of the x blocks
        yIdx : batch, oWin
            indexes of the y blocks

        Returns
        -------
        (xBlocks, yBlocks, xSize, ySize)
            outer(x[i], y[j]) + o[i, j]

        Notes
        -----
        - `batch` is the number of examples in a minibatch (batch size).
        - `xBlocks` is the total number of blocks in x.
        - `xSize` is the size of each of these x blocks.
        - `xWin` is the number of blocks that will be used as x. Which blocks
          will be used is specified in `xIdx`.
        - `yBlocks` is the number or possible y blocks.
        - `ySize` is the size of each of these y blocks.
        - `yWin` is the number of y blocks that will actually be computed.
          Which blocks will be computed is specified in `yIdx`.
        """
        # Default scaling factor is a float32 1.0 constant.
        one = pytensor.tensor.constant(np.asarray(1.0, dtype="float32"))
        o = pytensor.tensor.as_tensor_variable(o)
        x = pytensor.tensor.as_tensor_variable(x)
        y = pytensor.tensor.as_tensor_variable(y)

        if alpha is None:
            alpha = one

        return Apply(self, [o, x, y, xIdx, yIdx, alpha], [o.type()])

    def infer_shape(self, fgraph, node, input_shapes):
        # Output has exactly the shape of the `o` input.
        return [input_shapes[0]]

    def perform(self, node, inp, out_):
        # Reference (pure NumPy) implementation.
        # NOTE(review): `alpha` is unpacked but never applied below, so the
        # update is `o += outer(x, y)` regardless of alpha.  The default
        # alpha is 1.0 (see make_node), for which this matches the class
        # docstring — confirm whether non-default alpha should scale the
        # update before relying on it.
        o, x, y, xIdx, yIdx, alpha = inp[:6]

        if not self.inplace:
            o = o.copy()

        for b in range(x.shape[0]):
            for i in range(xIdx.shape[1]):
                for j in range(yIdx.shape[1]):
                    o[xIdx[b, i], yIdx[b, j]] += np.outer(x[b, i], y[b, j, :])
        out_[0][0] = o
# Pre-built op instances: out-of-place and in-place variants of each op.
# The in-place variants destroy their first input (see destroy_map in the
# respective __init__) and are normally only introduced by graph rewrites.
sparse_block_gemv = SparseBlockGemv(False)
sparse_block_gemv_inplace = SparseBlockGemv(True)
sparse_block_outer = SparseBlockOuter(False)
sparse_block_outer_inplace = SparseBlockOuter(True)
def sparse_block_dot(W, h, inputIdx, b, outputIdx):
    """
    Compute the dot product (plus bias) of the specified pieces of vectors
    and matrices. See SparseBlockGemv to get more information.

    The parameter types are actually their expected shapes relative to
    each other.

    Parameters
    ----------
    W : iBlocks, oBlocks, iSize, oSize
        weight matrix
    h : batch, iWin, iSize
        input from lower layer (sparse)
    inputIdx : batch, iWin
        indexes of the input blocks
    b : oBlocks, oSize
        bias vector
    outputIdx : batch, oWin
        indexes of the output blocks

    Returns
    -------
    (batch, oWin, oSize)
        dot(W[i, j], h[i]) + b[j] but b[j] is only added once

    Notes
    -----
    - `batch` is the number of examples in a minibatch (batch size).
    - `iBlocks` is the total number of blocks in the input (from lower layer).
    - `iSize` is the size of each of these input blocks.
    - `iWin` is the number of blocks that will be used as inputs. Which blocks
      will be used is specified in `inputIdx`.
    - `oBlocks` is the number or possible output blocks.
    - `oSize` is the size of each of these output blocks.
    - `oWin` is the number of output blocks that will actually be computed.
      Which blocks will be computed is specified in `outputIdx`.
    """
    assert inputIdx.ndim == h.ndim - 1
    assert outputIdx.ndim == inputIdx.ndim

    if h.ndim == 2:
        # Unbatched input: promote everything to a batch of size 1.
        h, inputIdx, outputIdx = (
            h.dimshuffle("x", 0, 1),
            inputIdx.dimshuffle("x", 0),
            outputIdx.dimshuffle("x", 0),
        )

    # Seed the accumulator with the bias rows selected by outputIdx so that
    # each bias block is added exactly once.
    initial = b.take(outputIdx, axis=0)
    return SparseBlockGemv()(initial, W, h, inputIdx, outputIdx)
#section support_code
/* Per-apply state for a CPU CTC computation: the library options plus the
   heap buffers this wrapper owns.  All pointers are either NULL or owned
   allocations released by ctc_context_destroy(). */
typedef struct ctc_context {
    struct ctcOptions options;   // warp-ctc configuration (CPU, thread count)
    void * workspace;            // scratch buffer sized by get_workspace_size()
    int * input_lengths;         // per-minibatch-item number of time steps
    int * flat_labels;           // all label sequences concatenated, padding removed
    int * label_lengths;         // per-minibatch-item label sequence length
} ctc_context_t;
/* Initialize a CTC context for CPU execution.  After this call every owned
   pointer is NULL, so ctc_context_destroy() is always safe to invoke. */
void ctc_context_init(ctc_context_t * context)
{
    struct ctcOptions * opts = &(context->options);

    /* Start from an all-zero option block, then set the CPU specifics. */
    memset(opts, 0, sizeof(struct ctcOptions));
    opts->loc = CTC_CPU;
#if defined(_OPENMP)
    opts->num_threads = omp_get_num_threads();
#else
    opts->num_threads = 1;
#endif

    /* No buffers allocated yet. */
    context->workspace = NULL;
    context->flat_labels = NULL;
    context->label_lengths = NULL;
    context->input_lengths = NULL;
}
/* Release every buffer owned by the context.  Each pointer is either a
   valid allocation or NULL (as set by ctc_context_init), and free(NULL)
   is a no-op, so this can be called from any error path. */
void ctc_context_destroy(ctc_context_t * context)
{
    free( context->workspace );
    free( context->input_lengths );
    free( context->flat_labels );
    free( context->label_lengths );
}
/* Translate a warp-ctc status code into a Python RuntimeError.
   Returns 0 on success, 1 when an exception has been set. */
int ctc_check_result(ctcStatus_t retcode, const char * msg)
{
    if( CTC_STATUS_SUCCESS == retcode )
        return 0;

    // Get error message from underlying library
    const char * ctc_msg = ctcGetStatusString( retcode );
    PyErr_Format( PyExc_RuntimeError,
                  "ConnectionistTemporalClassification: %s CTC error: %s",
                  msg,
                  ctc_msg );
    return 1;
}
/* Copy the npy_int input-length array into a freshly calloc'd plain-int
   buffer, as required by the warp-ctc API.  On allocation failure
   *input_lengths is left NULL; the caller must check for that. */
void create_contiguous_input_lengths( PyArrayObject * input_lengths_arr,
    int ** input_lengths )
{
    npy_int n_lengths = PyArray_DIMS( input_lengths_arr )[0];

    *input_lengths = (int *) calloc( n_lengths, sizeof(int) );
    if ( NULL == (*input_lengths) )
        return;

    for( npy_int idx = 0; idx < n_lengths; ++idx )
    {
        (*input_lengths)[idx] =
            *( (npy_int *) PyArray_GETPTR1( input_lengths_arr, idx ) );
    }
}
/* Flatten a (rows x cols) label matrix into the layout warp-ctc expects:
   one contiguous int array of all labels with padding removed
   (*flat_labels), plus a per-row count of real labels (*label_lengths).
   On allocation failure both output pointers are left NULL; the caller
   must check for that.  The flat buffer is sized rows*cols (an upper
   bound); only the first sum(label_lengths) entries are meaningful. */
void create_flat_labels( PyArrayObject * label_matrix, int ** flat_labels,
    int ** label_lengths )
{
    npy_int rows = PyArray_DIMS( label_matrix )[0];
    npy_int cols = PyArray_DIMS( label_matrix )[1];

    *flat_labels = (int *) calloc( rows * cols, sizeof(int) );
    if ( NULL == (*flat_labels) )
        return;

    *label_lengths = (int *) calloc( rows, sizeof(int) );
    if ( NULL == (*label_lengths) )
    {
        /* Second allocation failed: roll back the first so the caller sees
           a consistent all-NULL failure state. */
        free( *flat_labels );
        *flat_labels = NULL;
        return;
    }

    npy_int label_index = 0;
    for( npy_int row_idx = 0; row_idx < rows; ++row_idx )
    {
        npy_int label_length = 0;
        for( npy_int col_idx = 0; col_idx < cols; ++col_idx )
        {
            npy_int label = *( (npy_int *) PyArray_GETPTR2( label_matrix, row_idx, col_idx ) );
            if ( label >= 0 )  // negative values are assumed to be padding
            {
                (*flat_labels)[ label_index++ ] = label;
                ++label_length;
            }
        }
        (*label_lengths)[ row_idx ] = label_length;
    }
}
#section support_code_apply
/* Compute the CTC loss (and optionally its gradient) on CPU via warp-ctc.
 *
 * in_activations    : float32 (time, batch, alphabet) network outputs;
 *                     must be C-contiguous.
 * in_labels         : int label matrix, negative entries are padding.
 * in_input_lengths  : int per-item number of valid time steps.
 * out_costs         : (out) float32 vector of per-item losses, reallocated
 *                     if missing or wrongly shaped.
 * out_gradients     : (out) float32 gradient with the activations' shape,
 *                     or NULL to skip gradient computation.
 *
 * Returns 0 on success; on failure sets a Python exception, frees all
 * context-owned buffers, and returns 1.
 */
int APPLY_SPECIFIC(ctc_cost_cpu)(PyArrayObject *   in_activations,
                                 PyArrayObject *   in_labels,
                                 PyArrayObject *   in_input_lengths,
                                 PyArrayObject **  out_costs,
                                 PyArrayObject **  out_gradients)
{
    // Stack-allocated context; only its members own heap memory.
    ctc_context_t ctc_object;
    ctc_context_t * context = &ctc_object;
    ctc_context_init( context );

    if ( !PyArray_IS_C_CONTIGUOUS( in_activations ) )
    {
        PyErr_SetString( PyExc_RuntimeError,
            "ConnectionistTemporalClassification: activations array must be C-contiguous." );
        return 1;
    }

    npy_float32 * activations = (npy_float32 *) PyArray_DATA( in_activations );

    create_contiguous_input_lengths( in_input_lengths, &(context->input_lengths) );

    if ( NULL == context->input_lengths )
    {
        // Destroy previous CTC context before returning exception
        ctc_context_destroy( context );

        PyErr_Format( PyExc_MemoryError,
            "ConnectionistTemporalClassification: Could not allocate memory for input lengths" );
        return 1;
    }

    // flatten labels to conform with library memory layout
    create_flat_labels( in_labels, &(context->flat_labels),
        &(context->label_lengths) );

    if ( ( NULL == context->label_lengths ) || ( NULL == context->flat_labels ) )
    {
        // Destroy previous CTC context before returning exception
        ctc_context_destroy( context );

        PyErr_Format( PyExc_MemoryError,
            "ConnectionistTemporalClassification: Could not allocate memory for labels and their lengths" );
        return 1;
    }

    npy_int minibatch_size = PyArray_DIMS( in_activations )[1];
    npy_int alphabet_size = PyArray_DIMS( in_activations )[2];

    npy_float32 * costs = NULL;
    npy_intp cost_size = minibatch_size;

    // (Re)allocate the cost vector unless a correctly-shaped one exists.
    if ( (*out_costs) == NULL ||                       // Symbolic variable has no memory backing
         PyArray_NDIM( *out_costs ) != 1 ||            // or, matrix has the wrong size
         PyArray_DIMS( *out_costs )[0] != cost_size )
    {
        Py_XDECREF( *out_costs );
        // Allocate new matrix
        *out_costs = (PyArrayObject *) PyArray_ZEROS( 1, &cost_size, NPY_FLOAT32, 0 );

        if ( NULL == (*out_costs) )
        {
            // Destroy previous CTC context before returning exception
            ctc_context_destroy( context );

            PyErr_Format( PyExc_MemoryError,
                "ConnectionistTemporalClassification: Could not allocate memory for CTC costs" );
            return 1;
        }
    }

    costs = (npy_float32 *) PyArray_DATA( *out_costs );

    npy_float32 * gradients = NULL;

    if ( NULL != out_gradients )  // If gradient computation is not disabled
    {
        // (Re)allocate the gradient array unless one with the activations'
        // shape already exists.
        if ( NULL == (*out_gradients) ||  // Symbolic variable has no real backing
             PyArray_NDIM( *out_gradients ) != 3 ||
             PyArray_DIMS( *out_gradients )[0] != PyArray_DIMS( in_activations )[0] ||
             PyArray_DIMS( *out_gradients )[1] != PyArray_DIMS( in_activations )[1] ||
             PyArray_DIMS( *out_gradients )[2] != PyArray_DIMS( in_activations )[2] )
        {
            // Existing matrix is the wrong size. Make a new one.
            // Decrement ref counter to existing array
            Py_XDECREF( *out_gradients );
            // Allocate new array
            *out_gradients = (PyArrayObject *) PyArray_ZEROS(3, PyArray_DIMS( in_activations ),
                NPY_FLOAT32, 0);

            if ( NULL == (*out_gradients) )
            {
                // Destroy previous CTC context before returning exception
                ctc_context_destroy( context );

                PyErr_Format( PyExc_MemoryError,
                    "ConnectionistTemporalClassification: Could not allocate memory for CTC gradients!" );
                return 1;
            }
        }
        gradients = (npy_float32 *) PyArray_DATA( *out_gradients );
    }

    size_t cpu_workspace_size;
    int ctc_error;

    // Ask warp-ctc how much scratch memory this problem needs.
    ctc_error = ctc_check_result( get_workspace_size( context->label_lengths,
        context->input_lengths, alphabet_size, minibatch_size, context->options,
        &cpu_workspace_size ),
        "Failed to obtain CTC workspace size." );

    if ( ctc_error )  // Exception is set by ctc_check_result, return error here
    {
        // Destroy previous CTC context before returning exception
        ctc_context_destroy( context );

        return 1;
    }

    context->workspace = malloc( cpu_workspace_size );

    if ( NULL == context->workspace )
    {
        // Destroy previous CTC context before returning exception
        ctc_context_destroy( context );

        PyErr_Format( PyExc_MemoryError,
            "ConnectionistTemporalClassification: Failed to allocate memory for CTC workspace." );
        return 1;
    }

    // Run the actual loss (and gradient, if `gradients` != NULL) computation.
    ctc_error = ctc_check_result( compute_ctc_loss( activations, gradients,
        context->flat_labels, context->label_lengths, context->input_lengths,
        alphabet_size, minibatch_size, costs, context->workspace,
        context->options ), "Failed to compute CTC loss function." );

    if ( ctc_error )  // Exception is set by ctc_check_result, return error here
    {
        ctc_context_destroy( context );

        return 1;
    }

    ctc_context_destroy( context );

    return 0;
}
"""
Contains an Op for convolving input images with a set of filters. This was
developed especially for Convolutional Neural Networks.
For related ops, including downsampling and subsampling, see
tensor.signal and tensor.signal.pool.
See especially conv2d().
"""
import logging
import warnings
import numpy as np
try:
from scipy.signal.signaltools import _bvalfromboundary, _valfrommode
from scipy.signal.sigtools import _convolve2d
except ImportError:
from scipy.signal._signaltools import _bvalfromboundary, _valfrommode
from scipy.signal._sigtools import _convolve2d
import pytensor
from pytensor.graph.basic import Apply
from pytensor.link.c.op import OpenMPOp
from pytensor.tensor import blas
from pytensor.tensor.basic import as_tensor_variable, get_scalar_constant_value
from pytensor.tensor.exceptions import NotScalarConstantError
from pytensor.tensor.nnet.abstract_conv import (
get_conv_output_shape,
get_conv_shape_1axis,
)
from pytensor.tensor.shape import specify_broadcastable
from pytensor.tensor.type import discrete_dtypes, tensor
__docformat__ = "restructuredtext en"

# Module-level logger.  NOTE(review): the logger name still says "nnet"
# even though the nnet namespace is being removed — confirm whether this
# should be renamed (renaming would change log-filter configuration).
_logger = logging.getLogger("pytensor.tensor.nnet.conv")
def conv2d(
    input,
    filters,
    image_shape=None,
    filter_shape=None,
    border_mode="valid",
    subsample=(1, 1),
    **kargs,
):
    """Build the symbolic graph for convolving a stack of input images with a set of filters.

    The implementation is modelled after Convolutional Neural Networks
    (CNN). It is simply a wrapper to the `ConvOp` but provides a much cleaner
    interface.

    This is deprecated.

    Parameters
    ----------
    input : symbolic 4D tensor
        Mini-batch of feature map stacks, of shape
        (batch size, stack size, nb row, nb col)
        see the optional parameter image_shape
    filters: symbolic 4D tensor
        Set of filters used in CNN layer of shape
        (nb filters, stack size, nb row, nb col)
        see the optional parameter filter_shape
    border_mode : {'valid', 'full'}
        'valid'only apply filter to complete patches of the image. Generates
        output of shape: image_shape - filter_shape + 1.
        'full' zero-pads image to multiple of filter shape to generate output
        of shape: image_shape + filter_shape - 1.
    subsample: tuple of len 2
        Factor by which to subsample the output. Also called strides elsewhere.
    image_shape: None, tuple/list of len 4 of int, None or Constant variable
        The shape of the input parameter.
        Optional, used for optimization like loop unrolling
        You can put None for any element of the list to tell that this element
        is not constant.
    filter_shape : None, tuple/list of len 4 of int, None or Constant variable
        Optional, used for optimization like loop unrolling
        You can put None for any element of the list
        to tell that this element is not constant.
    kwargs
        Kwargs are passed onto ConvOp. Can be used to set the following:
        unroll_batch, unroll_kern, unroll_patch, openmp (see ConvOp doc).

        openmp: By default have the same value as
                config.openmp. For small image, filter,
                batch size, nkern and stack size, it can be
                faster to disable manually openmp. A fast and
                incomplete test show that with image size
                6x6, filter size 4x4, batch size==1,
                n kern==1 and stack size==1, it is faster
                to disable it in valid mode. But if we
                grow the batch size to 10, it is faster
                with openmp on a core 2 duo.

    Returns
    -------
    symbolic 4D tensor
        Set of feature maps generated by convolutional layer. Tensor is
        of shape (batch size, nb filters, output row, output col).

    Raises
    ------
    NotScalarConstantError
        If any provided shape entry is symbolic and not a constant.
    """
    warnings.warn(
        "pytensor.tensor.nnet.conv.conv2d is deprecated."
        " Use pytensor.tensor.nnet.conv2d instead.",
        DeprecationWarning,
    )

    # accept Constant value for image_shape and filter_shape.
    if image_shape is not None:
        image_shape = list(image_shape)
        for i in range(len(image_shape)):
            if image_shape[i] is not None:
                try:
                    image_shape[i] = get_scalar_constant_value(
                        as_tensor_variable(image_shape[i])
                    )
                except NotScalarConstantError:
                    # Bug fix: this message previously lacked the `f` prefix,
                    # so "{image_shape[i]}" was emitted literally.
                    raise NotScalarConstantError(
                        "The convolution need that the shape"
                        " information are constant values. We got"
                        f" {image_shape[i]} for the image_shape parameter"
                    )
                assert image_shape[i].dtype in discrete_dtypes
                image_shape[i] = int(image_shape[i])

    if filter_shape is not None:
        filter_shape = list(filter_shape)
        for i in range(len(filter_shape)):
            if filter_shape[i] is not None:
                try:
                    filter_shape[i] = get_scalar_constant_value(
                        as_tensor_variable(filter_shape[i])
                    )
                except NotScalarConstantError:
                    # Bug fix: missing `f` prefix here as well.
                    raise NotScalarConstantError(
                        "The convolution need that the shape"
                        " information are constant values. We got"
                        f" {filter_shape[i]} for the filter_shape "
                        "parameter"
                    )
                assert filter_shape[i].dtype in discrete_dtypes
                filter_shape[i] = int(filter_shape[i])

    # The stack size of the images must match the stack size of the filters.
    if image_shape and filter_shape:
        try:
            if image_shape[1] is not None and filter_shape[1] is not None:
                assert image_shape[1] == filter_shape[1]
        except Exception:
            print("image ", image_shape, " filters ", filter_shape)
            raise

    # Split the 4D shapes into the pieces ConvOp wants.
    if filter_shape is not None:
        nkern = filter_shape[0]
        kshp = filter_shape[2:]
    else:
        nkern, kshp = None, None

    if image_shape is not None:
        bsize = image_shape[0]
        imshp = image_shape[1:]
    else:
        bsize, imshp = None, None

    op = ConvOp(
        output_mode=border_mode,
        dx=subsample[0],
        dy=subsample[1],
        imshp=imshp,
        kshp=kshp,
        nkern=nkern,
        bsize=bsize,
        **kargs,
    )

    return op(input, filters)
class ConvOp(OpenMPOp):
r"""
This Op serves a dual purpose: it can implement a vanilla 2D convolution
(as taught in any signal processing class) or implement the
convolutional layers found in Convolutional Neural Networks.
In this setting, a set of 3D images is convolved with a set of 3D kernels,
with the particularity that their leading dimensions are of equal length.
Vanilla 2D convolution is treated as a special case of this.
The input parameter represents a mini-batch of multiple images. Its shape is:
batch size x num. input feature maps x image height x image width
The kernel parameter represents a set of 3D kernels. Its shape is:
number of filters x num. input images x filter height x filter width
The output of ConvOp is a 4D tensor, generated as follows:
output[b,k,:,:] = \sum_i input[b,i,:,:] * filter[k,i,:,:] \forall b,k
where b is the mini-batch index, k the filter index and * is the
convolution operator.
The constructor initializes a ConvOp with given output_mode (full/valid).
All other parameters are optional and are only used to generate more
optimized c code, or to enable graph optimizers to optimally replace the
ConvOp.
NOTES ON OPTIMIZATION:
There are two types of optimization. The first is the selection of the
fastest algo when bsize and nkern are provided with imshp and kshp.
By default we try to select the fastest version. You can specify it
with the unroll_batch, unroll_kern, and unroll_patch parameter.
The second type of optimization is hardcoding some dimensions into the
code when all shape are know.
This make a significant difference for the 'full' output_mode.
Sometimes, the fastest implementation on x86-64 uses
{unroll_batch=4, unroll_kern=4, unroll_patch=False}
with all other shape parameters being provided.
For optimizing other architectures, see:
Kazushige Goto and Robert A. Van De Geijn, Anatomy of High-Performance
Matrix Multiplication, (mr x nr). ACM Transactions on Mathematical
Software, May 2008.
Figure 12: (mr x nr). For x86 use 2x4, itanium 8x8, etc.
Parameters
----------
output_mode : {'valid', 'full'}
'valid' gives an output smaller then the image.
'full' gives an output bigger then the image.
See 'border_mode' in conv2d's doc.
Optional parameters: (will generate more optimal c code)
imshp : tuple of len 2 or 3: 2 for 2d image, 3 for a stack of 2d images.
Stacksize, nb image row, nb image col.
kshp : tuple of len 2
Nb kernel row, nb kernel col.
nkern : int
The number of kernel.
bsize : int
The size of the minibatch.
dx : int
Patch stride rows.
dy : int
Patch stride cols
Params which select the version of code used:
unroll_patch : bool
Use a version of c_code that unroll the patch loop that don't
request all shape information to work, but if all shape information
are present, will use it to hardcode the value in the code for
faster code.
unroll_batch : int
Use a version of c_code that unroll the batch (by unroll_batch)
and the nkern (by unroll_kern) loop. The size must by a multiple
of bsize or nkern respectively.
unroll_kern : int
Use a version of c_code that unroll the batch
(by unroll_batch) and the nkern(by unroll_kern) loop. The size
must by a multiple of bsize or nkern respectively.
The 3 following parameters are used internally when we generate
the gradient when dx!=1 or dy!=1.
imshp_logical
Default None. None value is equivalent to imshp value.
When imshp_logical != imshp, it tell we need to insert 0 in
the image before we do the convolution. For example, when dx==dy==2
and the image is [[1, 2], [3, 4]], we should make as if the image
was [[1, 0, 2, 0], [0, 0, 0, 0], [3, 0, 4, 0], [0, 0, 0, 0]].
Our python code insert the zero, but the c code optimize it.
imshp_logical != imshp when taking the grad again the weights or
the image when the output_mode is full and `dx != 1` or `dy != 1`.
kshp_logical
Idem but for kshp and used for the grad again the
weights when the output_mode is valid and `dx != 1` or `dy != 1`.
kshp_logical_top_aligned
Used in the same case. Default to True.
Set to False in the grad again the weight when the
output_mode is full.
"""
__attrnames = [
"imshp",
"kshp",
"nkern",
"bsize",
"dx",
"dy",
"out_mode",
"unroll_batch",
"unroll_kern",
"unroll_patch",
"imshp_logical",
"kshp_logical",
"kshp_logical_top_aligned",
]
"""These attributes uniquely identify the behaviour of this op for
given inputs. Do not set openmp here.
"""
# the value of speed_unroll_batch_kern,speed_unroll_patch_noshape,speed_unroll_patch_shape
# have bean calculated on maggie36 when their is only 1 session logged on and only this was running.
# It is an Intel(R) Xeon(R) CPU E5430 @ 2.66GHz. It is computer with pytensor/tensor/nnet/tests/speed_test_conv.py
# and took 5 minutes to run.
# TODO: we should compute this table for each computer/os as this can change.
# I saw on one computer that the speed with the shape can be slower than without!
# using the real shape and the same dtype could also help.
# unroll_batch, unroll_kern, valid time, full time
speed_unroll_batch_kern = [
(1, 1, 2.4661250114440918, 6.5472931861877441),
(1, 2, 1.5869178771972656, 5.1499760150909424),
(1, 3, 1.4270510673522949, 3.6593470573425293),
(1, 4, 1.3373479843139648, 3.3451821804046631),
(1, 5, 1.2818830013275146, 3.1444568634033203),
(1, 6, 1.2521560192108154, 3.0256359577178955),
(1, 10, 1.2134110927581787, 2.9174180030822754),
(2, 1, 1.657214879989624, 4.5261678695678711),
(2, 2, 1.2123160362243652, 2.9747390747070312),
(2, 3, 1.0758891105651855, 2.5690360069274902),
(2, 4, 1.0683329105377197, 2.4233770370483398),
(2, 5, 1.0955719947814941, 2.3999948501586914),
(2, 6, 1.5935721397399902, 2.6878271102905273),
(2, 10, 1.8511250019073486, 3.2417428493499756),
(3, 1, 1.5948119163513184, 3.631148099899292),
(3, 2, 1.0761330127716064, 2.6011371612548828),
(3, 3, 1.0551531314849854, 2.4200370311737061),
(3, 4, 1.3930759429931641, 2.5211219787597656),
(3, 5, 1.4330689907073975, 2.5704989433288574),
(3, 6, 1.362138032913208, 2.5964410305023193),
(3, 10, 1.6582000255584717, 2.9907989501953125),
(4, 1, 1.4793620109558105, 3.3473429679870605),
(4, 2, 1.0671560764312744, 2.4171769618988037),
(4, 3, 1.2569692134857178, 2.2807950973510742),
(4, 4, 1.3456289768218994, 2.6219108104705811),
(4, 5, 1.4055080413818359, 2.4606490135192871),
(4, 6, 1.372107982635498, 2.551663875579834),
(4, 10, 1.599470853805542, 2.9172940254211426),
(5, 1, 1.4115700721740723, 3.2077109813690186),
(5, 2, 1.0635769367218018, 2.2648060321807861),
(5, 3, 1.3842809200286865, 2.6135518550872803),
(5, 4, 1.3470511436462402, 2.3852400779724121),
(5, 5, 1.3539440631866455, 2.5245928764343262),
(5, 6, 1.4037849903106689, 2.5985310077667236),
(5, 10, 1.6120610237121582, 2.8127608299255371),
(6, 1, 1.3623628616333008, 3.021122932434082),
(6, 2, 1.1697649955749512, 2.6285450458526611),
(6, 3, 1.2980999946594238, 2.4746189117431641),
(6, 4, 1.3739941120147705, 2.5579929351806641),
(6, 5, 1.3967819213867188, 2.5522029399871826),
(6, 6, 1.4279270172119141, 2.6127138137817383),
(6, 10, 1.605496883392334, 2.864037036895752),
(10, 1, 1.6401121616363525, 2.970099925994873),
(10, 2, 1.46710205078125, 2.7231831550598145),
(10, 3, 1.4193780422210693, 2.6087639331817627),
(10, 4, 1.4657118320465088, 2.6246678829193115),
(10, 5, 1.5052611827850342, 2.6542458534240723),
(10, 6, 1.5214400291442871, 2.7243161201477051),
(10, 10, 1.6116268634796143, 2.956165075302124),
]
# valid time, full time
speed_unroll_patch_noshape = [2.0109100341796875, 5.8175678253173828]
# valid time, full time
speed_unroll_patch_shape = [1.2967290878295898, 5.5283889770507812]
@staticmethod
def has_all_shape(imshp, kshp, nkern=1, bsize=1):
return (
nkern is not None
and bsize is not None
and all(shp is not None for shp in imshp)
and all(shp is not None for shp in kshp)
)
@staticmethod
def getOutputShape(inshp, kshp, stride=(1, 1), mode="valid"):
"""
Computes the output dimensions of convolving an image of shape "inshp"
with kernels of shape "kshp". Accepts symbolic or integer shapes.
Propagates `None`s (for unknown shapes).
Parameters
----------
inshp
(rows,cols) of input image.
kshp
(rows,cols) of filters.
mode: {'valid', 'full'}
See 'border_mode' in conv2d's doc.
Returns
-------
object
(rows,cols) of output image.
"""
# The formula would be ceil((i + s * k - s * 1) / float(d)),
# with s=1 for mode=='full' and s=-1 for mode=='valid'.
# To support symbolic shapes, we express this with integer arithmetic.
warnings.warn(
"`getOutputShape` is deprecated; use `get_conv_output_shape` instead.",
DeprecationWarning,
stacklevel=2,
)
return tuple(
get_conv_shape_1axis(i, k, mode, d) for i, k, d in zip(inshp, kshp, stride)
)
    def __init__(
        self,
        imshp=None,
        kshp=None,
        nkern=None,
        bsize=None,
        dx=1,
        dy=1,
        output_mode="valid",
        unroll_batch=None,
        unroll_kern=None,
        unroll_patch=None,
        imshp_logical=None,
        kshp_logical=None,
        kshp_logical_top_aligned=True,
        verbose=False,
        openmp=None,
    ):
        """Validate shapes/strides, store the identifying attributes and
        auto-select a C-code unrolling strategy (unroll_batch/unroll_kern vs
        unroll_patch) from the benchmark tables when none is specified.
        See the class docstring for the meaning of every parameter.
        """
        # Expand unknown image / kernel shapes into tuples of Nones
        if imshp is None:
            imshp = (None, None, None)
        else:
            imshp = tuple(imshp)
        if kshp is None:
            kshp = (None, None)
        else:
            kshp = tuple(kshp)
        # Check imshp and kshp dimensionality
        if len(imshp) == 2:
            # A 2-tuple means a single-channel image: prepend stack size 1.
            imshp = (1,) + imshp
        elif len(imshp) != 3:
            raise ValueError(f"len(imshp) must be 2 or 3, got {len(imshp)}")
        if len(kshp) != 2:
            raise ValueError(f"len(kshp) must be 2, got {len(kshp)}")
        # We must continue to consider None as 1 for backward compatibility.
        if dx is None:
            dx = 1
        if dy is None:
            dy = 1
        if int(dx) != dx:
            raise TypeError("ConvOp.__init__ param dx must be an int", dx)
        dx = int(dx)
        if int(dy) != dy:
            raise TypeError("ConvOp.__init__ param dy must be an int", dy)
        dy = int(dy)
        all_shape = self.has_all_shape(imshp, kshp, nkern, bsize)
        if (unroll_batch or unroll_kern) and not all_shape:
            raise ValueError(
                "In ConvOp, when using unroll_batch and"
                " unroll_nkern, all shape are needed"
            )
        # Init the openmp attribute
        super().__init__(openmp=openmp)
        if not all_shape or self.openmp:
            # Only this version is parallelized
            unroll_patch = True
        self.verbose = verbose
        self.imshp = imshp
        self.kshp = kshp
        self.nkern = nkern
        self.bsize = bsize
        self.dx = dx
        self.dy = dy
        # a triple
        if imshp_logical is None:
            self.imshp_logical = self.imshp
        else:
            imshp_logical = tuple(imshp_logical)
            if len(imshp_logical) != 3:
                raise ValueError(
                    f"len(imshp_logical) must be 3, got {len(imshp_logical)}"
                )
            self.imshp_logical = imshp_logical
        # a pair
        if kshp_logical is None:
            self.kshp_logical = self.kshp
        else:
            kshp_logical = tuple(kshp_logical)
            if len(kshp_logical) != 2:
                raise ValueError(
                    f"len(kshp_logical) must be 2, got {len(kshp_logical)}"
                )
            self.kshp_logical = kshp_logical
        # a bool
        self.kshp_logical_top_aligned = kshp_logical_top_aligned
        self.unroll_batch = unroll_batch
        self.unroll_kern = unroll_kern
        self.unroll_patch = unroll_patch
        # unroll_batch and unroll_kern come as a pair: specifying only one
        # implies 1 for the other.
        if self.unroll_batch and not self.unroll_kern:
            self.unroll_kern = 1
        if self.unroll_kern and not self.unroll_batch:
            self.unroll_batch = 1
        # downcast unroll_batch if not a divisor of batch size
        if (
            self.unroll_batch is not None
            and self.unroll_batch > 0
            and self.bsize % self.unroll_batch != 0
        ):
            if self.bsize <= self.unroll_batch:
                self.unroll_batch = self.bsize
            else:
                # find the maximum value under unroll_batch that would work
                new = self.unroll_batch
                assert new >= 1
                while self.bsize % new != 0:
                    new -= 1
                warnstr = (
                    "In ConvOp.__init__(): "
                    f"unroll_batch({self.unroll_batch}) must be 0 or a divisor of"
                    f" bsize({self.bsize}). We revert it to {new}. This"
                    " won't change the result, but may make it slower."
                )
                _logger.warning(warnstr)
                self.unroll_batch = new
        # downcast unroll_kern if not a divisor of nb of kernel
        if (
            self.unroll_kern is not None
            and self.unroll_kern > 0
            and self.nkern % self.unroll_kern != 0
        ):
            if self.nkern <= self.unroll_kern:
                self.unroll_kern = self.nkern
            else:
                # find the maximum value under unroll_kern that would work
                new = self.unroll_kern
                assert new >= 1
                while self.nkern % new != 0:
                    new -= 1
                warnstr = (
                    "In ConvOp.__init__(): "
                    f"unroll_kern({self.unroll_kern}) must be 0 or a divisor of"
                    f" nkern({self.nkern}). We revert it to {new}. This"
                    " won't change the result, but may make it slower."
                )
                _logger.warning(warnstr)
                self.unroll_kern = new
        # Strided output shape (rows, cols) of the op's output...
        self.outshp = get_conv_output_shape(
            (None,) + self.imshp_logical,
            (
                None,
                None,
            )
            + self.kshp_logical,
            output_mode,
            (dx, dy),
        )[2:]
        # ...and the same shape without subsampling (stride 1x1).
        self.fulloutshp = get_conv_output_shape(
            (None,) + self.imshp_logical,
            (
                None,
                None,
            )
            + self.kshp_logical,
            output_mode,
            (1, 1),
        )[2:]
        self.out_mode = output_mode
        if self.out_mode not in ("valid", "full"):
            raise NotImplementedError(f"Mode {self.out_mode} not implemented")
        if any((shp is not None) and (shp <= 0) for shp in self.outshp):
            raise ValueError(
                "Bad size for the output shape. Verify that [post-"
                f"supersampling] input shape ({self.imshp_logical}) and kern"
                f" shape({self.kshp_logical}) are ok. (Hint: kerns must fit inside"
                " image in valid mode)"
            )
        if (
            self.unroll_kern is None
            and self.unroll_batch is None
            and self.unroll_patch is None
        ):
            # no version specified. Find the faster we have
            if self.bsize is None and self.nkern is None:
                self.unroll_patch = True
            elif self.bsize is not None and self.nkern is not None:
                bsize = self.bsize
                nkern = self.nkern
                mode_idx = 0
                if self.out_mode != "valid":
                    mode_idx = 1
                if self.has_all_shape(self.imshp, self.kshp):
                    time_unroll_patch = self.speed_unroll_patch_shape[mode_idx]
                else:
                    time_unroll_patch = self.speed_unroll_patch_noshape[mode_idx]
                # Scan the benchmark table for the fastest batch/kern
                # unrolling that divides both bsize and nkern; the (1, 1)
                # row always matches, so an index is always found.
                time_unroll_batch_kern = 9999999
                for i in range(len(self.speed_unroll_batch_kern)):
                    if (
                        bsize % self.speed_unroll_batch_kern[i][0] == 0
                        and nkern % self.speed_unroll_batch_kern[i][1] == 0
                    ):
                        if (
                            self.speed_unroll_batch_kern[i][2 + mode_idx]
                            < time_unroll_batch_kern
                        ):
                            time_unroll_batch_kern = self.speed_unroll_batch_kern[i][
                                2 + mode_idx
                            ]
                            time_unroll_batch_kern_idx = i
                if time_unroll_patch < time_unroll_batch_kern:
                    self.unroll_patch = True
                else:
                    self.unroll_batch = self.speed_unroll_batch_kern[
                        time_unroll_batch_kern_idx
                    ][0]
                    self.unroll_kern = self.speed_unroll_batch_kern[
                        time_unroll_batch_kern_idx
                    ][1]
                    self.unroll_patch = False
                _logger.debug(
                    "AUTO FIND VERSION OF C_CODE OF CONV OP %s %s %s %s %s %s %s",
                    self.unroll_batch,
                    self.unroll_kern,
                    self.unroll_patch,
                    self.bsize,
                    self.nkern,
                    time_unroll_patch,
                    time_unroll_batch_kern,
                )
        self._rehash()
def __eq__(self, other):
if type(self) != type(other):
return False
for a in self.__attrnames:
if getattr(self, a) != getattr(other, a):
return False
return True
def __setstate__(self, d):
    # Restore pickled state, then rebuild the cached hash value: it is
    # derived from the unpickled attributes and is not part of the pickle.
    super().__setstate__(d)
    self._rehash()
def _rehash(self):
hashval = hash(type(self))
for a in self.__attrnames:
hashval = hashval ^ hash(getattr(self, a))
self.__hashval = hashval
def __hash__(self):
    # Return the hash precomputed by `_rehash` (kept consistent with
    # `__eq__`, which compares the same `__attrnames` attributes).
    return self.__hashval
def __str__(self):
return (
"ConvOp{"
+ ",".join(str((a, getattr(self, a))) for a in self.__attrnames)
+ "}"
)
def flops(self, inputs, outputs):
"""
Useful with the hack in profiling to print the MFlops.
"""
images, kerns = inputs
(out,) = outputs
assert images[1] == kerns[1]
flops = 0
if self.out_mode == "valid":
# nb mul and add by output pixel
flops = kerns[2] * kerns[3] * 2
# nb flops by output image
flops *= out[2] * out[3]
# nb patch multiplied
flops *= images[1] * kerns[0] * images[0]
else:
flops = (
images[0]
* kerns[0]
* images[1]
* kerns[2]
* kerns[3]
* images[2]
* images[3]
* 2
)
return flops
def make_node(self, inputs, kerns):
    # TODO: find a way to make ConvOp work for N-D (after NIPS09)
    """
    Build the Apply node for a 2D convolution.

    Parameters
    ----------
    inputs
        4 dim: batches x stacksize x rows x cols.
    kerns
        4 dim: nkern x stackidx x rows x cols.

    Raises
    ------
    TypeError
        If either input is not a 4D tensor.
    NotImplementedError
        If the image and kernel dtypes differ.
    """
    _inputs = as_tensor_variable(inputs)
    _kerns = as_tensor_variable(kerns)
    # TODO: lift this restriction by upcasting either inputs or kerns
    if _inputs.ndim != 4:
        raise TypeError(
            "ConvOp (make_node) requires input be a 4D tensor;"
            f' received "{inputs}" ({_inputs.ndim} dims)'
        )
    if _kerns.ndim != 4:
        raise TypeError("make_node requires 4D tensor of kernels")
    if _inputs.type.dtype != _kerns.type.dtype:
        # BUG FIX: the second string was missing the ``f`` prefix, so the
        # placeholders were printed literally instead of interpolated.
        raise NotImplementedError(
            "The image and the kernel must have the same type. "
            f"inputs({_inputs.dtype}), kerns({_kerns.dtype})"
        )
    # Output is (batch, nkern, outrows, outcols); keep only the
    # broadcastable (== 1) entries of the static shape information.
    out_shape = (
        _inputs.type.shape[0],
        _kerns.type.shape[0],
        self.outshp[0],
        self.outshp[1],
    )
    out_shape = tuple(1 if s == 1 else None for s in out_shape)
    output = tensor(
        dtype=_inputs.type.dtype,
        shape=out_shape,
    )
    return Apply(self, [_inputs, _kerns], [output])
def infer_shape(self, fgraph, node, input_shapes):
    """
    Compute the 4D output shape from the input/filter shapes, preferring
    the (logical) shapes fixed at construction time over the symbolic
    run-time ones.
    """
    image_shape = input_shapes[0]  # 4D image shape
    filter_shape = input_shapes[1]  # 4D filter shape
    bsize = image_shape[0]
    imshp = list(image_shape[1:])
    nkern = filter_shape[0]
    kshp = list(filter_shape[2:])

    # replace symbolic shapes with known shapes
    if self.bsize is not None:
        bsize = self.bsize
    for i in range(3):
        if self.imshp_logical[i] is not None:
            imshp[i] = self.imshp_logical[i]
    if self.nkern is not None:
        nkern = self.nkern
    for i in range(2):
        if self.kshp_logical[i] is not None:
            kshp[i] = self.kshp_logical[i]

    # infer output shape from what we have
    res = get_conv_output_shape(
        (bsize, *imshp),
        (nkern, None, *kshp),
        self.out_mode,
        (self.dx, self.dy),
    )
    return [res]
def perform(self, node, inp, out):
    """
    Python implementation: 2D correlation via repeated scipy-style
    ``_convolve2d`` calls, summed over the input stack.

    By default if len(img2d.shape)==3, we TODO
    """
    img2d, filtersflipped = inp
    (z,) = out

    # TODO: move these back out to global scope when they no longer
    # cause an atexit error

    # Resolve each shape: use the value fixed at construction time when
    # available (and verify it against the runtime array), otherwise read
    # it off the runtime array.
    imshp = self.imshp
    if any(x is None for x in imshp):
        imshp = tuple(img2d.shape[1:])
    if imshp != img2d.shape[1:]:
        raise ValueError(
            "The image shape provided at build time "
            "is different from the one passed at run time",
            imshp,
            img2d.shape[1:],
        )
    kshp = self.kshp
    if any(x is None for x in kshp):
        kshp = tuple(filtersflipped.shape[2:])
    if kshp != filtersflipped.shape[2:]:
        raise ValueError(
            "The filter shape provided at build time "
            "is different from the one passed at run time",
            kshp,
            filtersflipped.shape[2:],
        )
    bsize = self.bsize
    if bsize is None:
        bsize = img2d.shape[0]
    elif bsize != img2d.shape[0]:
        raise ValueError(
            "The batch size provided at build time "
            "is different from the one passed at run time",
            bsize,
            img2d.shape[0],
        )
    nkern = self.nkern
    if nkern is None:
        nkern = filtersflipped.shape[0]
    elif nkern != filtersflipped.shape[0]:
        raise ValueError(
            "The number of filters provided at build time "
            "is different from the one passed at run time",
            nkern,
            filtersflipped.shape[0],
        )

    # Fill unknown entries of the logical (supersampled) shapes from the
    # physical ones.
    imshp_logical = self.imshp_logical
    if imshp_logical[0] is None:
        imshp_logical = (imshp[0],) + imshp_logical[1:]
    if imshp_logical[1] is None:
        imshp_logical = (imshp_logical[0], imshp[1], imshp_logical[2])
    if imshp_logical[2] is None:
        imshp_logical = imshp_logical[:2] + (imshp[2],)
    assert all(x is not None for x in imshp_logical)

    kshp_logical = self.kshp_logical
    if kshp_logical[0] is None:
        kshp_logical = (kshp[0], kshp_logical[1])
    if kshp_logical[1] is None:
        kshp_logical = (kshp_logical[0], kshp[1])
    assert all(x is not None for x in kshp_logical)

    # Full (stride (1, 1)) output shape; the (dx, dy) strides are applied
    # by subsampling at the very end.
    if all(shp is not None for shp in self.fulloutshp):
        fulloutshp = tuple(self.fulloutshp)
    else:
        fulloutshp = get_conv_output_shape(
            (None,) + imshp_logical,
            (
                None,
                None,
            )
            + kshp_logical,
            self.out_mode,
            (1, 1),
        )[2:]

    # Reuse the previous output buffer when its shape still matches.
    if (
        z[0] is None
        or z[0].shape
        != (
            bsize,
            nkern,
        )
        + fulloutshp
    ):
        z[0] = np.zeros(
            (
                bsize,
                nkern,
            )
            + fulloutshp,
            dtype=img2d.dtype,
        )
    zz = z[0]

    stacklen = imshp[0]

    img2d = img2d.reshape((bsize,) + imshp)
    filtersflipped = filtersflipped.reshape((nkern, stacklen) + kshp)

    if self.imshp != self.imshp_logical:
        # assuming that to get from imshp to imshp logical we insert zeros in missing spots
        rstride = int(np.ceil(imshp_logical[1] / float(imshp[1])))
        cstride = int(np.ceil(imshp_logical[2] / float(imshp[2])))
        buf = np.zeros((bsize,) + imshp_logical, dtype=img2d.dtype)
        buf[:, :, ::rstride, ::cstride] = img2d
        img2d = buf
        del buf, rstride, cstride

    if kshp != kshp_logical:
        # Scatter the physical kernel into the logical (supersampled) one.
        rstride = int(np.ceil(kshp_logical[0] / float(kshp[0])))
        cstride = int(np.ceil(kshp_logical[1] / float(kshp[1])))
        buf = np.zeros(
            (nkern, stacklen) + self.kshp_logical, dtype=filtersflipped.dtype
        )
        if self.kshp_logical_top_aligned:
            roffset = coffset = 0
        else:
            # NOTE(review): offset appears to align the sparse kernel
            # entries toward the bottom/right of the logical kernel —
            # confirm against the matching C-code path.
            roffset = (
                kshp_logical[0] - (kshp[0] * rstride) - 1 + rstride
            ) % rstride
            coffset = (
                kshp_logical[1] - (kshp[1] * cstride) - 1 + cstride
            ) % cstride
            assert roffset >= 0
            assert coffset >= 0
        buf[:, :, roffset::rstride, coffset::cstride] = filtersflipped
        filtersflipped = buf
        del buf, rstride, cstride

    val = _valfrommode(self.out_mode)
    bval = _bvalfromboundary("fill")

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", np.ComplexWarning)
        for b in range(bsize):
            for n in range(nkern):
                zz[b, n, ...].fill(0)
                for im0 in range(stacklen):
                    # some cast generates a warning here
                    zz[b, n, ...] += _convolve2d(
                        img2d[b, im0, ...],
                        filtersflipped[n, im0, ...],
                        1,
                        val,
                        bval,
                        0,
                    )

    if False:
        # Dead reference implementation, kept (disabled) as documentation
        # of the direct nested-loop algorithm.
        if False and self.out_mode == "full":
            img2d2 = np.zeros(
                (
                    bsize,
                    stacklen,
                    imshp[1] + 2 * kshp[0] - 2,
                    imshp[2] + 2 * kshp[1] - 2,
                )
            )
            img2d2[
                :,
                :,
                kshp[0] - 1 : kshp[0] - 1 + imshp[1],
                kshp[1] - 1 : kshp[1] - 1 + imshp[2],
            ] = img2d
            img2d = img2d2
        # N_image_shape = image_data.shape
        for b in range(bsize):
            for n in range(nkern):
                zz[b, n, ...].fill(0)
                for im0 in range(stacklen):
                    for row in range(0, zz.shape[2], self.dx):
                        for col in range(0, zz.shape[3], self.dy):
                            zz[b, n, row, col] += (
                                img2d[
                                    b, im0, row : row + kshp[0], col : col + kshp[1]
                                ]
                                * filtersflipped[n, im0, ::-1, ::-1]
                            ).sum()

    # We copy it to remove the Stride mismatch warning from DEBUG_MODE.
    # The copy make that we return an object with the same stride as the c version.
    # The copy don't affect the performance during our experience as in that case we
    # execute the c version which is much faster.
    if self.dx > 1 or self.dy > 1:
        zz = zz[:, :, 0 :: self.dx, 0 :: self.dy].copy()
    z[0] = zz
def R_op(self, inputs, eval_points):
rval = None
if eval_points[0] is not None:
rval = self.make_node(eval_points[0], inputs[1]).outputs[0]
if eval_points[1] is not None:
if rval is None:
rval = self.make_node(inputs[0], eval_points[1]).outputs[0]
else:
rval += self.make_node(inputs[0], eval_points[1]).outputs[0]
return [rval]
def grad(self, inp, grads):
    """
    Return the gradients w.r.t. the image (``din``) and the kernels (``dw``).

    Both gradients are themselves `ConvOp`\\s: the kernel gradient
    cross-correlates the (dimshuffled) input with the output gradient in
    "valid" mode, and the input gradient convolves the output gradient with
    the flipped, dimshuffled kernels in the opposite border mode.
    """
    inputs, kerns = inp
    (gz,) = grads

    if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical:
        raise NotImplementedError("todo")

    if self.out_mode == "valid" and (self.dx, self.dy) != (1, 1):
        # BUG FIX: the message pointed at pytensor.tensor.nnet.conv2d,
        # which no longer exists; direct users to pytensor.tensor.conv.
        raise NotImplementedError(
            "ERROR: ConvOp.grad is now disabled for 'valid' convolutions with"
            " stride != (1, 1); call pytensor.tensor.conv.conv2d() instead."
        )

    if self.dx not in (1, 2) or self.dy not in (1, 2):
        raise NotImplementedError(
            "ERROR: We disable ConvOp.grad now when output_mode is not"
            " 'valid' and dx or dy are greater than 2, as there is a bug"
            " in it. See `abstract_conv2d <>`_ for a version that support this."
        )

    all_shape = self.has_all_shape(self.imshp, self.kshp, self.nkern, self.bsize)
    if not all_shape and (self.dx != 1 or self.dy != 1):
        raise ValueError(
            "ConvOp.grad when dx!=1 or dy!=1 we must have all "
            "the optional shape information"
        )

    # Determine gradient on kernels ########
    assert inputs.ndim == 4 and kerns.ndim == 4

    newin = inputs.dimshuffle((1, 0, 2, 3))
    newgz = gz.dimshuffle((1, 0, 2, 3))

    if self.out_mode == "valid":
        (img, filters) = (newin, newgz)
        kshp_logical = self.fulloutshp
        kshp_logical_top_aligned = False
        imshp_logical = None
        (bsize, nkern) = (self.imshp[0], self.nkern)
        imshp = (self.bsize, self.imshp[1], self.imshp[2])
        kshp = self.outshp
    elif self.out_mode == "full":
        (img, filters) = (newgz, newin)
        kshp_logical = None
        kshp_logical_top_aligned = True
        imshp_logical = (self.bsize, self.fulloutshp[0], self.fulloutshp[1])
        (bsize, nkern) = (self.nkern, self.imshp[0])
        imshp = (self.bsize, self.outshp[0], self.outshp[1])
        kshp = self.imshp[1:]
    else:
        raise NotImplementedError(
            "Only [full,valid] modes are currently supported."
        )

    filters = filters[:, :, ::-1, ::-1]  # flip them

    dw = ConvOp(
        imshp,
        kshp,
        nkern,
        bsize,
        1,
        1,
        output_mode="valid",
        unroll_batch=None,
        unroll_kern=None,
        unroll_patch=None,
        imshp_logical=imshp_logical,
        kshp_logical=kshp_logical,
        kshp_logical_top_aligned=kshp_logical_top_aligned,
        verbose=self.verbose,
    )
    dw = dw(img, filters)

    if all_shape:
        assert all(o == k for o, k in zip(dw.owner.op.outshp, self.kshp))
    if self.out_mode == "valid":
        # before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
        dw = dw.dimshuffle((1, 0, 2, 3))
        dw = dw[:, :, ::-1, ::-1]

    # Determine gradient on inputs ########
    mode = "valid"
    if self.out_mode != "full":
        mode = "full"

    filters = kerns.dimshuffle((1, 0, 2, 3))
    filters = filters[:, :, ::-1, ::-1]

    nkern = self.imshp[0]
    imshp = (self.nkern, self.outshp[0], self.outshp[1])
    imshp_logical = (self.nkern, self.fulloutshp[0], self.fulloutshp[1])

    din = ConvOp(
        imshp,
        self.kshp,
        nkern,
        self.bsize,
        1,
        1,
        output_mode=mode,
        unroll_batch=None,
        unroll_kern=None,
        unroll_patch=None,
        imshp_logical=imshp_logical,
        kshp_logical=None,
        verbose=self.verbose,
    )
    din = din(gz, filters)

    assert all(
        o is None or o == i for o, i in zip(din.owner.op.outshp, self.imshp[1:])
    )

    # din and dw should have the same broadcasting pattern as the
    # parameters they are the gradient of (resp. inputs and kerns).
    if din.type.broadcastable != inputs.type.broadcastable:
        din = specify_broadcastable(
            din, *(ax for (ax, b) in enumerate(inputs.type.broadcastable) if b)
        )
    if dw.type.broadcastable != kerns.type.broadcastable:
        dw = specify_broadcastable(
            dw, *(ax for (ax, b) in enumerate(kerns.type.broadcastable) if b)
        )
    return [din, dw]
def c_headers(self, **kwargs):
    """C headers required by the generated implementation (NumPy C API plus C++ iostreams)."""
    required = ["<numpy/noprefix.h>", "<iostream>", "<sstream>"]
    return required
def c_code_cache_version(self):
    # Bump the leading integer whenever the generated C code changes; the
    # key also varies with the OpenMP setting and the BLAS header version.
    return (15, self.openmp, blas.blas_header_version())
def c_support_code(self, **kwargs):
    # Macros shared by every generated code path: FULL/SAME/VALID encode
    # the border mode, MOD aliases '%' so the templates survive Python
    # %-formatting, and the BLAS header text supports the gemm kernels.
    return (
        """
#define STRIDES(arr) (PyArray_STRIDES(arr))
#define FULL 2
#define SAME 1
#define VALID 0
#define MOD %
using namespace std;
"""
        + blas.blas_header_text()
    )
def use_blas(self):
"""Return True if we will generate code that use gemm."""
# the gemm version only support that case
if self.out_mode == "valid" and self.dx == 0 and self.dy == 0:
# We use a faster version in those case.
if (
self.imshp != self.imshp_logical
or self.kshp != self.kshp_logical
or self.unroll_patch
or self.unroll_batch > 0
or self.unroll_kern > 0
):
return False
return True
return False
def c_libraries(self, **kwargs):
    """Link against BLAS only when the gemm code path will be generated."""
    return blas.ldflags() if self.use_blas() else []
def c_no_compile_args(self, **kwargs):
    """
    Compiler flags to strip for this op.

    gcc 4.3.0 segfaults while compiling the generated code at -O3 when the
    kernel shape is (1, 1); this does not happen at -O2, so -O3 is removed
    here (and -O2 is re-added by `c_compile_args`).
    """
    if pytensor.link.c.cmodule.gcc_version() in ["4.3.0"] and self.kshp == (1, 1):
        return ["-O3"]
    return []
def c_compile_args(self, **kwargs):
    """
    Extra compiler flags: BLAS flags when the gemm path is used, the
    gcc-4.3.0 workaround (-O2 instead of the stripped -O3), and the
    parent's flags (e.g. -fopenmp).
    """
    flags = blas.ldflags(libs=False, flags=True) if self.use_blas() else []
    if pytensor.link.c.cmodule.gcc_version() in ["4.3.0"] and self.kshp == (1, 1):
        flags += ["-O2"]
    # Add the -fopenmp flags
    flags += super().c_compile_args(**kwargs)
    return flags
def c_lib_dirs(self, **kwargs):
    """BLAS library directories, needed only for the gemm code path."""
    return blas.ldflags(libs=False, libs_dir=True) if self.use_blas() else []
def c_header_dirs(self, **kwargs):
    """BLAS include directories, needed only for the gemm code path."""
    return blas.ldflags(libs=False, include_dir=True) if self.use_blas() else []
def c_code(self, node, name, inp, out, sub):
    """
    Assemble the C implementation by %-substituting shape information and
    runtime assertions into one of several C templates, chosen from the
    op's configuration (logical-shape path, unroll-patch, unrolled
    batch/kern, gemm, or the generic path).
    """
    img2d, filtersflipped = inp
    (z,) = out
    if node.inputs[0].type.dtype != node.inputs[1].type.dtype:
        raise NotImplementedError()
    assert node.inputs[0].type.dtype == node.inputs[1].type.dtype
    # `d` is the substitution dict for the C templates; it starts from the
    # local variable names and the compiler-provided `sub` entries.
    d = locals()
    d.update(sub)

    all_shape = self.has_all_shape(
        self.imshp, self.kshp, self.nkern, self.bsize
    ) and self.has_all_shape(self.imshp_logical, self.kshp_logical)

    d["self_out_mode"] = self.out_mode
    d["self_dx"] = self.dx
    d["self_dy"] = self.dy
    d["mode"] = self.out_mode.upper()
    d["affectation"] = "="

    # Default values, will be overridden if the shape info is provided
    d["self_bsize"] = f"PyArray_DIMS({d['img2d']})[0]"
    d["self_nkern"] = f"PyArray_DIMS({d['filtersflipped']})[0]"
    d["self_outshp0"] = "-1"
    d["self_outshp1"] = "-1"
    d["self_imshp0"] = f"PyArray_DIMS({d['img2d']})[1]"
    d["self_imshp1"] = f"PyArray_DIMS({d['img2d']})[2]"
    d["self_imshp2"] = f"PyArray_DIMS({d['img2d']})[3]"
    d["self_kshp0"] = f"PyArray_DIMS({d['filtersflipped']})[2]"
    d["self_kshp1"] = f"PyArray_DIMS({d['filtersflipped']})[3]"

    # `assert_size` accumulates C snippets that verify, at run time, that
    # each hard-coded compile-time shape matches the actual array shape.
    d["assert_size"] = ""

    # Override the default value if we have it
    if self.kshp[0] is not None:
        expected = d["self_kshp0"]
        value = self.kshp[0]
        d[
            "assert_size"
        ] += """
        if(%(value)s != %(expected)s){
            PyErr_Format(PyExc_ValueError,
                         "The hardcoded shape for the number of rows in the filter "
                         "(%%ld) isn't the run time shape (%%ld).",
                         (long)%(value)s, (long)%(expected)s);
            %(fail)s;
        }
        """ % dict(
            expected=expected, value=value, **sub
        )
        d["self_kshp0"] = self.kshp[0]
    if self.kshp[1] is not None:
        expected = d["self_kshp1"]
        value = self.kshp[1]
        d[
            "assert_size"
        ] += """
        if(%(value)s != %(expected)s){
            PyErr_Format(PyExc_ValueError,
                         "The hardcoded shape for the number of columns in the filter "
                         "(%%ld) isn't the run time shape (%%ld).",
                         (long)%(value)s, (long)%(expected)s);
            %(fail)s;
        }
        """ % dict(
            expected=expected, value=value, **sub
        )
        d["self_kshp1"] = self.kshp[1]
    if self.outshp[0] is not None:
        expected = "dim_zz[0]"
        value = self.outshp[0]
        d[
            "assert_size"
        ] += """
        if(%(value)s != %(expected)s){
            PyErr_Format(PyExc_ValueError,
                         "The hardcoded shape for the number of rows in the output "
                         "(%%ld) isn't the run time shape (%%ld).",
                         (long)%(value)s, (long)%(expected)s);
            %(fail)s;
        }
        """ % dict(
            expected=expected, value=value, **sub
        )
        d["self_outshp0"] = self.outshp[0]
    if self.outshp[1] is not None:
        expected = "dim_zz[1]"
        value = self.outshp[1]
        d[
            "assert_size"
        ] += """
        if(%(value)s != %(expected)s){
            PyErr_Format(PyExc_ValueError,
                         "The hardcoded shape for the number of columns in the output "
                         "(%%ld) isn't the run time shape (%%ld).",
                         (long)%(value)s, (long)%(expected)s);
            %(fail)s;
        }
        """ % dict(
            expected=expected, value=value, **sub
        )
        d["self_outshp1"] = self.outshp[1]
    if self.imshp[0] is not None:
        # The image stack size is checked against both the image and the
        # kernel arrays.
        expected = d["self_imshp0"]
        value = self.imshp[0]
        d[
            "assert_size"
        ] += """
        if(%(value)s != %(expected)s){
            PyErr_Format(PyExc_ValueError,
                         "The hardcoded shape for the image stack size (%%ld) "
                         "isn't the run time shape (%%ld).",
                         (long)%(value)s, (long)%(expected)s);
            %(fail)s;
        }
        """ % dict(
            expected=expected, value=value, **sub
        )
        expected = "kerns_dim[1]"
        value = self.imshp[0]
        d[
            "assert_size"
        ] += """
        if(%(value)s != %(expected)s){
            PyErr_Format(PyExc_ValueError,
                         "The hardcoded shape for the kernel stack size (%%ld) "
                         "isn't the run time shape (%%ld).",
                         (long)%(value)s, (long)%(expected)s);
            %(fail)s;
        }
        """ % dict(
            expected=expected, value=value, **sub
        )
        d["self_imshp0"] = self.imshp[0]
    if self.imshp[1] is not None:
        expected = d["self_imshp1"]
        value = self.imshp[1]
        d[
            "assert_size"
        ] += """
        if(%(value)s != %(expected)s){
            PyErr_Format(PyExc_ValueError,
                         "The hardcoded shape for the number of rows in the image "
                         "(%%ld) isn't the run time shape (%%ld).",
                         (long)%(value)s, (long)%(expected)s);
            %(fail)s;
        }
        """ % dict(
            expected=expected, value=value, **sub
        )
        d["self_imshp1"] = self.imshp[1]
    if self.imshp[2] is not None:
        expected = d["self_imshp2"]
        value = self.imshp[2]
        d[
            "assert_size"
        ] += """
        if(%(value)s != %(expected)s){
            PyErr_Format(PyExc_ValueError,
                         "The hardcoded shape for the number of columns in the image "
                         "(%%ld) isn't the run time shape (%%ld).",
                         (long)%(value)s, (long)%(expected)s);
            %(fail)s;
        }
        """ % dict(
            expected=expected, value=value, **sub
        )
        d["self_imshp2"] = self.imshp[2]
    if self.bsize is not None:
        expected = d["self_bsize"]
        value = self.bsize
        d[
            "assert_size"
        ] += """
        if(%(value)s != %(expected)s){
            PyErr_Format(PyExc_ValueError,
                         "The hardcoded shape for the batch size (%%ld) "
                         "isn't the run time shape (%%ld).",
                         (long)%(value)s, (long)%(expected)s);
            %(fail)s;
        }
        """ % dict(
            expected=expected, value=value, **sub
        )
        d["self_bsize"] = self.bsize
    if self.nkern is not None:
        expected = d["self_nkern"]
        value = self.nkern
        d[
            "assert_size"
        ] += """
        if(%(value)s != %(expected)s){
            PyErr_Format(PyExc_ValueError,
                         "The hardcoded shape for the number of kernels in the filter "
                         "(%%ld) isn't the run time shape (%%ld).",
                         (long)%(value)s, (long)%(expected)s);
            %(fail)s;
        }
        """ % dict(
            expected=expected, value=value, **sub
        )
        d["self_nkern"] = self.nkern

    # Other hard coded stuff only if we have all shapes
    if all_shape:
        d["self_kshp_logical_r"] = self.kshp_logical[0]
        d["self_kshp_logical_c"] = self.kshp_logical[1]
        d["self_kshp_logical_stride_r"] = int(
            np.ceil(self.kshp_logical[0] / float(self.kshp[0]))
        )
        d["self_kshp_logical_stride_c"] = int(
            np.ceil(self.kshp_logical[1] / float(self.kshp[1]))
        )
        d["self_imshp_logical_r"] = self.imshp_logical[1]
        # numpy.B. 1 not 0
        d["self_imshp_logical_c"] = self.imshp_logical[2]
        # numpy.B. 2 not 1
        d["self_imshp_logical_stride_r"] = int(
            np.ceil(self.imshp_logical[1] / float(self.imshp[1]))
        )
        d["self_imshp_logical_stride_c"] = int(
            np.ceil(self.imshp_logical[2] / float(self.imshp[2]))
        )
        # With a stack of images, output pixels accumulate (+=) across the
        # stack instead of being assigned once.
        if self.imshp[0] != 1:
            d["affectation"] = "+="
        d["all_shape"] = "1"
        d["dim_zz_const"] = "const"
        d["dim_zz_affect"] = ""
    else:
        # Shapes unknown at compile time: always accumulate and compute
        # the output dimensions at run time.
        d["affectation"] = "+="
        d["all_shape"] = "0"
        d["dim_zz_const"] = ""
        d["dim_zz_affect"] = (
            """
        if (mode == FULL) {
            dim_zz[0] = (int)ceil((dim_im[0]+dim_ker0-1)/float(%(self_dx)s));
            dim_zz[1] = (int)ceil((dim_im[1]+dim_ker1-1)/float(%(self_dy)s));
        } else {
            dim_zz[0] = (int)ceil((dim_im[0]-dim_ker0+1)/float(%(self_dx)s));
            dim_zz[1] = (int)ceil((dim_im[1]-dim_ker1+1)/float(%(self_dy)s));
        }
        """
            % d
        )
        d["assert_size"] += (
            """
        // Check the stack size of the filter and images are equals
        if(kerns_dim[1] != img2d_dim[1]){
            PyErr_Format(PyExc_ValueError,
                         "the filter stack size (%%ld) and image stack size (%%ld) differ",
                         (long)kerns_dim[1], (long)img2d_dim[1]);
            %(fail)s;
        }
        """
            % sub
        )

    if self.kshp_logical_top_aligned:
        d["self_kshp_logical_offset_r"] = 0
        d["self_kshp_logical_offset_c"] = 0
    elif all_shape:
        rstride = d["self_kshp_logical_stride_r"]
        cstride = d["self_kshp_logical_stride_c"]
        d["self_kshp_logical_offset_r"] = (
            self.kshp_logical[0] - (self.kshp[0] * rstride) - 1 + rstride
        ) % rstride
        d["self_kshp_logical_offset_c"] = (
            self.kshp_logical[1] - (self.kshp[1] * cstride) - 1 + cstride
        ) % cstride
        del rstride, cstride

    if node.inputs[0].type.dtype == "float32":
        d["type"] = "float"
    elif node.inputs[0].type.dtype == "float64":
        d["type"] = "double"
    else:
        raise NotImplementedError(
            f"Type {node.inputs[0].type.dtype} not implemented"
        )
    d["gemm"] = "dgemm_"
    if d["type"] != "double":
        d["gemm"] = "sgemm_"

    # Dispatch to the appropriate C template.
    if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical:
        if self.verbose:
            _logger.debug(
                "return imshp!=imshp_logical or"
                " self.kshp != self.kshp_logical shape version"
            )
        return _conv_op_code_a % d

    if self.unroll_patch:
        if self.verbose:
            _logger.debug("return unroll patch version. all_shape=%s", all_shape)
        return _conv_op_code_unroll_patch % d
    if (self.unroll_batch is not None and self.unroll_batch > 0) or (
        self.unroll_kern is not None and self.unroll_kern > 0
    ):
        assert self.unroll_batch > 0
        assert self.unroll_kern > 0
        if self.verbose:
            _logger.debug(
                "return unrolled batch (%s) and kern code (%s)",
                str(self.unroll_batch),
                str(self.unroll_kern),
            )
        return gen_conv_code_unroll_batch_kern(
            d, self.unroll_batch, self.unroll_kern
        )

    # TODO: should we choose the unroll size automatically with the bigger divisor under 5?
    if self.out_mode == "valid" and self.dx == 0 and self.dy == 0:
        if self.verbose:
            _logger.debug("return gemm version")
        return _conv_op_code_valid_gemm % d
    else:
        if self.verbose:
            _logger.debug("return no gemm version")
        return _conv_op_code_a % d
_conv_op_code_a = """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL;
PyArrayObject *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;
const %(type)s fill_value = 0;
int type_im=PyArray_TYPE(%(img2d)s);
int type_ker=PyArray_TYPE(%(filtersflipped)s);
npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
npy_intp dim_im_phys[2]={%(self_imshp1)s,%(self_imshp2)s};
npy_intp dim_im_log[2]={%(self_imshp_logical_r)s,%(self_imshp_logical_c)s};
npy_intp dim_ker_phys[2]={%(self_kshp0)s,%(self_kshp1)s};
npy_intp dim_ker_log[2]={%(self_kshp_logical_r)s,%(self_kshp_logical_c)s};
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;
PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)s;
}
if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{
std::stringstream temp;
temp << "nddim="<<PyArray_NDIM(%(filtersflipped)s);
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("kernel don't have a good shape. " + param).c_str());
%(fail)s;
}
%(assert_size)s
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
img2d_arr = (PyArrayObject*)img2d;
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
}
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((PyArray_STRIDES(filtersflipped_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s;
}
}
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError,
"invalid mode, only full and valid are supported");
%(fail)s;
}
typenum = PyArray_ObjectType((PyObject*)%(img2d)s, 0);
typenum_f = PyArray_ObjectType((PyObject*)%(filtersflipped)s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type"); %(fail)s;}
if (typenum != typenum_f) {
PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)s;
}
if (!img2d)
{
PyErr_SetString(PyExc_AssertionError, "!img2d");
%(fail)s;
}
if (!filtersflipped)
{
PyErr_SetString(PyExc_AssertionError, "!filtersflipped");
%(fail)s;
}
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
||(PyArray_DIMS(%(z)s)[3] != dim_zz[1])
||!PyArray_ISCONTIGUOUS(%(z)s)
)
{
{Py_XDECREF(%(z)s);}
npy_intp dims[4] = {0,0,0,0};
dims[0]=%(self_bsize)s;
dims[1]=%(self_nkern)s;
dims[2]=dim_zz[0];
dims[3]=dim_zz[1];
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//assertions
if (!PyArray_ISCONTIGUOUS(%(z)s))
{
PyErr_SetString(PyExc_AssertionError, "Output (%(z)s) not contiguous");
%(fail)s;
}
for(int b=0;b< %(self_bsize)s;b++){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(z_arr,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern,stack_size));
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
// Reposition index into input image based on requested output size
//row position in logical output image
int pos_m = iter_m*%(self_dx)s;
//row anchor in logical input image (we will loop upward from here)
int new_m;
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker_log[0]-1);
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
// current col position in logical output image
int pos_n=iter_n*%(self_dy)s;
%(type)s sum=0;
// Sum over kernel, if index into image is out of bounds
// fill with the value
// loop over logical rows in kernel
for (int j_log=0; j_log < %(self_kshp_logical_r)s; j_log++) {
// ind0_log: row position in logical input image
int ind0_log = (new_m-j_log);
if ((j_log < %(self_kshp_logical_offset_r)s) ||
(j_log - %(self_kshp_logical_offset_r)s) MOD %(self_kshp_logical_stride_r)s)
continue;
if (ind0_log MOD %(self_imshp_logical_stride_r)s)
continue;
int j_phys = ((j_log- %(self_kshp_logical_offset_r)s) /
%(self_kshp_logical_stride_r)s);
int ind0_phys = (ind0_log / %(self_imshp_logical_stride_r)s);
//std::cerr <<"j_log" << j_log << " j_phys " << j_phys << " " << ind0_phys << "\\n";
if(mode==FULL){
//This is a pointer to the current row of the kernel
const %(type)s * idx_hvals=&hvals[j_phys*dim_ker_phys[1]];
if(ind0_log < 0 || ind0_log >= dim_im_log[0]){
// the current row of the kernel is off the image
}else{
int k = max((int)(pos_n-dim_im_log[1])+1,0);
int max_k=min(pos_n+1,(int)dim_ker_log[1]);
const %(type)s * idx_in=&in[ind0_phys*dim_im_phys[1]];
for (int ind1_log=pos_n-k; k<max_k; k++,ind1_log--) {
if (1)
{
if ((k < %(self_kshp_logical_offset_c)s) ||
(k - %(self_kshp_logical_offset_c)s) MOD
%(self_kshp_logical_stride_c)s)
continue;
if (ind1_log MOD
%(self_imshp_logical_stride_c)s)
continue;
}
sum += idx_hvals[(k-%(self_kshp_logical_offset_c)s) /
%(self_kshp_logical_stride_c)s] *
idx_in[ind1_log / %(self_imshp_logical_stride_c)s];
}
}
}else{ // mode==VALID
//JB: should be dim_im[1] right? (was dim_im[0])
const %(type)s* idx_in=&in[ind0_phys*dim_im_phys[1]];
const %(type)s* idx_hvals=&hvals[j_phys*dim_ker_phys[1]];
int new_n = (pos_n+dim_ker_log[1]-1);
if (%(self_imshp_logical_stride_c)s != 1) // a general loop
{
for (int k=0,last=new_n; k < dim_ker_log[1]; k++,last--) {
if ((k < %(self_kshp_logical_offset_c)s) ||
(k - %(self_kshp_logical_offset_c)s) MOD
%(self_kshp_logical_stride_c)s)
continue;
else if (last MOD %(self_imshp_logical_stride_c)s)
continue;
else
{
sum+=idx_hvals[(k-%(self_kshp_logical_offset_c)s) /
%(self_kshp_logical_stride_c)s] *
idx_in[last/%(self_imshp_logical_stride_c)s];
}
}
}
else // self_imshp_stride_c == 1
{
int offset = %(self_kshp_logical_offset_c)s;
int k_phys=0;
for (int k_log=offset,last=new_n-offset;
k_log < dim_ker_log[1]; ) {
sum += idx_hvals[k_phys]*idx_in[last];
++k_phys;
last -= %(self_kshp_logical_stride_c)s;
k_log += %(self_kshp_logical_stride_c)s;
}
}
}
}//for j_log
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum;
}//for iter_n
}//for iter_m
}//for stack_size
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
std::cout << " " << out[i];
std::cout << "\\n";
}
}//for n_kern
}//for b
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
#########
# ConvOp c_code for valid mode (uses gemm)
#########
_conv_op_code_valid_gemm = """
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *img2d_arr=NULL, *z_arr=NULL;
const int NKERN = %(self_nkern)s;
int type_im=PyArray_TYPE(%(img2d)s);
int type_ker=PyArray_TYPE(%(filtersflipped)s);
npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
const npy_intp dim_ker0=%(self_kshp0)s;
const npy_intp dim_ker1=%(self_kshp1)s;
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;
PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig;
if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)s;
}
if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{
std::stringstream temp;
temp << "nddim="<<PyArray_NDIM(%(filtersflipped)s);
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("kernel don't have a good shape. " + param).c_str());
%(fail)s;
}
if (NKERN != kerns_dim[0])
{
PyErr_SetString(PyExc_NotImplementedError, "nonsense nkern");
%(fail)s;
}
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
img2d_arr = (PyArrayObject*)img2d;
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
}
typenum = PyArray_ObjectType((PyObject*)%(img2d)s, 0);
typenum_f = PyArray_ObjectType((PyObject*)%(filtersflipped)s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type"); %(fail)s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match"); %(fail)s;}
if (!img2d) {
PyErr_SetString(PyExc_ValueError, "Null argument img2d");
%(fail)s;
}
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
|| (PyArray_DIMS(%(z)s)[3] != dim_zz[1])
)
{
{Py_XDECREF(%(z)s);}
npy_intp dims[4] = {0,0,0,0};
dims[0]=%(self_bsize)s;
dims[1]=%(self_nkern)s;
dims[2]=dim_zz[0];
dims[3]=dim_zz[1];
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
%(assert_size)s
int Os[2];
Os[0] = dim_im[0]-dim_ker0+1;
Os[1] = dim_im[1]-dim_ker1+1;
// allocate a temporary buffer for storing the inner product of each nth kernel row
// with each row of an image
{
%(type)s * kbuf = (%(type)s *)malloc((Os[0] * NKERN + PyArray_Size((PyObject*)%(filtersflipped)s))* (npy_intp)sizeof(%(type)s));
int kbufstride = NKERN;
%(type)s * myfilters = kbuf + Os[0] * NKERN;
//copy out filtersflipped into filters un-flipped format
//std::cerr << "__filling myfilters__\\n";
for(int i=0;i < kerns_dim[0];++i){
for(int j=0;j < kerns_dim[1];++j){
for(int k=0;k < kerns_dim[2];++k){
for(int l=0;l < kerns_dim[3];++l){
%(type)s * ff = ((PyArray_NDIM(%(filtersflipped)s)) == 3)
? (%(type)s *)PyArray_GETPTR3(%(filtersflipped)s, i, kerns_dim[2]-1-k, kerns_dim[3]-1-l)
: (%(type)s *)PyArray_GETPTR4(%(filtersflipped)s, i, j, kerns_dim[2]-1-k, kerns_dim[3]-1-l);
myfilters[i * (kerns_dim[1]*kerns_dim[2]*kerns_dim[3])
+ j * (kerns_dim[2]*kerns_dim[3])
+ k * (kerns_dim[3])
+ l] = ff[0];
//std::cerr << " " << ff[0];
}
//std::cerr << "\\n";
}
//std::cerr << "(end of stack/batch " <<j << "/" << i << " ) \\n";
}
}
//std::cerr << "-----new loop ----\\n";
for(int b=0;b< %(self_bsize)s;b++){
for (int img_col = 0; img_col < Os[1]; ++img_col){
for (int filter_row = 0; filter_row < kerns_dim[2]; ++filter_row){
for (int stackidx = 0; stackidx < %(self_imshp0)s; ++stackidx){
%(type)s * img_colview =
(%(type)s *)(PyArray_GETPTR4(img2d, b, stackidx, filter_row, img_col));
%(type)s * filter_rows = myfilters + stackidx * (kerns_dim[2]*kerns_dim[3]) +
filter_row * kerns_dim[3];
//std::cerr << "filterview offset: " << filter_rows - myfilters << "\\n";
char N = 'N'; char T = 'T';
int Nz0 = Os[0];
int Nz1 = NKERN;
int K = kerns_dim[3];
%(type)s alpha = 1.0;
%(type)s beta = stackidx ? 1.0 : 0.0;
int imgview_stride = dim_im[1];
int filter_rows_stride =kerns_dim[1]*kerns_dim[2]*kerns_dim[3];
//remember, Fortran wants a column-major interpretation
assert(PyArray_STRIDES(img2d)[3] == (npy_intp)sizeof(%(type)s));
if (0){
std::cerr << "b " << b << " img_col " << img_col << " filterrow " << filter_row << " stackidx " <<stackidx << "\\n";
std::cerr << "colview (physical layout) stride: " << imgview_stride << "\\n";
for (int ii = 0; ii < Nz0; ++ii){
for (int jj = 0; jj < K; ++jj){
std::cerr << " " << img_colview[ii * imgview_stride + jj];
}
std::cerr << "\\n";
}
std::cerr << "filterview ("<<filter_row<<"'th rows) stride: " << filter_rows_stride << "\\n";
for (int ii = 0; ii < Nz1; ++ii){
for (int jj = 0; jj < K; ++jj){
std::cerr << " " << filter_rows[ii * filter_rows_stride + jj];
}
std::cerr << "\\n";
}
std::cerr << Nz1 << " " << Nz0 << " " << K << "\\n" ;
}
%(gemm)s(&T, &N,
&Nz1, &Nz0, &K,
&alpha,
filter_rows, &filter_rows_stride,
img_colview, &imgview_stride,
&beta, kbuf, &kbufstride);
if (0){
std::cerr << "z (logical layout) beta" << beta << "\\n";
for (int ii = 0; ii < Nz0; ++ii){
for (int jj = 0; jj < Nz1; ++jj){
std::cerr << " " << kbuf[ii * kbufstride + jj];
}
std::cerr << "\\n";
}
}
}
// now kbuf the sum over the stack, put it into the outbuf
for (int img_row = 0; img_row < Os[0]; ++img_row) {
for (int kernel_idx = 0; kernel_idx < NKERN; ++kernel_idx) {
%(type)s * z_p = (%(type)s *)PyArray_GETPTR4(%(z)s, b, kernel_idx, img_row, img_col);
if (0)
{
if (b >= PyArray_DIMS(%(z)s)[0]) %(fail)s;
if (kernel_idx >= PyArray_DIMS(%(z)s)[1]) %(fail)s;
if (img_row >= PyArray_DIMS(%(z)s)[2]) %(fail)s;
if (img_col >= PyArray_DIMS(%(z)s)[3]) %(fail)s;
}
z_p[0] += kbuf[img_row * kbufstride + kernel_idx];
}
}
}
}
}
free(kbuf);
}
Py_XDECREF(img2d);
"""
def gen_conv_code_unroll_batch_kern(d, unroll_bsize=1, unroll_ksize=1):
    """
    c_code for ConvOp that unroll the batch size loop.

    Parameters
    ----------
    d : dict
        Substitution dictionary used to instantiate the C templates
        (keys such as ``img2d``, ``filtersflipped``, ``z``, ``type``,
        ``mode``, ``fail`` and the ``self_*`` shape entries).  It is
        copied, never mutated.
    unroll_bsize : int
        Number of iterations of the batch loop to unroll; must be > 0.
    unroll_ksize : int
        Number of iterations of the kernel loop to unroll; must be > 0.

    Returns
    -------
    str
        The generated C code.  NOTE(review): some ``%(...)s``
        placeholders (e.g. ``%(affectation)s``) are deliberately left
        unexpanded for the caller to substitute later.
    """
    assert unroll_bsize > 0 and unroll_ksize > 0
    # The helpers below store their loop counters into `d`; refuse a
    # dictionary that already uses those keys so caller data is not
    # silently clobbered.
    if (
        "unroll_bsize" in d
        or "unroll_ksize" in d
        or "unroll_iter" in d
        or "unroll_biter" in d
        or "unroll_kiter" in d
    ):
        raise ValueError(
            "We can't use this dictionary as we will overwrite some of its content"
        )
    # Work on a copy so the caller's dictionary is left untouched.
    d = d.copy()
    d["unroll_bsize"] = unroll_bsize
    d["unroll_ksize"] = unroll_ksize
    def my_dup(st, size):
        # Emit `size` copies of template `st`, with %(unroll_iter)s bound
        # to the copy index 0 .. size-1.
        s = ""
        for i in range(size):
            d["unroll_iter"] = i
            s += st % d
        return s + "\n"
    def my_dup2(st):
        # Emit one copy of template `st` per (batch, kernel) unroll pair,
        # binding %(unroll_biter)s, %(unroll_kiter)s and a flat
        # %(unroll_iter)s counter.
        s = ""
        iter = 0
        for i in range(unroll_bsize):
            d["unroll_biter"] = i
            for j in range(unroll_ksize):
                d["unroll_kiter"] = j
                d["unroll_iter"] = iter
                iter += 1
                s += st % d
        return s + "\n"
    # Prologue: validate/reshape image and kernel to 4d, force them
    # C-contiguous if needed, (re)allocate the output, then open the
    # unrolled batch/kernel loops.
    ret = (
        """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;;
const %(type)s fill_value = 0;
int type_im=PyArray_TYPE(%(img2d)s);
int type_ker=PyArray_TYPE(%(filtersflipped)s);
npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
const npy_intp dim_ker0=%(self_kshp0)s;
const npy_intp dim_ker1=%(self_kshp1)s;
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;
PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else {
std::stringstream temp;
temp << "nddim="<<PyArray_NDIM(%(img2d)s);
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("img don't have a good shape. " + param).c_str());
%(fail)s;
}
if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{
PyErr_SetString(PyExc_ValueError, "kernel don't have a good shape");
%(fail)s;
}
%(assert_size)s
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
img2d_arr = (PyArrayObject*)img2d;
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
}
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((PyArray_STRIDES(filtersflipped_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s;
}
}
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;
}
typenum = PyArray_ObjectType((PyObject*)%(img2d)s, 0);
typenum_f = PyArray_ObjectType((PyObject*)%(filtersflipped)s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type"); %(fail)s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match"); %(fail)s;}
if (!img2d)
{
PyErr_SetString(PyExc_AssertionError, "!img2d");
%(fail)s;
}
if (!filtersflipped)
{
PyErr_SetString(PyExc_AssertionError, "!filtersflipped");
%(fail)s;
}
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
||(PyArray_DIMS(%(z)s)[3] != dim_zz[1])
||!PyArray_ISCONTIGUOUS(%(z)s)
)
{
{Py_XDECREF(%(z)s);}
npy_intp dims[4] = {0,0,0,0};
dims[0]=%(self_bsize)s;
dims[1]=%(self_nkern)s;
dims[2]=dim_zz[0];
dims[3]=dim_zz[1];
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//assertions
if (!PyArray_ISCONTIGUOUS(%(z)s))
{
PyErr_SetString(PyExc_AssertionError, "Output (%(z)s) not contiguous");
%(fail)s;
}
for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){
"""
        % d
    )
    # One output pointer per unrolled (batch, kernel) pair, then zero each
    # output plane before accumulating over the input stack.
    ret += my_dup2(
        "%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(z_arr,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));"
    )
    ret += my_dup(
        "for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out%(unroll_iter)s[i] = 0;",
        unroll_bsize * unroll_ksize,
    )
    ret += (
        """
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
"""
        % d
    )
    # Per-unroll input-image and kernel-row base pointers.
    ret += my_dup(
        "const %(type)s * __restrict__ in%(unroll_iter)d=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b+%(unroll_iter)s,stack_size));",
        unroll_bsize,
    )
    ret += my_dup(
        "const %(type)s * __restrict__ hvals%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern+%(unroll_iter)s,stack_size));",
        unroll_ksize,
    )
    ret += (
        """
int new_m;
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
// Reposition index into input image based on requested output size
int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker0-1);
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*%(self_dy)s;
"""
        % d
    )
    # One accumulator per unrolled (batch, kernel) pair.
    ret += my_dup("%(type)s sum%(unroll_iter)s=0;", unroll_bsize * unroll_ksize)
    ret += (
        """
// Sum over kernel, if index into image is out of bounds
// fill with the value
for (int j=0; j < dim_ker0; j++) {
int ind0 = (new_m-j);
if(mode==FULL){
"""
        % d
    )
    ret += my_dup(
        "const %(type)s * idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker1];",
        unroll_ksize,
    )
    ret += (
        """
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker1; k++) {
"""
        % d
    )
    ret += my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
    ret += (
        """
}
}else{
//do the part where kernel is to the right of the img
int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){
for(k=0;k<max_k;k++){
"""
        % d
    )
    ret += my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
    ret += (
        """
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(pos_n+1,(int)dim_ker1);
"""
        % d
    )
    ret += my_dup(
        "const %(type)s * idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];",
        unroll_bsize,
    )
    ret += (
        """
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
"""
        % d
    )
    ret += my_dup2(
        "sum%(unroll_iter)s+= idx_hvals%(unroll_kiter)s[k] * idx_in%(unroll_biter)s[ind1];"
    )
    ret += (
        """
}
//do the part to the left of the img
if(fill_value!=0)
for(;k<dim_ker1;k++){
"""
        % d
    )
    ret += my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
    ret += (
        """
}
}
}else{//valid mode
"""
        % d
    )
    ret += my_dup(
        "const %(type)s* idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];",
        unroll_bsize,
    )
    ret += my_dup(
        "const %(type)s* idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker1];",
        unroll_ksize,
    )
    ret += (
        """
int new_n = (pos_n+dim_ker1-1);
for (int k=0,last=new_n; k < dim_ker1; k++,last--) {
"""
        % d
    )
    ret += my_dup2(
        "sum%(unroll_iter)s+=idx_hvals%(unroll_kiter)s[k]*idx_in%(unroll_biter)s[last];"
    )
    ret += (
        """
}
}
}//for j
"""
        % d
    )
    # %(affectation)s (e.g. "=" or "+=") is substituted later by the caller.
    ret += my_dup(
        "out%(unroll_iter)s[iter_m*dim_zz[1]+iter_n] %(affectation)s sum%(unroll_iter)s;",
        unroll_bsize * unroll_ksize,
    )
    ret += """
}//for n
}//for m
}//for stack_size
}//for n_kern
}//for b
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
    return ret
_conv_op_code_unroll_patch = """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;
const %(type)s fill_value = 0;//only value of 0 are currently tested and correctly implemented
int type_im=PyArray_TYPE(%(img2d)s);
int type_ker=PyArray_TYPE(%(filtersflipped)s);
const npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
//The following line caused gcc 4.3.0 20080428 (Red Hat 4.3.0-8) to crash
//const npy_intp dim_ker[2]={%(self_kshp0)s,%(self_kshp1)s};
// The next line had gcc don't crash.
const npy_intp dim_ker0=%(self_kshp0)s;
const npy_intp dim_ker1=%(self_kshp1)s;
%(dim_zz_const)s npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
%(dim_zz_affect)s
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;
PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else {
PyErr_Format(PyExc_ValueError,
"image don't have a good number of dimensions %%d. ", PyArray_NDIM(%(filtersflipped)s));
%(fail)s;
}
if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{
PyErr_Format(PyExc_ValueError,
"kernel don't have a good number of dimensions %%d. ", PyArray_NDIM(%(filtersflipped)s));
%(fail)s;
}
%(assert_size)s
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((PyArray_STRIDES(img2d_arr)[3] != sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
img2d_arr = (PyArrayObject*)img2d;
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
}
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((PyArray_STRIDES(filtersflipped_arr)[3] != sizeof(%(type)s))
|| (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s;
}
}
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;
}
if(dim_zz[0]<=0 || dim_zz[1]<=0){
PyErr_Format(PyExc_ValueError,
"Output dimensions are not valid %%ldx%%ld",(long int)dim_zz[0],(long int)dim_zz[1]);
%(fail)s;
}
typenum = PyArray_ObjectType((PyObject*)%(img2d)s, 0);
typenum_f = PyArray_ObjectType((PyObject*)%(filtersflipped)s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type"); %(fail)s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match"); %(fail)s;}
if (!img2d) %(fail)s;
if (!filtersflipped) %(fail)s;
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
|| (PyArray_DIMS(%(z)s)[3] != dim_zz[1])
)
{
if (%(z)s) Py_DECREF(%(z)s);
npy_intp dims[4] = {0,0,0,0};
if(!dims) %(fail)s;
dims[0]=%(self_bsize)s;
dims[1]=%(self_nkern)s;
dims[2]=dim_zz[0];
dims[3]=dim_zz[1];
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
// assert the output is C-contiguous
if (!PyArray_ISCONTIGUOUS(%(z)s))
{
PyErr_SetString(PyExc_AssertionError, "Output (%(z)s) not contiguous");
%(fail)s;
}
//The if on the number of loop make a speed up for small array.
//with g++ 4.5.1. The compiler should be smart enough to do this himself!
#pragma omp parallel for schedule(static) if(%(self_bsize)s * %(self_nkern)s > 1)
// We merge the 2 loop into one to make it easier to parallelize on both
// This is the equivalent of those 2 lines.
//for(int b=0;b< %(self_bsize)s;b++){
// for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
for(int batch_kern_idx=0;
batch_kern_idx < %(self_bsize)s * %(self_nkern)s;
batch_kern_idx++){
int b = batch_kern_idx / %(self_nkern)s;
int n_kern = batch_kern_idx %% %(self_nkern)s;
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(z_arr,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern,stack_size));
int new_m;
for (int iter_m=0; iter_m < dim_zz[0]; iter_m++) {
// Reposition index into input image based on requested output size
int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker0-1);
for (int iter_n=0; iter_n < dim_zz[1]; iter_n++) { // loop over columns
int pos_n=iter_n*%(self_dy)s;
%(type)s sum=0;
%(type)s sum2=0;
%(type)s sum3=0;
%(type)s sum4=0;
int nb_sum=0;
// Sum over kernel, if index into image is out of bounds
// fill with the value
for (int j=0; j < dim_ker0; j++) {
int ind0 = (new_m-j);
if(mode==FULL){
const %(type)s * idx_hvals=&hvals[j*dim_ker1];
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker1; k++) {
sum+= idx_hvals[k] * fill_value;
}
}else{
//do the part where kernel is to the right of the img
int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){
for(k=0;k<max_k;k++){
sum+= idx_hvals[k]*fill_value;
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(pos_n+1,(int)dim_ker1);
const %(type)s * idx_in=&in[ind0*dim_im[1]];
if(iter_n + 4*%(self_dy)s < dim_zz[1]
&& iter_n>dim_ker1-1
&& iter_n<dim_im[1]-dim_ker1+1-3){
nb_sum=4;
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+%(self_dy)s];
sum3+=idx_hvals[k]*idx_in[ind1+2*%(self_dy)s];
sum4+=idx_hvals[k]*idx_in[ind1+3*%(self_dy)s];
}
}else if(iter_n + 2*%(self_dy)s < dim_zz[1]
&& iter_n>dim_ker1-1
&& iter_n<dim_im[1]-dim_ker1+1){
nb_sum=2;
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+%(self_dy)s];
}
}else{
nb_sum=1;
/*
%(type)s sum_=0;
if((k-max_k) & 0x1 != 0){
sum+= idx_hvals[k] * idx_in[pos_n-k];
}
for (int ind1=pos_n-k; k<max_k; k+=2,ind1-=2) {
sum+= idx_hvals[k] * idx_in[ind1];
sum_+= idx_hvals[k+1] * idx_in[ind1-1];
}
sum+=sum_;
*/
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
}
}
//do the part to the left of the img
if(fill_value!=0)
for(;k<dim_ker1;k++) sum+= idx_hvals[k]*fill_value;
}
}else{//valid mode
const %(type)s* idx_in=&in[ind0*dim_im[1]];
const %(type)s* idx_hvals=&hvals[j*dim_ker1];
if(iter_n + 4*%(self_dy)s < dim_zz[1]){
nb_sum=4;
for (int k=dim_ker1-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
sum2+=idx_hvals[k]*idx_in[im_idx+%(self_dy)s];
sum3+=idx_hvals[k]*idx_in[im_idx+2*%(self_dy)s];
sum4+=idx_hvals[k]*idx_in[im_idx+3*%(self_dy)s];
}
}else if(iter_n + 2*%(self_dy)s < dim_zz[1]){
nb_sum=2;
for (int k=dim_ker1-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
sum2+=idx_hvals[k]*idx_in[im_idx+%(self_dy)s];
}
}else{
nb_sum=1;
for (int k=dim_ker1-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
}
}
}//else valid mode
}//for j
switch(nb_sum){
case 4: out[iter_m*dim_zz[1]+iter_n+3] %(affectation)s sum4;
case 3: out[iter_m*dim_zz[1]+iter_n+2] %(affectation)s sum3;
case 2: out[iter_m*dim_zz[1]+iter_n+1] %(affectation)s sum2;
case 1: out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum;
}
iter_n+=nb_sum-1;
}//for iter_n
}//for iter_m
}//for stack_size
}//for b and n_kern
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
import pytensor
from pytensor import tensor as at
from pytensor.gradient import DisconnectedType
from pytensor.graph.basic import Apply
from pytensor.graph.op import Op
from pytensor.graph.rewriting.basic import (
WalkingGraphRewriter,
copy_stack_trace,
node_rewriter,
)
def get_diagonal_subtensor_view(x, i0, i1):
    """
    Helper function for DiagonalSubtensor and IncDiagonalSubtensor.

    Notes
    -----
    It returns a partial view of x, not a partial copy.
    """
    # Plain Python ints are required because indexing with 0-dim integer
    # ndarrays is not supported.
    axis0 = int(i0)
    axis1 = int(i1)
    if x.shape[axis0] < x.shape[axis1]:
        raise NotImplementedError("is this allowed?")
    # Drop the first shape[axis1]-1 entries along axis0, then skew the
    # view by subtracting axis0's stride from axis1's stride so each step
    # along axis1 also moves one step back along axis0 (a diagonal band).
    selector = [slice(None)] * x.ndim
    selector[axis0] = slice(x.shape[axis1] - 1, None, None)
    view = x[tuple(selector)]
    skewed_strides = list(view.strides)
    if x.shape[axis1] != 1:
        skewed_strides[axis1] -= skewed_strides[axis0]
    view.strides = skewed_strides
    return view
class DiagonalSubtensor(Op):
    """Extract a thick diagonal "stripe" across two axes of an n-d tensor.

    Parameters
    ----------
    x
        n-d tensor.
    i0
        Axis index in x (plays the "row" role of the stripe).
    i1
        Axis index in x (plays the "column" role of the stripe).

    Extended summary
    ----------------
    Viewing the slice of ``x`` along axes ``i0``/``i1`` as a matrix ``u``,
    the output keeps only a thick diagonal band of ``u`` and discards the
    lower-left and upper-right triangles.  For a 7x4 ``u`` the kept
    entries (``x``) are::

        [ x 0 0 0 ]
        [ x x 0 0 ]
        [ x x x 0 ]
        [ 0 x x x ]
        [ 0 0 x x ]
        [ 0 0 0 x ]

    The output has the same number of dimensions as ``x``; only the
    length along ``i0`` shrinks by ``shape[i1] - 1`` (the chopped
    triangles).  ``get_diagonal_subtensor_view`` raises
    NotImplementedError when ``shape[i0]`` is too small for the stripe to
    reach across the matrix, since the result is ill-defined there.
    """

    __props__ = ("inplace",)

    def __init__(self, inplace=False):
        self.inplace = inplace
        if inplace:
            # Output is a view of input 0; tell the graph machinery.
            self.view_map = {0: [0]}

    def __str__(self):
        cls_name = self.__class__.__name__
        return f"{cls_name}{{inplace}}" if self.inplace else cls_name

    def make_node(self, x, i0, i1):
        _i0 = at.as_tensor_variable(i0)
        _i1 = at.as_tensor_variable(i1)
        # TODO: We could produce a more precise static shape output type
        type_shape = tuple(1 if s == 1 else None for s in x.type.shape)
        out_type = at.TensorType(x.type.dtype, shape=type_shape)
        return Apply(self, [x, _i0, _i1], [out_type()])

    def perform(self, node, inputs, output_storage):
        stripe_view = get_diagonal_subtensor_view(*inputs)
        output_storage[0][0] = stripe_view if self.inplace else stripe_view.copy()

    def grad(self, inputs, g_outputs):
        # Scatter the output gradient back into a zero tensor shaped
        # like the input; the axis arguments carry no gradient.
        zeros = at.zeros_like(inputs[0])
        gx = inc_diagonal_subtensor(zeros, inputs[1], inputs[2], g_outputs[0])
        return [gx, DisconnectedType()(), DisconnectedType()()]

    def connection_pattern(self, node):
        return [[True], [False], [False]]
diagonal_subtensor = DiagonalSubtensor(False)
class IncDiagonalSubtensor(Op):
    """
    The gradient of DiagonalSubtensor: add `amt` into the diagonal
    stripe of `x` selected by axes `i0` and `i1`.
    """

    __props__ = ("inplace",)

    def __init__(self, inplace=False):
        self.inplace = inplace
        if inplace:
            # Input 0 is overwritten when running inplace.
            self.destroy_map = {0: [0]}

    def __str__(self):
        cls_name = self.__class__.__name__
        return f"{cls_name}{{inplace}}" if self.inplace else cls_name

    def make_node(self, x, i0, i1, amt):
        _i0 = at.as_tensor_variable(i0)
        _i1 = at.as_tensor_variable(i1)
        return Apply(self, [x, _i0, _i1, amt], [x.type()])

    def perform(self, node, inputs, output_storage):
        x, i0, i1, amt = inputs
        if not self.inplace:
            x = x.copy()
        # Accumulating through the strided view touches exactly the
        # stripe that DiagonalSubtensor would have read.
        stripe_view = get_diagonal_subtensor_view(x, i0, i1)
        stripe_view += amt
        output_storage[0][0] = x

    def grad(self, inputs, g_outputs):
        x, i0, i1, amt = inputs
        gy = g_outputs[0]
        # d/dx passes gy straight through; d/damt reads the stripe back.
        return [
            gy,
            DisconnectedType()(),
            DisconnectedType()(),
            diagonal_subtensor(gy, i0, i1),
        ]

    def connection_pattern(self, node):
        return [[True], [False], [False], [True]]
inc_diagonal_subtensor = IncDiagonalSubtensor(False)
def conv3d(
    signals, filters, signals_shape=None, filters_shape=None, border_mode="valid"
):
    """
    Convolve spatio-temporal filters with a movie.
    It flips the filters.

    Parameters
    ----------
    signals
        Timeseries of images whose pixels have color channels.
        Shape: [Ns, Ts, C, Hs, Ws].
    filters
        Spatio-temporal filters.
        Shape: [Nf, Tf, C, Hf, Wf].
    signals_shape
        None or a tuple/list with the shape of signals.
    filters_shape
        None or a tuple/list with the shape of filters.
    border_mode
        One of 'valid', 'full' or 'half'; may also be a 3-tuple of such
        strings, one per (time, height, width) dimension.  The height
        and width modes must be equal.

    Notes
    -----
    Another way to define signals: (batch, time, in channel, row, column)
    Another way to define filters: (out channel,time,in channel, row, column)

    See Also
    --------
    Someone made a script that shows how to swap the axes between
    both 3d convolution implementations in PyTensor. See the last
    `attachment <https://groups.google.com/d/msg/pytensor-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_
    """
    if isinstance(border_mode, str):
        border_mode = (border_mode, border_mode, border_mode)
    if signals_shape is None:
        _signals_shape_5d = signals.shape
    else:
        _signals_shape_5d = signals_shape
    if filters_shape is None:
        _filters_shape_5d = filters.shape
    else:
        _filters_shape_5d = filters_shape
    Ns, Ts, C, Hs, Ws = _signals_shape_5d
    Nf, Tf, C, Hf, Wf = _filters_shape_5d
    # Fold the time axis into the batch/filter axis so a single 2d
    # convolution computes every (Ts x Tf) pairing at once.
    _signals_shape_4d = (Ns * Ts, C, Hs, Ws)
    _filters_shape_4d = (Nf * Tf, C, Hf, Wf)
    if border_mode[1] != border_mode[2]:
        raise NotImplementedError("height and width bordermodes must match")
    conv2d_signal_shape = _signals_shape_4d
    conv2d_filter_shape = _filters_shape_4d
    if signals_shape is None:
        conv2d_signal_shape = None
    if filters_shape is None:
        conv2d_filter_shape = None
    # BUG FIX: this used to call `pytensor.tensor.nnet.conv2d`, but the
    # deprecated `pytensor.tensor.nnet` module has been removed; conv2d
    # is now exposed from `pytensor.tensor.conv`.
    out_4d = pytensor.tensor.conv.conv2d(
        signals.reshape(_signals_shape_4d),
        filters.reshape(_filters_shape_4d),
        input_shape=conv2d_signal_shape,
        filter_shape=conv2d_filter_shape,
        border_mode=border_mode[1],
    )  # ignoring border_mode[2]
    # compute the intended output size
    if border_mode[1] == "valid":
        Hout = Hs - Hf + 1
        Wout = Ws - Wf + 1
    elif border_mode[1] == "full":
        Hout = Hs + Hf - 1
        Wout = Ws + Wf - 1
    elif border_mode[1] == "half":
        Hout = Hs - (Hf % 2) + 1
        Wout = Ws - (Wf % 2) + 1
    elif border_mode[1] == "same":
        raise NotImplementedError()
    else:
        raise ValueError("invalid border mode", border_mode[1])
    # reshape the temporary output to restore its original size
    out_tmp = out_4d.reshape((Ns, Ts, Nf, Tf, Hout, Wout))
    # now sum out along the Tf to get the output
    # but we have to sum on a diagonal through the Tf and Ts submatrix.
    if Tf == 1:
        # for Tf==1, no sum along Tf, the Ts-axis of the output is unchanged!
        out_5d = out_tmp.reshape((Ns, Ts, Nf, Hout, Wout))
    else:
        # for some types of convolution, pad out_tmp with zeros
        if border_mode[0] == "valid":
            Tpad = 0
        elif border_mode[0] == "full":
            Tpad = Tf - 1
        elif border_mode[0] == "half":
            Tpad = Tf // 2
        elif border_mode[0] == "same":
            raise NotImplementedError()
        else:
            raise ValueError("invalid border mode", border_mode[0])
        if Tpad == 0:
            # Summing along the skewed diagonal of the (Ts, Tf) plane is
            # exactly the temporal "valid" convolution.
            out_5d = diagonal_subtensor(out_tmp, 1, 3).sum(axis=3)
        else:
            # pad out_tmp with zeros before summing over the diagonal
            out_tmp_padded = at.zeros(
                dtype=out_tmp.dtype, shape=(Ns, Ts + 2 * Tpad, Nf, Tf, Hout, Wout)
            )
            out_tmp_padded = pytensor.tensor.subtensor.set_subtensor(
                out_tmp_padded[:, Tpad : (Ts + Tpad), :, :, :, :], out_tmp
            )
            out_5d = diagonal_subtensor(out_tmp_padded, 1, 3).sum(axis=3)
    return out_5d
@node_rewriter([DiagonalSubtensor, IncDiagonalSubtensor])
def local_inplace_DiagonalSubtensor(fgraph, node):
    """Also work for IncDiagonalSubtensor."""
    op = node.op
    # Guard clauses: only rewrite non-inplace instances of the two ops.
    if not isinstance(op, (DiagonalSubtensor, IncDiagonalSubtensor)):
        return False
    if op.inplace:
        return False
    replacement = type(op)(inplace=True)(*node.inputs)
    copy_stack_trace(node.outputs[0], replacement)
    return [replacement]
# Register the inplace substitution under the "fast_run"/"inplace" tags so
# that, late in the rewrite pipeline (position=60), DiagonalSubtensor and
# IncDiagonalSubtensor nodes are swapped for their inplace variants.
pytensor.compile.optdb.register(
    "local_inplace_DiagonalSubtensor",
    WalkingGraphRewriter(
        local_inplace_DiagonalSubtensor,
        failure_callback=WalkingGraphRewriter.warn_inplace,
    ),
    "fast_run",
    "inplace",
    position=60,
)
import os
import sys
import pytensor.tensor as at
from pytensor.configdefaults import config
from pytensor.gradient import grad_undefined
from pytensor.graph.basic import Apply
from pytensor.graph.rewriting.basic import node_rewriter
from pytensor.link.c.cmodule import GCC_compiler
from pytensor.link.c.op import ExternalCOp, OpenMPOp
from pytensor.tensor.blas import batched_dot
from pytensor.tensor.extra_ops import cpu_contiguous
from pytensor.tensor.rewriting.basic import register_canonicalize
from pytensor.tensor.type import ftensor3, fvector
def _ctc_find_lib():
    """
    Find the directory that contains libwarpctc.so

    Returns
    -------
    str or None
        The first of ``build``, ``lib`` or ``lib64`` under
        ``config.ctc__root`` that contains ``libwarpctc.so``, or None
        when no root is configured or the library is absent.
    """
    if config.ctc__root != "":
        for lib_dir in ("build", "lib", "lib64"):
            lib_path = os.path.join(config.ctc__root, lib_dir)
            # FIX: os.path.isdir(lib_path) already implies the path
            # exists, so the extra os.path.exists(lib_path) check was
            # redundant and has been dropped.
            if os.path.isdir(lib_path) and os.path.exists(
                os.path.join(lib_path, "libwarpctc.so")
            ):
                return lib_path
    return None
def _ctc_check_compile(ctc_lib_path):
    """
    Check that a minimal program using warp-ctc compiles.

    Parameters
    ----------
    ctc_lib_path : str or None
        Directory containing libwarpctc.so, as found by _ctc_find_lib.

    Returns
    -------
    (bool, str or None)
        (True, None) on success, else (False, error message).
    """
    preamble = """
#include <string.h>
#include "ctc.h"
"""
    body = """
ctcOptions options;
memset(&options, 0, sizeof(ctcOptions));
options.loc = CTC_CPU;
options.num_threads = 1;
"""
    # The directory of this module is always on the include path (for the
    # bundled ctc.h fallback).
    params = [f"-I{os.path.dirname(__file__)}"]
    if ctc_lib_path is not None:
        params.extend([f"-I{os.path.join(config.ctc__root, 'include')}"])
        params.extend([f"-L{ctc_lib_path}"])
    # Always try to link against warpctc ("-l warpctc" as two argv
    # entries is accepted by gcc).
    params.extend(["-l", "warpctc"])
    compiler_res = GCC_compiler.try_flags(
        params, preamble=preamble, body=body, try_run=False, output=True
    )
    # try_flags may return either a bare bool or an (avail, out, err)
    # triple depending on `output`; normalize to the triple.
    avail, out, err = (
        compiler_res if isinstance(compiler_res, tuple) else (compiler_res, None, None)
    )
    if not avail:
        return (
            False,
            ("cannot compile with warp-ctc. " "We got this error:\n" + str(err)),
        )
    return True, None
def ctc_present():
    """Return whether warp-ctc is installed and compilable (cached)."""
    # Result is memoized on the function object; only probe once.
    if ctc_present.avail is not None:
        return ctc_present.avail
    ctc_present.path = _ctc_find_lib()
    ctc_present.avail, ctc_present.msg = _ctc_check_compile(ctc_present.path)
    return ctc_present.avail
ctc_present.avail = None
ctc_present.msg = None
ctc_present.path = None
def ctc_available():
    """Return whether the warp-ctc Op can be used on this platform.

    On failure the reason is stored in ``ctc_available.msg``; on success
    the library directory is stored in ``ctc_available.path``.
    """
    if os.name == "nt":
        # BUG FIX: the message used to be assigned as a 1-tuple
        # ("...supported ",) with the rest of the sentence stranded on a
        # following no-op statement line; it is now one whole string.
        ctc_available.msg = (
            "Windows platforms are currently not supported "
            "by underlying CTC library (warp-ctc)."
        )
        return False
    elif not ctc_present():
        ctc_available.msg = ctc_present.msg
        return False
    ctc_available.path = ctc_present.path
    return True
ctc_available.msg = None
ctc_available.path = None
class ConnectionistTemporalClassification(ExternalCOp, OpenMPOp):
    """
    CTC loss function wrapper.
    Notes
    -----
    Using the wrapper requires that Baidu's warp-ctc library is installed.
    If the warp-ctc library is not on your compiler's default library path,
    you must set the configuration variable ``config.ctc__root`` appropriately.
    Parameters
    ----------
    compute_grad
        If set to True, enables the computation of gradients of the CTC loss function.
    """
    __props__ = ("compute_grad",)
    # Number of inputs/outputs the external C implementation expects.
    _cop_num_inputs = 3
    _cop_num_outputs = 2
    # External C implementation loaded by ExternalCOp.
    func_file = os.path.join("c_code", "ctc_wrapper.c")
    func_name = "APPLY_SPECIFIC(ctc_cost_cpu)"
    def __init__(self, compute_grad=True, openmp=None):
        if not ctc_available():
            raise RuntimeError(
                "Baidu CTC is not available and "
                "ConnectionistTemporalClassification Op "
                "can not be constructed."
            )
        super().__init__(self.func_file, self.func_name)
        OpenMPOp.__init__(self, openmp=openmp)
        self.compute_grad = compute_grad
        # Return only the cost. Gradient will be returned by grad()
        self.default_output = 0
    def c_lib_dirs(self, **kwargs):
        # Directory holding the warp-ctc shared library, when it was found.
        lib_dirs = []
        if ctc_available.path is not None:
            lib_dirs += [ctc_available.path]
        return lib_dirs
    def c_compile_args(self, **kwargs):
        # Embed an rpath so the warp-ctc library is found at load time.
        # Outside macOS, quote the path if it contains spaces.
        if ctc_available.path is not None:
            if sys.platform != "darwin" and " " in ctc_available.path:
                return ['-Wl,-rpath,"' + ctc_available.path + '"']
            else:
                return ["-Wl,-rpath," + ctc_available.path]
        return []
    def c_libraries(self, **kwargs):
        # Link against Baidu's warp-ctc.
        return ["warpctc"]
    def c_header_dirs(self, **kwargs):
        header_dirs = []
        if config.ctc__root != "":
            # We assume here that the header is available at the include directory
            # of the CTC root directory.
            header_dirs += [os.path.join(config.ctc__root, "include")]
        return header_dirs
    def c_headers(self, **kwargs):
        return ["ctc.h"] + super().c_headers(**kwargs)
    def make_node(self, activations, labels, input_lengths):
        """Validate input dtypes/ranks and build the Apply node.

        activations: float32 tensor3; labels: int32 matrix;
        input_lengths: int32 vector.  Outputs the cost vector and,
        when ``compute_grad`` is set, the gradient tensor3.
        """
        t_activations = at.as_tensor_variable(activations)
        # Ensure activations array is C-contiguous
        t_activations = cpu_contiguous(t_activations)
        t_labels = at.as_tensor_variable(labels)
        t_input_lengths = at.as_tensor_variable(input_lengths)
        if t_activations.type.dtype != "float32":
            raise TypeError("activations must use the float32 type!")
        if t_activations.ndim != 3:
            raise ValueError("activations must have 3 dimensions.")
        if t_labels.type.dtype != "int32":
            raise TypeError("labels must use the int32 type!")
        if t_labels.ndim != 2:
            raise ValueError("labels must have 2 dimensions.")
        if t_input_lengths.type.dtype != "int32":
            raise TypeError("input_lengths must use the int32 type!")
        if t_input_lengths.ndim != 1:
            raise ValueError("input_lengths must have 1 dimension.")
        costs = fvector(name="ctc_cost")
        outputs = [costs]
        if self.compute_grad:
            gradients = ftensor3(name="ctc_grad")
            outputs += [gradients]
        return Apply(
            self, inputs=[t_activations, t_labels, t_input_lengths], outputs=outputs
        )
    def L_op(self, inputs, outputs, output_grads):
        # Gradients are only available when the op was built with
        # compute_grad=True (second output present).
        assert self.compute_grad and len(outputs) == 2
        gradients = outputs[1]
        assert gradients is not None
        grad_op = output_grads[0]
        # Scale the precomputed gradient by the incoming cost gradient;
        # dimshuffles move the minibatch axis first for batched_dot and back.
        total_grad = batched_dot(grad_op, gradients.dimshuffle(1, 0, 2)).dimshuffle(
            1, 0, 2
        )
        return [
            total_grad,
            grad_undefined(self, 1, inputs[1]),
            grad_undefined(self, 2, inputs[2]),
        ]
def ctc(activations, labels, input_lengths):
    r"""Compute the Connectionist Temporal Classification (CTC) loss.

    Notes
    -----
    Requires Baidu's warp-ctc library to be installed.  If the library is
    not on the compiler's default library path, the configuration variable
    ``config.ctc__root`` must be set accordingly.

    Parameters
    ----------
    activations
        A 3-D tensor of shape ``(t, m, p)``, where ``t`` indexes time,
        ``m`` the minibatch, and ``p`` the per-symbol probabilities.
        The memory layout is assumed to be C order (slowest to fastest
        changing dimension from left to right), so ``p`` changes fastest.
    labels
        A 2-D tensor with one target label sequence per row.  Negative
        entries are treated as padding and ignored.  The blank symbol is
        assumed to have index 0 in the alphabet.
    input_lengths
        A 1-D tensor giving the number of time steps of each sequence in
        the minibatch.

    Returns
    -------
    1-D array
        The CTC cost of each example in the minibatch.
    """
    return ConnectionistTemporalClassification()(activations, labels, input_lengths)
# Disable gradient computation if not needed
@register_canonicalize("fast_compile")
@node_rewriter([ConnectionistTemporalClassification])
def local_ctc_no_grad(fgraph, node):
    """Replace a gradient-computing CTC op with a cost-only one when the
    gradient output has no clients in the graph."""
    op = node.op
    if not isinstance(op, ConnectionistTemporalClassification):
        return False
    if len(node.outputs) <= 1:
        # Already a cost-only node; nothing to do.
        return False
    if fgraph.clients[node.outputs[1]]:
        # The gradient output is actually used; keep the node as is.
        return False
    cost_only = ConnectionistTemporalClassification(compute_grad=False)
    return [cost_only(*node.inputs), None]
"""
TODO: implement Images2Neibs.infer_shape() methods
"""
import numpy as np
import pytensor
from pytensor.gradient import grad_not_implemented, grad_undefined
from pytensor.graph.basic import Apply
from pytensor.link.c.op import COp
from pytensor.link.c.type import EnumList
from pytensor.tensor.basic import arange, as_tensor_variable, concatenate, stack, zeros
from pytensor.tensor.math import ceil_intdiv
from pytensor.tensor.subtensor import inc_subtensor, set_subtensor
from pytensor.tensor.type import matrix
class Images2Neibs(COp):
    """
    Reshapes the input as a 2D tensor where each row is a pooling
    example.
    Parameters
    ----------
    mode : {'valid', 'half', 'full', 'ignore_borders', 'wrap_centered'}
        - 'valid' :
          Requires an input that is a multiple of the pooling factor
          (in each direction).
        - 'half' :
          Equivalent to 'valid' if we pre-pad with zeros the input on
          each side by (neib_shape[0]//2, neib_shape[1]//2)
        - 'full' :
          Equivalent to 'valid' if we pre-pad with zeros the input on
          each side by (neib_shape[0] - 1, neib_shape[1] - 1)
        - 'ignore_borders' :
          Same as valid, but will ignore the borders if the shape(s)
          of the input is not a multiple of the pooling factor(s).
        - 'wrap_centered' :
          Patches are centered on the step grid and indices wrap around
          at the image borders; requires odd patch shapes.
    """
    __props__ = ("mode",)
    # C-level enum mapping the Python mode strings onto the MODE_* constants
    # used inside c_code().
    BORDER_MODE = EnumList(
        ("MODE_VALID", "valid"),
        ("MODE_HALF", "half"),
        ("MODE_FULL", "full"),
        ("MODE_WRAP_CENTERED", "wrap_centered"),
        ("MODE_IGNORE_BORDERS", "ignore_borders"),
    )
    params_type = BORDER_MODE
    def get_params(self, node):
        # The mode string is handed to perform()/c_code() as the op params
        # (converted through the BORDER_MODE enum).
        return self.mode
    def __init__(self, mode="valid"):
        implemented_modes = self.BORDER_MODE.get_aliases()
        if mode not in implemented_modes:
            raise NotImplementedError(
                f"Only modes {', '.join(implemented_modes)} have been implemented for {type(self).__name__}"
            )
        self.mode = mode
    def __str__(self):
        return self.__class__.__name__ + "{%s}" % self.mode
    def __setstate__(self, d):
        # Unpickling back-compat: instances pickled before the "mode"
        # attribute existed default to "valid".
        self.__dict__.update(d)
        if not hasattr(self, "mode"):
            self.mode = "valid"
    def make_node(self, ten4, neib_shape, neib_step=None):
        """
        Parameters
        ----------
        ten4 : a list of lists of images
            ten4 is of shape (list 1 dim, list 2 dim, row, col).
        neib_shape
            (r,c) where r is the height of the neighborhood in rows and c is
            the width of the neighborhood in columns.
        neib_step
            (dr,dc) where dr is the number of rows to skip between patch and dc
            is the number of columns. When None, this is the same as neib_shape
            (patch are disjoint).
        Returns
        -------
        matrix
            A 2D matrix, written using the following pattern::
                idx = 0
                for i in range(list 1 dim)
                    for j in range(list 2 dim)
                        for k in <image column coordinates>
                            for l in <image row coordinates>
                                output[idx,:]
                                     = flattened version of ten4[i,j,l:l+r,k:k+c]
                                idx += 1
        .. note:: The op isn't necessarily implemented internally with these
            for loops, they're just the easiest way to describe the output
            pattern.
        """
        ten4 = as_tensor_variable(ten4)
        neib_shape = as_tensor_variable(neib_shape)
        if neib_step is None:
            # Default: disjoint patches (step equals the patch shape).
            neib_step = neib_shape
        else:
            neib_step = as_tensor_variable(neib_step)
        assert ten4.ndim == 4
        assert neib_shape.ndim == 1
        assert neib_step.ndim == 1
        return Apply(
            self, [ten4, neib_shape, neib_step], [matrix(dtype=ten4.type.dtype)]
        )
    def grad(self, inp, grads):
        x, neib_shape, neib_step = inp
        (gz,) = grads
        if self.mode in ("valid", "ignore_borders"):
            # Fast path: when neib_step equals neib_shape the patches are
            # disjoint, so the gradient is just the inverse op (neibs2images).
            if (
                neib_shape is neib_step
                or neib_shape == neib_step
                or
                # PyTensor Constant == do not compare the data
                # the equals function do that.
                (hasattr(neib_shape, "equals") and neib_shape.equals(neib_step))
            ):
                return [
                    neibs2images(gz, neib_shape, x.shape, mode=self.mode),
                    grad_undefined(self, 1, neib_shape),
                    grad_undefined(self, 2, neib_step),
                ]
        if self.mode in ["valid"]:
            # Iterate over neighborhood positions, summing contributions.
            def pos2map(pidx, pgz, prior_result, neib_shape, neib_step):
                """
                Helper function that adds gradient contribution from a single
                neighborhood position i,j.
                pidx = Index of position within neighborhood.
                pgz = Gradient of shape (batch_size*num_channels*neibs)
                prior_result = Shape (batch_size, num_channnels, rows, cols)
                neib_shape = Number of rows, cols in a neighborhood.
                neib_step = Step sizes from image2neibs.
                """
                nrows, ncols = neib_shape
                rstep, cstep = neib_step
                batch_size, num_channels, rows, cols = prior_result.shape
                i = pidx // ncols
                j = pidx - (i * ncols)
                # This position does not touch some img pixels in valid mode.
                result_indices = prior_result[
                    :,
                    :,
                    i : (rows - nrows + i + 1) : rstep,
                    j : (cols - ncols + j + 1) : cstep,
                ]
                newshape = (
                    (batch_size, num_channels)
                    + ((rows - nrows) // rstep + 1,)
                    + ((cols - ncols) // cstep + 1,)
                )
                return inc_subtensor(result_indices, pgz.reshape(newshape))
            indices = arange(neib_shape[0] * neib_shape[1])
            pgzs = gz.dimshuffle((1, 0))
            result, _ = pytensor.scan(
                fn=pos2map,
                sequences=[indices, pgzs],
                outputs_info=zeros(x.shape),
                non_sequences=[neib_shape, neib_step],
            )
            grad_input = result[-1]
            return [
                grad_input,
                grad_undefined(self, 1, neib_shape),
                grad_undefined(self, 2, neib_step),
            ]
        return [
            grad_not_implemented(self, 0, x),
            grad_undefined(self, 1, neib_shape),
            grad_undefined(self, 2, neib_step),
        ]
    def c_code_cache_version(self):
        # Bump whenever the C implementation in c_code() changes.
        return (10,)
    def perform(self, node, inp, out_, params):
        # Pure-Python implementation of the op (see c_code for the C version).
        ten4, neib_shape, neib_step = inp
        (z,) = out_
        # XXX: GpuImages2Neibs should not run this perform in DebugMode
        if not isinstance(self, Images2Neibs):
            raise pytensor.graph.utils.MethodNotDefined()
        def CEIL_INTDIV(a, b):
            # Integer division rounded toward +inf.
            if a % b:
                return (a // b) + 1
            else:
                return a // b
        grid_c = -1  # number of patch in height
        grid_d = -1  # number of patch in width
        assert ten4.ndim == 4
        assert neib_shape.ndim == 1
        assert neib_shape.shape[0] == 2
        assert neib_step.ndim == 1
        assert neib_step.shape[0] == 2
        c, d = neib_shape
        step_x, step_y = neib_step
        mode = self.mode
        if step_x <= 0 or step_y <= 0:
            raise ValueError(
                "neib_step wrong step ; values <= 0. Got " + str(neib_step)
            )
        if c <= 0 or d <= 0:
            raise ValueError("neib_shape values <=0. Got " + str(neib_shape))
        if mode == "wrap_centered":
            if (c % 2 != 1) or (d % 2 != 1):
                raise TypeError(
                    "Images2Neibs: in mode wrap_centered need patch with odd shapes"
                )
            if (ten4.shape[2] < c) or (ten4.shape[3] < d):
                raise TypeError(
                    "Images2Neibs: in wrap_centered mode, don't support"
                    " image shapes smaller then the patch shapes:"
                    f" neib_shape=({int(c)},{int(d)}), ten4[2:]=[{int(ten4.shape[2])},{int(ten4.shape[3])}]"
                )
            grid_c = CEIL_INTDIV(ten4.shape[2], step_x)
            grid_d = CEIL_INTDIV(ten4.shape[3], step_y)
        elif mode == "valid":
            if (ten4.shape[2] < c) or (((ten4.shape[2] - c) % step_x) != 0):
                raise TypeError(
                    f"neib_shape[0]={int(c)}, neib_step[0]={int(step_x)} and"
                    f" ten4.shape[2]={int(ten4.shape[2])} not consistent"
                )
            if (ten4.shape[3] < d) or (((ten4.shape[3] - d) % step_y) != 0):
                raise TypeError(
                    f"neib_shape[1]={int(d)}, neib_step[1]={int(step_y)} and"
                    f" ten4.shape[3]={int(ten4.shape[3])} not consistent"
                )
            # number of patch in height
            grid_c = 1 + ((ten4.shape[2] - c) // step_x)
            # number of patch in width
            grid_d = 1 + ((ten4.shape[3] - d) // step_y)
        elif mode == "ignore_borders":
            # number of patch in height
            grid_c = 1 + ((ten4.shape[2] - c) // step_x)
            # number of patch in width
            grid_d = 1 + ((ten4.shape[3] - d) // step_y)
        elif mode == "half":
            # This is equivalent to 'valid' with padding (c // 2, d // 2) on both sides
            # Thus the expanded image will have size (h + 2 * (c // 2), w + 2 * (d // 2))
            # Plugging these in the equation for 'valid' we get
            # h + 2 * (c // 2) - c = h - (c % 2)
            # w + 2 * (d // 2) - c = w - (d % 2)
            if (ten4.shape[2] < c) or (((ten4.shape[2] - (c % 2)) % step_x) != 0):
                raise TypeError(
                    f"neib_shape[0]={int(c)}, neib_step[0]={int(step_x)} and"
                    f" ten4.shape[2]={int(ten4.shape[2])} not consistent"
                )
            # NOTE(review): the message below says neib_shape[0]/neib_step[0]
            # but the check is on dimension 3 — likely a copy-paste slip in
            # the message text only.
            if (ten4.shape[3] < d) or (((ten4.shape[3] - (d % 2)) % step_y) != 0):
                raise TypeError(
                    f"neib_shape[0]={int(d)}, neib_step[0]={int(step_y)} and"
                    f" ten4.shape[3]={int(ten4.shape[3])} not consistent"
                )
            # number of patch in height
            grid_c = 1 + ((ten4.shape[2] - (c % 2)) // step_x)
            # number of patch in width
            grid_d = 1 + ((ten4.shape[3] - (d % 2)) // step_y)
        elif mode == "full":
            # This is equivalent to 'valid' with padding (c - 1, d - 1) on both sides
            # Thus the expanded image will have size (h + 2 * (c - 1), w + 2 * (d - 1))
            # Plugging these in the equation for 'valid' we get
            # h + 2 * (c - 1) - c = h + c - 2
            # w + 2 * (d - 1) - c = w + d - 2
            if (ten4.shape[2] < c) or (((ten4.shape[2] + c - 2) % step_x) != 0):
                raise TypeError(
                    f"neib_shape[0]={int(c)}, neib_step[0]={int(step_x)} and"
                    f" ten4.shape[2]={int(ten4.shape[2])} not consistent"
                )
            # NOTE(review): same message slip as in "half" — says index 0
            # while checking dimension 3.
            if (ten4.shape[3] < d) or (((ten4.shape[3] + d - 2) % step_y) != 0):
                raise TypeError(
                    f"neib_shape[0]={int(d)}, neib_step[0]={int(step_y)} and"
                    f" ten4.shape[3]={int(ten4.shape[3])} not consistent"
                )
            # number of patch in height
            grid_c = 1 + ((ten4.shape[2] + c - 2) // step_x)
            # number of patch in width
            grid_d = 1 + ((ten4.shape[3] + d - 2) // step_y)
        else:
            raise TypeError(f"Images2Neibs: unknown mode '{mode}'")
        # One output row per patch; one output column per patch element.
        z_dim0 = grid_c * grid_d * ten4.shape[1] * ten4.shape[0]
        z_dim1 = c * d
        z[0] = np.empty((z_dim0, z_dim1), dtype=node.outputs[0].dtype)
        nb_batch = ten4.shape[0]
        nb_stack = ten4.shape[1]
        height = ten4.shape[2]
        width = ten4.shape[3]
        wrap_centered_half_idx_shift_x = c // 2
        wrap_centered_half_idx_shift_y = d // 2
        for n in range(nb_batch):
            for s in range(nb_stack):
                # loop over the number of patch in height
                for a in range(grid_c):
                    # loop over the number of patch in width
                    for b in range(grid_d):
                        z_row = b + grid_d * (a + grid_c * (s + nb_stack * n))
                        for i in range(c):
                            ten4_2 = i + a * step_x
                            if mode == "wrap_centered":
                                ten4_2 -= wrap_centered_half_idx_shift_x
                                if ten4_2 < 0:
                                    ten4_2 += height
                                elif ten4_2 >= height:
                                    ten4_2 -= height
                            elif mode == "half":
                                ten4_2 -= wrap_centered_half_idx_shift_x
                            elif mode == "full":
                                ten4_2 -= c - 1
                            if ten4_2 < 0 or ten4_2 >= height:
                                # Out-of-image rows are zero-padded.
                                z[0][z_row, d * i : d * i + d] = 0
                            else:
                                for j in range(d):
                                    ten4_3 = j + b * step_y
                                    if mode == "wrap_centered":
                                        ten4_3 -= wrap_centered_half_idx_shift_y
                                        if ten4_3 < 0:
                                            ten4_3 += width
                                        elif ten4_3 >= width:
                                            ten4_3 -= width
                                    elif mode == "half":
                                        ten4_3 -= wrap_centered_half_idx_shift_y
                                    elif mode == "full":
                                        ten4_3 -= d - 1
                                    z_col = j + d * i
                                    if ten4_3 < 0 or ten4_3 >= width:
                                        # Out-of-image columns are zero-padded.
                                        z[0][z_row, z_col] = 0
                                    else:
                                        z[0][z_row, z_col] = ten4[n, s, ten4_2, ten4_3]
    def infer_shape(self, fgraph, node, input_shape):
        # Symbolic counterpart of the grid_c/grid_d computation in perform().
        in_shape = input_shape[0]
        c, d = node.inputs[1]
        step_x, step_y = node.inputs[2]
        if self.mode == "wrap_centered":
            grid_c = ceil_intdiv(in_shape[2], step_x)
            grid_d = ceil_intdiv(in_shape[3], step_y)
        elif self.mode == "valid":
            grid_c = 1 + ((in_shape[2] - c) // step_x)
            grid_d = 1 + ((in_shape[3] - d) // step_y)
        elif self.mode == "ignore_borders":
            grid_c = 1 + ((in_shape[2] - c) // step_x)
            grid_d = 1 + ((in_shape[3] - d) // step_y)
        elif self.mode == "half":
            grid_c = 1 + ((in_shape[2] - (c % 2)) // step_x)
            grid_d = 1 + ((in_shape[3] - (d % 2)) // step_y)
        elif self.mode == "full":
            grid_c = 1 + ((in_shape[2] + c - 2) // step_x)
            grid_d = 1 + ((in_shape[3] + d - 2) // step_y)
        else:
            raise TypeError(f"Images2Neibs: unknown mode '{self.mode}'")
        z_dim0 = grid_c * grid_d * in_shape[1] * in_shape[0]
        z_dim1 = c * d
        return [(z_dim0, z_dim1)]
    def c_code(self, node, name, inp, out, sub):
        # %(mode)s is substituted with the BORDER_MODE enum params; the other
        # %(...)s placeholders are the input/output variable names and the
        # failure macro provided by the COp machinery.
        return """
        #ifndef CEIL_INTDIV
        #define CEIL_INTDIV(a, b) ((a/b) + ((a %% b) ? 1: 0))
        #endif
        int grid_c = -1; //number of patch in height
        int grid_d = -1; //number of patch in width
        {
            if (PyArray_NDIM(%(ten4)s) != 4)
            {
                PyErr_Format(PyExc_TypeError, "ten4 wrong rank");
                %(fail)s;
            }
            if (PyArray_NDIM(%(neib_shape)s) != 1)
            {
                PyErr_Format(PyExc_TypeError, "neib_shape wrong rank");
                %(fail)s;
            }
            if ( (PyArray_DIMS(%(neib_shape)s))[0] != 2)
            {
                PyErr_Format(PyExc_TypeError, "neib_shape wrong shape ; has to"
                                              " contain 2 elements");
                %(fail)s;
            }
            if (PyArray_NDIM(%(neib_step)s) != 1)
            {
                PyErr_Format(PyExc_TypeError, "neib_step wrong rank");
                %(fail)s;
            }
            if ( (PyArray_DIMS(%(neib_step)s))[0] != 2)
            {
                PyErr_Format(PyExc_TypeError,
                             "neib_step wrong step ; has to contain 2 elements");
                %(fail)s;
            }
            // (c,d) = neib_shape
            const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
            const npy_intp d = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
            // (step_x,step_y) = neib_step
            const dtype_%(neib_step)s step_x = *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
            const dtype_%(neib_step)s step_y = *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
            if (step_x <=0 || step_y <=0)
            {
                PyErr_Format(PyExc_ValueError,
                             "neib_step wrong step ; values <= 0. Got %%lld %%lld.",
                             (long long) step_x, (long long) step_y);
                %(fail)s;
            }
            if (c <=0 || d <=0)
            {
                PyErr_Format(PyExc_ValueError,
                             "neib_shape values <= 0. Got %%lld %%lld.",
                             (long long)c, (long long)d);
                %(fail)s;
            }
            if (%(mode)s == MODE_WRAP_CENTERED) {
                if (c%%2!=1 || d%%2!=1){
                    PyErr_Format(PyExc_TypeError,
                                 "Images2Neibs: in mode wrap_centered"
                                 " need patch with odd shapes");
                    %(fail)s;
                }
                if ( (PyArray_DIMS(%(ten4)s))[2] < c ||
                     (PyArray_DIMS(%(ten4)s))[3] < d)
                {
                    PyErr_Format(PyExc_TypeError,
                        "Images2Neibs: in wrap_centered mode, don't support image"
                        " shapes smaller then the patch shapes:"
                        " neib_shape=(%%ld,%%ld), ten4[2:]=[%%ld,%%ld]",
                        (long int)c, (long int)d,
                        (long int)(PyArray_DIMS(%(ten4)s)[2]),
                        (long int)(PyArray_DIMS(%(ten4)s)[3]));
                    %(fail)s;
                }
                grid_c = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[2]),step_x);
                grid_d = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[3]),step_y);
            } else if (%(mode)s == MODE_VALID) {
                if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||
                     ( (((PyArray_DIMS(%(ten4)s))[2]-c) %% step_x)!=0))
                {
                    PyErr_Format(PyExc_TypeError,
                                 "neib_shape[0]=%%ld, neib_step[0]=%%ld and"
                                 " ten4.shape[2]=%%ld not consistent",
                                 (long int)c, (long int)step_x,
                                 (long int)(PyArray_DIMS(%(ten4)s)[2]));
                    %(fail)s;
                }
                if ( ((PyArray_DIMS(%(ten4)s))[3] < d) ||
                     ( (((PyArray_DIMS(%(ten4)s))[3]-d) %% step_y)!=0))
                {
                    PyErr_Format(PyExc_TypeError,
                                 "neib_shape[1]=%%ld, neib_step[1]=%%ld and"
                                 " ten4.shape[3]=%%ld not consistent",
                                 (long int)d, (long int)step_y,
                                 (long int)(PyArray_DIMS(%(ten4)s)[3]));
                    %(fail)s;
                }
                //number of patch in height
                grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x);
                //number of patch in width
                grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y);
            } else if (%(mode)s == MODE_IGNORE_BORDERS) {
                //number of patch in height
                grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x);
                //number of patch in width
                grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y);
            } else if (%(mode)s == MODE_HALF) {
                if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||
                     ( (((PyArray_DIMS(%(ten4)s))[2]-(c%%2)) %% step_x)!=0))
                {
                    PyErr_Format(PyExc_TypeError,
                                 "neib_shape[0]=%%ld, neib_step[0]=%%ld and"
                                 " ten4.shape[2]=%%ld not consistent",
                                 (long int)c, (long int)step_x,
                                 (long int)(PyArray_DIMS(%(ten4)s)[2]));
                    %(fail)s;
                }
                if ( ((PyArray_DIMS(%(ten4)s))[3] < d) ||
                     ( (((PyArray_DIMS(%(ten4)s))[3]-(d%%2)) %% step_y)!=0))
                {
                    PyErr_Format(PyExc_TypeError,
                                 "neib_shape[1]=%%ld, neib_step[1]=%%ld and"
                                 " ten4.shape[3]=%%ld not consistent",
                                 (long int)d, (long int)step_y,
                                 (long int)(PyArray_DIMS(%(ten4)s)[3]));
                    %(fail)s;
                }
                //number of patch in height
                grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-(c%%2))/step_x);
                //number of patch in width
                grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-(d%%2))/step_y);
            } else if (%(mode)s == MODE_FULL) {
                if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||
                     ( (((PyArray_DIMS(%(ten4)s))[2]+c-2) %% step_x)!=0))
                {
                    PyErr_Format(PyExc_TypeError,
                                 "neib_shape[0]=%%ld, neib_step[0]=%%ld and"
                                 " ten4.shape[2]=%%ld not consistent",
                                 (long int)c, (long int)step_x,
                                 (long int)(PyArray_DIMS(%(ten4)s)[2]));
                    %(fail)s;
                }
                if ( ((PyArray_DIMS(%(ten4)s))[3] < d) ||
                     ( (((PyArray_DIMS(%(ten4)s))[3]+d-2) %% step_y)!=0))
                {
                    PyErr_Format(PyExc_TypeError,
                                 "neib_shape[1]=%%ld, neib_step[1]=%%ld and"
                                 " ten4.shape[3]=%%ld not consistent",
                                 (long int)d, (long int)step_y,
                                 (long int)(PyArray_DIMS(%(ten4)s)[3]));
                    %(fail)s;
                }
                //number of patch in height
                grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]+c-2)/step_x);
                //number of patch in width
                grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]+d-2)/step_y);
            } else {
                PyErr_Format(PyExc_TypeError,
                             "Images2Neibs: unknown mode %%d", %(mode)s);
                %(fail)s;
            }
            // new dimensions for z
            const npy_intp z_dim1 = c * d;
            const npy_intp z_dim0 =  grid_c
                            * grid_d
                            * (PyArray_DIMS(%(ten4)s))[1]
                            * (PyArray_DIMS(%(ten4)s))[0];
            if ((NULL == %(z)s)
                || ((PyArray_DIMS(%(z)s))[0] != z_dim0 )
                || ((PyArray_DIMS(%(z)s))[1] != z_dim1 )
            )
            {
                Py_XDECREF(%(z)s);
                npy_intp dims[2];
                dims[0] = z_dim0;
                dims[1] = z_dim1;
                %(z)s = (PyArrayObject*) PyArray_EMPTY(2,
                    dims,
                    PyArray_TYPE((PyArrayObject*) py_%(ten4)s),
                    0);
                if (!%(z)s)
                {
                    PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
                    %(fail)s;
                }
            }
        }
        { // NESTED SCOPE
        const int nb_batch = (PyArray_DIMS(%(ten4)s))[0];
        const int nb_stack = (PyArray_DIMS(%(ten4)s))[1];
        const int height = (PyArray_DIMS(%(ten4)s))[2];
        const int width = (PyArray_DIMS(%(ten4)s))[3];
        // (c,d) = neib_shape
        const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
        const npy_intp d = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
        // (step_x,step_y) = neib_step
        const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
        const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
        const int wrap_centered_half_idx_shift_x = c/2;
        const int wrap_centered_half_idx_shift_y = d/2;
        // Oh this is messed up...
        for (int n = 0; n < nb_batch; n++)              // loop over batches
            for (int s = 0; s < nb_stack; s++)          // loop over stacks
                for (int a = 0; a < grid_c; a++)        // loop over the number of patch in height
                    for (int b = 0; b < grid_d; b++)    // loop over the number of patch in width
                    {
                        int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
                        for (int i = 0; i < c; i++)     // loop over c
                        {
                            int ten4_2 = i + a * step_x;
                            if (%(mode)s == MODE_WRAP_CENTERED) {
                                ten4_2 -= wrap_centered_half_idx_shift_x;
                                if ( ten4_2 < 0 ) ten4_2 += height;
                                else if (ten4_2 >= height) ten4_2 -= height;
                            } else if (%(mode)s == MODE_HALF) {
                                ten4_2 -= wrap_centered_half_idx_shift_x;
                            } else if (%(mode)s == MODE_FULL) {
                                ten4_2 -= c - 1;
                            }
                            if (ten4_2 < 0 | ten4_2 >= height) {
                                dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, d * i);
                                memset(curr_z, 0, d*sizeof(*curr_z));
                            } else {
                                for (int j = 0; j < d; j++)  // loop over d
                                {
                                    int ten4_3 = j + b * step_y;
                                    if (%(mode)s == MODE_WRAP_CENTERED) {
                                        ten4_3 -= wrap_centered_half_idx_shift_y;
                                        if ( ten4_3 < 0 ) ten4_3 += width;
                                        else if (ten4_3 >= width) ten4_3 -= width;
                                    } else if (%(mode)s == MODE_HALF) {
                                        ten4_3 -= wrap_centered_half_idx_shift_y;
                                    } else if (%(mode)s == MODE_FULL) {
                                        ten4_3 -= d - 1;
                                    }
                                    int z_col = j + d * i;
                                    dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, z_col);
                                    if (ten4_3 < 0 | ten4_3 >= width) {
                                        *curr_z = 0;
                                    } else {
                                        *curr_z = *( (dtype_%(ten4)s*) PyArray_GETPTR4(%(ten4)s, n, s, ten4_2, ten4_3));
                                    }
                                }
                            }
                        }
                    }
        } // END NESTED SCOPE
        """ % dict(
            ten4=inp[0],
            neib_shape=inp[1],
            neib_step=inp[2],
            z=out[0],
            fail=sub["fail"],
            mode=sub["params"],
        )
def images2neibs(ten4, neib_shape, neib_step=None, mode="valid"):
    r"""
    Function :func:`images2neibs`
    allows to apply a sliding window operation to a tensor containing
    images or other two-dimensional objects.
    The sliding window operation loops over points in input data and stores
    a rectangular neighbourhood of each point.
    It is possible to assign a step of selecting patches (parameter `neib_step`).
    Parameters
    ----------
    ten4 : A 4d tensor-like
        A 4-dimensional tensor which represents a list of lists of images.
        It should have shape (list 1 dim, list 2 dim, row, col). The first
        two dimensions can be useful to store different channels and batches.
    neib_shape : A 1d tensor-like of 2 values
        A tuple containing two values: height and width of the neighbourhood.
        It should have shape (r,c) where r is the height of the neighborhood
        in rows and c is the width of the neighborhood in columns.
    neib_step : A 1d tensor-like of 2 values
        (dr,dc) where dr is the number of rows to skip between patch and dc is
        the number of columns. The parameter should be a tuple of two elements:
        number of rows and number of columns to skip each iteration.
        Basically, when the step is 1, the neighbourhood of every first element
        is taken and every possible rectangular subset is returned.
        By default it is equal to `neib_shape` in other words, the patches are
        disjoint. When the step is greater than `neib_shape`, some elements are
        omitted. When None, this is the same as neib_shape (patch are disjoint).
    mode : {'valid', 'ignore_borders', 'wrap_centered', 'half'}
        ``valid``
            Requires an input that is a multiple of the
            pooling factor (in each direction).
        ``half``
            Equivalent to 'valid' if we pre-pad with zeros the input on
            each side by (neib_shape[0]//2, neib_shape[1]//2)
        ``full``
            Equivalent to 'valid' if we pre-pad with zeros the input on
            each side by (neib_shape[0] - 1, neib_shape[1] - 1)
        ``ignore_borders``
            Same as valid, but will ignore the borders if the shape(s) of
            the input is not a multiple of the pooling factor(s).
        ``wrap_centered``
            Patches are centered on the step grid and wrap around at the
            image borders; requires odd patch shapes.
    Returns
    -------
    object
        Reshapes the input as a 2D tensor where each row is a
        pooling example. Pseudo-code of the output:
        .. code-block:: python
            idx = 0
            for i in range(list 1 dim):
                for j in range(list 2 dim):
                    for k in <image column coordinates>:
                        for l in <image row coordinates>:
                            output[idx,:]
                                 = flattened version of ten4[i,j,l:l+r,k:k+c]
                            idx += 1
    .. note:: The operation isn't necessarily implemented internally with
        these for loops, they're just the easiest way to describe the
        output pattern.
    Notes
    -----
    .. note::
        Currently the step size should be chosen in the way that the
        corresponding dimension :math:`i` (width or height) is equal
        to :math:`n * step\_size_i + neib\_shape_i` for some :math:`n`.
    Examples
    --------
    .. code-block:: python
        # Defining variables
        images = pytensor.tensor.type.tensor4('images')
        neibs = images2neibs(images, neib_shape=(5, 5))
        # Constructing pytensor function
        window_function = pytensor.function([images], neibs)
        # Input tensor (one image 10x10)
        im_val = np.arange(100.).reshape((1, 1, 10, 10))
        # Function application
        neibs_val = window_function(im_val)
    .. note:: The underlying code will construct a 2D tensor of disjoint
        patches 5x5. The output has shape 4x25.
    """
    return Images2Neibs(mode)(ten4, neib_shape, neib_step)
def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
    """
    Function :func:`neibs2images`
    performs the inverse operation of
    :func:`images2neibs`. It inputs
    the output of :func:`images2neibs`
    and reconstructs its input.
    Parameters
    ----------
    neibs : 2d tensor
        Like the one obtained by
        :func:`images2neibs`.
    neib_shape
        `neib_shape` that was used in
        :func:`images2neibs`.
    original_shape
        Original shape of the 4d tensor given to
        :func:`images2neibs`
    Returns
    -------
    object
        Reconstructs the input of
        :func:`images2neibs`,
        a 4d tensor of shape `original_shape`.
    Notes
    -----
    Currently, the function doesn't support tensors created with
    `neib_step` different from default value. This means that it may be
    impossible to compute the gradient of a variable gained by
    :func:`images2neibs` w.r.t.
    its inputs in this case, because it uses
    :func:`images2neibs` for
    gradient computation.
    Examples
    --------
    Example, which uses a tensor gained in example for
    :func:`images2neibs`:
    .. code-block:: python
        im_new = neibs2images(neibs, (5, 5), im_val.shape)
        # PyTensor function definition
        inv_window = pytensor.function([neibs], im_new)
        # Function application
        im_new_val = inv_window(neibs_val)
    .. note:: The code will output the initial image array.
    """
    neibs = as_tensor_variable(neibs)
    neib_shape = as_tensor_variable(neib_shape)
    original_shape = as_tensor_variable(original_shape)
    # Treat each row of `neibs` as one image row split into patches of
    # width neib_shape[1].
    new_neib_shape = stack([original_shape[-1] // neib_shape[1], neib_shape[1]])
    output_2d = images2neibs(
        neibs.dimshuffle("x", "x", 0, 1), new_neib_shape, mode=mode
    )
    if mode == "ignore_borders":
        # Use set_subtensor so that an `original_shape` whose static shape
        # cannot be inferred is still accepted, while a wrong runtime shape
        # still raises an error.
        valid_shape = original_shape
        valid_shape = set_subtensor(
            valid_shape[2], (valid_shape[2] // neib_shape[0]) * neib_shape[0]
        )
        valid_shape = set_subtensor(
            valid_shape[3], (valid_shape[3] // neib_shape[1]) * neib_shape[1]
        )
        output_4d = output_2d.reshape(valid_shape, ndim=4)
        # padding the borders with zeros
        for d in (2, 3):
            pad_shape = list(output_4d.shape)
            pad_shape[d] = original_shape[d] - valid_shape[d]
            output_4d = concatenate([output_4d, zeros(pad_shape)], axis=d)
    elif mode == "valid":
        # TODO: we do not implement all mode with this code.
        # Add a check for the good cases.
        output_4d = output_2d.reshape(original_shape, ndim=4)
    else:
        raise NotImplementedError(f"neibs2images do not support mode={mode}")
    return output_4d
import warnings

# Deprecation shim: keep `pytensor.tensor.nnet.opt` importable by warning and
# re-exporting everything from the renamed `rewriting` module.
warnings.warn(
    "The module `pytensor.tensor.nnet.opt` is deprecated; use `pytensor.tensor.nnet.rewriting` instead.",
    DeprecationWarning,
    stacklevel=2,
)
from pytensor.tensor.nnet.rewriting import *  # noqa: F401 E402 F403
"""
Optimizations addressing the ops in nnet root directory
"""
import pytensor
from pytensor import compile
from pytensor.compile import optdb
from pytensor.configdefaults import config
from pytensor.graph.rewriting.basic import (
MetaNodeRewriterSkip,
WalkingGraphRewriter,
copy_stack_trace,
in2out,
node_rewriter,
)
from pytensor.tensor.nnet.abstract_conv import (
AbstractConv2d,
AbstractConv2d_gradInputs,
AbstractConv2d_gradWeights,
AbstractConv3d,
AbstractConv3d_gradInputs,
AbstractConv3d_gradWeights,
get_conv_output_shape,
)
from pytensor.tensor.nnet.blocksparse import (
SparseBlockGemv,
SparseBlockOuter,
sparse_block_gemv_inplace,
sparse_block_outer_inplace,
)
# Cpu implementation
from pytensor.tensor.nnet.conv import ConvOp, conv2d
from pytensor.tensor.nnet.corr import CorrMM, CorrMM_gradInputs, CorrMM_gradWeights
from pytensor.tensor.nnet.corr3d import (
Corr3dMM,
Corr3dMMGradInputs,
Corr3dMMGradWeights,
)
from pytensor.tensor.rewriting.basic import register_specialize_device
from pytensor.tensor.type import TensorType
@node_rewriter([SparseBlockGemv], inplace=True)
def local_inplace_sparse_block_gemv(fgraph, node):
    """
    SparseBlockGemv(inplace=False) -> SparseBlockGemv(inplace=True)
    """
    op = node.op
    if not isinstance(op, SparseBlockGemv) or op.inplace:
        return False
    replacement = sparse_block_gemv_inplace(*node.inputs)
    copy_stack_trace(node.outputs[0], replacement)
    return [replacement]
# Register the in-place SparseBlockGemv rewrite in the rewrite database with
# the "fast_run"/"inplace" tags at position 60.
compile.optdb.register(
    "local_inplace_sparse_block_gemv",
    WalkingGraphRewriter(
        local_inplace_sparse_block_gemv,
        failure_callback=WalkingGraphRewriter.warn_inplace,
    ),
    "fast_run",
    "inplace",
    position=60,
)
@node_rewriter([SparseBlockOuter], inplace=True)
def local_inplace_sparse_block_outer(fgraph, node):
    """
    SparseBlockOuter(inplace=False) -> SparseBlockOuter(inplace=True)
    """
    op = node.op
    if not isinstance(op, SparseBlockOuter) or op.inplace:
        return False
    replacement = sparse_block_outer_inplace(*node.inputs)
    copy_stack_trace(node.outputs[0], replacement)
    return [replacement]
# Register the in-place SparseBlockOuter rewrite in the rewrite database with
# the "fast_run"/"inplace" tags at position 60.
compile.optdb.register(
    "local_inplace_sparse_block_outer",
    WalkingGraphRewriter(
        local_inplace_sparse_block_outer,
        failure_callback=WalkingGraphRewriter.warn_inplace,
    ),
    "fast_run",
    "inplace",
    position=60,
)
# Conv opts
@node_rewriter([AbstractConv2d])
def local_abstractconv_gemm(fgraph, node):
    """Lower an AbstractConv2d node to the GEMM-based CorrMM implementation."""
    # If config.blas__ldflags is empty, PyTensor will use
    # a NumPy C implementation of [sd]gemm_.
    if config.cxx == "" or node.inputs[0].dtype == "float16":
        return
    op = node.op
    if not isinstance(op, AbstractConv2d):
        return None
    img, kern = node.inputs
    if not (isinstance(img.type, TensorType) and isinstance(kern.type, TensorType)):
        return None
    if op.filter_flip:
        # Reverse the last two kernel axes when the abstract op asks for
        # flipped filters.
        reverse_last_two = (slice(None),) * (kern.ndim - 2) + (
            slice(None, None, -1),
        ) * 2
        kern = kern[reverse_last_two]
    corr_out = CorrMM(
        border_mode=op.border_mode,
        subsample=op.subsample,
        filter_dilation=op.filter_dilation,
        num_groups=op.num_groups,
        unshared=op.unshared,
    )(img, kern)
    copy_stack_trace(node.outputs[0], corr_out)
    return [corr_out]
@node_rewriter([AbstractConv3d])
def local_abstractconv3d_gemm(fgraph, node):
    """Lower an ``AbstractConv3d`` to the GEMM-based ``Corr3dMM`` op."""
    op = node.op
    # Without a C++ compiler PyTensor will use a NumPy C implementation of
    # [sd]gemm_; also bail out on float16 inputs.
    if config.cxx == "" or node.inputs[0].dtype == "float16":
        return
    if not isinstance(op, AbstractConv3d):
        return None
    img, kern = node.inputs
    if not (isinstance(img.type, TensorType) and isinstance(kern.type, TensorType)):
        return None
    if op.filter_flip:
        # Reverse the three spatial axes of the kernel, since the abstract
        # op uses flipped filters.
        kern = kern[:, :, ::-1, ::-1, ::-1]
    new_out = Corr3dMM(
        border_mode=op.border_mode,
        subsample=op.subsample,
        filter_dilation=op.filter_dilation,
        num_groups=op.num_groups,
    )(img, kern)
    copy_stack_trace(node.outputs[0], new_out)
    return [new_out]
@node_rewriter([AbstractConv2d_gradWeights])
def local_abstractconv_gradweight_gemm(fgraph, node):
    """Lower ``AbstractConv2d_gradWeights`` to ``CorrMM_gradWeights``."""
    op = node.op
    # Without a C++ compiler PyTensor will use a NumPy C implementation of
    # [sd]gemm_; also bail out on float16 inputs.
    if config.cxx == "" or node.inputs[0].dtype == "float16":
        return
    if not isinstance(op, AbstractConv2d_gradWeights):
        return None
    img, topgrad, shape = node.inputs
    if not (isinstance(img.type, TensorType) and isinstance(topgrad.type, TensorType)):
        return None
    grad_w = CorrMM_gradWeights(
        border_mode=op.border_mode,
        subsample=op.subsample,
        filter_dilation=op.filter_dilation,
        num_groups=op.num_groups,
        unshared=op.unshared,
    )(img, topgrad, shape)
    copy_stack_trace(node.outputs[0], grad_w)
    if op.filter_flip:
        # Reverse the two spatial axes of the computed weight gradient when
        # the abstract op uses flipped filters.
        grad_w = grad_w[
            (slice(None),) * (grad_w.ndim - 2) + (slice(None, None, -1),) * 2
        ]
        copy_stack_trace(node.outputs[0], grad_w)
    return [grad_w]
@node_rewriter([AbstractConv3d_gradWeights])
def local_abstractconv3d_gradweight_gemm(fgraph, node):
    """Lower ``AbstractConv3d_gradWeights`` to ``Corr3dMMGradWeights``."""
    op = node.op
    # Without a C++ compiler PyTensor will use a NumPy C implementation of
    # [sd]gemm_; also bail out on float16 inputs.
    if config.cxx == "" or node.inputs[0].dtype == "float16":
        return
    if not isinstance(op, AbstractConv3d_gradWeights):
        return None
    img, topgrad, shape = node.inputs
    if not (isinstance(img.type, TensorType) and isinstance(topgrad.type, TensorType)):
        return None
    grad_w = Corr3dMMGradWeights(
        border_mode=op.border_mode,
        subsample=op.subsample,
        filter_dilation=op.filter_dilation,
        num_groups=op.num_groups,
    )(img, topgrad, shape)
    copy_stack_trace(node.outputs[0], grad_w)
    if op.filter_flip:
        # Reverse the three spatial axes of the computed weight gradient when
        # the abstract op uses flipped filters.
        grad_w = grad_w[:, :, ::-1, ::-1, ::-1]
        copy_stack_trace(node.outputs[0], grad_w)
    return [grad_w]
@node_rewriter([AbstractConv2d_gradInputs])
def local_abstractconv_gradinputs_gemm(fgraph, node):
    """Lower ``AbstractConv2d_gradInputs`` to ``CorrMM_gradInputs``."""
    op = node.op
    # Without a C++ compiler PyTensor will use a NumPy C implementation of
    # [sd]gemm_; also bail out on float16 inputs.
    if config.cxx == "" or node.inputs[0].dtype == "float16":
        return
    if not isinstance(op, AbstractConv2d_gradInputs):
        return None
    kern, topgrad, shape = node.inputs
    if not (isinstance(kern.type, TensorType) and isinstance(topgrad.type, TensorType)):
        return None
    if op.filter_flip:
        # Reverse the last two (spatial) axes of the kernel, since the
        # abstract op uses flipped filters.
        kern = kern[(slice(None),) * (kern.ndim - 2) + (slice(None, None, -1),) * 2]
    grad_in = CorrMM_gradInputs(
        border_mode=op.border_mode,
        subsample=op.subsample,
        filter_dilation=op.filter_dilation,
        num_groups=op.num_groups,
        unshared=op.unshared,
    )(kern, topgrad, shape)
    copy_stack_trace(node.outputs[0], grad_in)
    return [grad_in]
@node_rewriter([AbstractConv3d_gradInputs])
def local_abstractconv3d_gradinputs_gemm(fgraph, node):
    """Lower ``AbstractConv3d_gradInputs`` to ``Corr3dMMGradInputs``."""
    op = node.op
    # Without a C++ compiler PyTensor will use a NumPy C implementation of
    # [sd]gemm_; also bail out on float16 inputs.
    if config.cxx == "" or node.inputs[0].dtype == "float16":
        return
    if not isinstance(op, AbstractConv3d_gradInputs):
        return None
    kern, topgrad, shape = node.inputs
    if not (isinstance(kern.type, TensorType) and isinstance(topgrad.type, TensorType)):
        return None
    if op.filter_flip:
        # Reverse the three spatial axes of the kernel, since the abstract
        # op uses flipped filters.
        kern = kern[:, :, ::-1, ::-1, ::-1]
    grad_in = Corr3dMMGradInputs(
        border_mode=op.border_mode,
        subsample=op.subsample,
        filter_dilation=op.filter_dilation,
        num_groups=op.num_groups,
    )(kern, topgrad, shape)
    copy_stack_trace(node.outputs[0], grad_in)
    return [grad_in]
@node_rewriter([AbstractConv2d])
def local_conv2d_cpu(fgraph, node):
    """Lower an ``AbstractConv2d`` to the legacy CPU ``ConvOp`` via ``conv2d``."""
    op = node.op
    if not isinstance(op, AbstractConv2d) or node.inputs[0].dtype == "float16":
        return None
    img, kern = node.inputs
    if not (isinstance(img.type, TensorType) and isinstance(kern.type, TensorType)):
        return None
    # The legacy path only handles full/valid modes, flipped filters
    # (non-flipped is not tested yet), a single shared filter group, and
    # no filter dilation.
    if (
        op.border_mode not in ("full", "valid")
        or not op.filter_flip
        or op.num_groups > 1
        or op.unshared
        or op.filter_dilation != (1, 1)
    ):
        return None
    new_out = conv2d(
        img,
        kern,
        op.imshp,
        op.kshp,
        border_mode=op.border_mode,
        subsample=op.subsample,
    )
    copy_stack_trace(node.outputs[0], new_out)
    return [new_out]
@node_rewriter([AbstractConv2d_gradWeights])
def local_conv2d_gradweight_cpu(fgraph, node):
    """
    Lower ``AbstractConv2d_gradWeights`` to the legacy CPU ``ConvOp``.

    The weight gradient is expressed as a forward valid-mode ``ConvOp``
    over dimshuffled image / output-gradient tensors (the classic
    "bprop weights" formulation).  Returns ``None`` whenever the legacy
    op cannot handle the requested configuration.
    """
    if (
        not isinstance(node.op, AbstractConv2d_gradWeights)
        or node.inputs[0].dtype == "float16"
    ):
        return None
    img, topgrad, shape = node.inputs
    if not isinstance(img.type, TensorType) or not isinstance(topgrad.type, TensorType):
        return None
    if node.op.border_mode not in ("full", "valid"):
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return
    if node.op.num_groups > 1 or node.op.unshared:
        return None
    if node.op.border_mode == "valid" and (node.op.subsample != (1, 1)):
        return None
    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None
    # Missing static shape info is treated as "all four dims unknown".
    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp
    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp
    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            # We cannot infer the shapes
            return None
    # Determine gradient on kernels
    assert len(op_imshp) == 4 and len(op_kshp) == 4
    # Spatial output shape with the actual subsampling/dilation ...
    outshp = get_conv_output_shape(
        op_imshp,
        op_kshp,
        node.op.border_mode,
        node.op.subsample,
        node.op.filter_dilation,
    )[2:]
    # ... and without subsampling, used for the "logical" shapes below.
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode, (1, 1))[
        2:
    ]
    # Swap batch and channel axes so that the batch dimension plays the
    # role of input channels in the forward ConvOp computing the gradient.
    newimg = img.dimshuffle((1, 0, 2, 3))
    newtopgrad = topgrad.dimshuffle((1, 0, 2, 3))
    if node.op.border_mode == "valid":
        # valid mode: correlate the image with the output gradient.
        (img, filters) = (newimg, newtopgrad)
        kshp_logical = fulloutshp
        kshp_logical_top_aligned = False
        imshp_logical = None
        (bsize, nkern) = (op_imshp[1], op_kshp[0])
        imshp = (op_imshp[0], op_imshp[2], op_imshp[3])
        kshp = outshp
    elif node.op.border_mode == "full":
        # full mode: the roles of image and output gradient are swapped.
        (img, filters) = (newtopgrad, newimg)
        kshp_logical = None
        kshp_logical_top_aligned = True
        imshp_logical = (op_imshp[0], fulloutshp[0], fulloutshp[1])
        (bsize, nkern) = (op_kshp[0], op_imshp[1])
        imshp = (op_imshp[0], outshp[0], outshp[1])
        kshp = op_imshp[2:]
    else:
        raise NotImplementedError("Only [full,valid] modes are currently supported.")
    # Flip the kernels
    filters = filters[:, :, ::-1, ::-1]
    dw = ConvOp(
        imshp,
        kshp,
        nkern,
        bsize,
        1,
        1,
        output_mode="valid",
        unroll_batch=None,
        unroll_kern=None,
        unroll_patch=None,
        imshp_logical=imshp_logical,
        kshp_logical=kshp_logical,
        kshp_logical_top_aligned=kshp_logical_top_aligned,
        direction_hint="bprop weights",
    )
    res = dw(img, filters)
    copy_stack_trace(node.outputs[0], res)
    if node.op.border_mode == "valid":
        # Restore the (out_channels, in_channels, h, w) layout and undo
        # the spatial flip applied above.
        res = res.dimshuffle((1, 0, 2, 3))
        res = res[:, :, ::-1, ::-1]
        copy_stack_trace(node.outputs[0], res)
    return [res]
@node_rewriter([AbstractConv2d_gradInputs])
def local_conv2d_gradinputs_cpu(fgraph, node):
    """
    Lower ``AbstractConv2d_gradInputs`` to the legacy CPU ``ConvOp``.

    The input gradient is computed by convolving the output gradient with
    dimshuffled, spatially flipped filters, using the opposite border mode
    ("bprop inputs").  Returns ``None``/``False`` when the legacy op
    cannot handle the requested configuration.
    """
    if (
        not isinstance(node.op, AbstractConv2d_gradInputs)
        or node.inputs[0].dtype == "float16"
    ):
        return None
    kern, topgrad, shape = node.inputs
    if not isinstance(kern.type, TensorType) or not isinstance(
        topgrad.type, TensorType
    ):
        return None
    if node.op.border_mode not in ("full", "valid"):
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return None
    if node.op.num_groups > 1 or node.op.unshared:
        return None
    # Conv 3d implementation, needed when subsample > 2
    if node.op.border_mode == "valid" and node.op.subsample != (1, 1):
        # The op don't support that anymore.
        return False
    # Conv2d Implementation
    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None
    # Missing static shape info is treated as "all four dims unknown".
    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp
    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp
    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            # We cannot infer the shapes
            return None
    # The backward pass uses the opposite border mode: the gradient of a
    # "valid" convolution is a "full" one and vice versa.
    mode = "valid"
    if node.op.border_mode != "full":
        mode = "full"
    # Swap in/out channel axes of the filters and flip them spatially.
    filters = kern.dimshuffle((1, 0, 2, 3))
    filters = filters[:, :, ::-1, ::-1]
    # Spatial output shape with the actual subsampling/dilation ...
    outshp = get_conv_output_shape(
        op_imshp,
        op_kshp,
        node.op.border_mode,
        node.op.subsample,
        node.op.filter_dilation,
    )[2:]
    # ... and without subsampling, used for the "logical" image shape.
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode, (1, 1))[
        2:
    ]
    nkern = op_imshp[1]
    imshp = (op_kshp[0], outshp[0], outshp[1])
    imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1])
    din = ConvOp(
        imshp,
        op_kshp[2:],
        nkern,
        op_imshp[0],
        1,
        1,
        output_mode=mode,
        unroll_batch=None,
        unroll_kern=None,
        unroll_patch=None,
        imshp_logical=imshp_logical,
        kshp_logical=None,
        version=-1,
        direction_hint="bprop inputs",
    )
    din = din(topgrad, filters)
    copy_stack_trace(node.outputs[0], din)
    return [din]
# Register Cpu Optimization
# All CPU conv lowerings live in one local group DB that is activated for
# both fast_compile and fast_run via register_specialize_device.
conv_groupopt = pytensor.graph.rewriting.db.LocalGroupDB()
conv_groupopt.__name__ = "conv_opts"
register_specialize_device(conv_groupopt, "fast_compile", "fast_run")

# GEMM-based convolution
# It can be disabled by excluding 'conv_gemm'.
# The GEMM lowerings are registered at position=30, before the legacy
# ConvOp lowerings at position=40, which therefore act as a fallback.
conv_groupopt.register(
    "local_abstractconv_gemm",
    local_abstractconv_gemm,
    "conv_gemm",
    "fast_compile",
    "fast_run",
    position=30,
)
conv_groupopt.register(
    "local_abstractconv_gradweight_gemm",
    local_abstractconv_gradweight_gemm,
    "conv_gemm",
    "fast_compile",
    "fast_run",
    position=30,
)
conv_groupopt.register(
    "local_abstractconv_gradinputs_gemm",
    local_abstractconv_gradinputs_gemm,
    "conv_gemm",
    "fast_compile",
    "fast_run",
    position=30,
)
conv_groupopt.register(
    "local_abstractconv3d_gemm",
    local_abstractconv3d_gemm,
    "conv_gemm",
    "fast_compile",
    "fast_run",
    position=30,
)
conv_groupopt.register(
    "local_abstractconv3d_gradweight_gemm",
    local_abstractconv3d_gradweight_gemm,
    "conv_gemm",
    "fast_compile",
    "fast_run",
    position=30,
)
conv_groupopt.register(
    "local_abstractconv3d_gradinputs_gemm",
    local_abstractconv3d_gradinputs_gemm,
    "conv_gemm",
    "fast_compile",
    "fast_run",
    position=30,
)

# Legacy convolution
# Fallback lowerings to the old ConvOp (2D only), tried after the GEMM ones.
conv_groupopt.register(
    "local_conv2d_cpu", local_conv2d_cpu, "fast_compile", "fast_run", position=40
)
conv_groupopt.register(
    "local_conv2d_gradweight_cpu",
    local_conv2d_gradweight_cpu,
    "fast_compile",
    "fast_run",
    position=40,
)
conv_groupopt.register(
    "local_conv2d_gradinputs_cpu",
    local_conv2d_gradinputs_cpu,
    "fast_compile",
    "fast_run",
    position=40,
)
# Verify that no AbstractConv are present in the graph
@node_rewriter(
    [
        AbstractConv2d,
        AbstractConv2d_gradWeights,
        AbstractConv2d_gradInputs,
        AbstractConv3d,
        AbstractConv3d_gradWeights,
        AbstractConv3d_gradInputs,
    ]
)
def local_abstractconv_check(fgraph, node):
    """Raise if any abstract convolution op is still present in the graph.

    Every abstract conv should have been replaced by a concrete
    implementation by the time this runs; a surviving one means no
    lowering supported the requested options.
    """
    abstract_conv_ops = (
        AbstractConv2d,
        AbstractConv2d_gradWeights,
        AbstractConv2d_gradInputs,
        AbstractConv3d,
        AbstractConv3d_gradWeights,
        AbstractConv3d_gradInputs,
    )
    if isinstance(node.op, abstract_conv_ops):
        raise MetaNodeRewriterSkip(
            f"{node.op.__class__.__name__} PyTensor rewriting failed: there is no implementation "
            "available supporting the requested options. If on CPU, "
            "do you have a BLAS library installed PyTensor can link against? "
            "On the CPU we do not support float16."
        )
# Registered at position=48.7 so the check runs after the conv lowering
# rewrites registered above; if an abstract conv op survives until here,
# local_abstractconv_check raises with an actionable error message.
optdb.register(
    "AbstractConvCheck",
    in2out(local_abstractconv_check, name="AbstractConvCheck"),
    "fast_compile",
    "fast_run",
    position=48.7,
)
"""
These functions implement special cases of exp and log to improve numerical
stability.
"""
import pytensor
from pytensor import printing
from pytensor import scalar as aes
from pytensor.graph.rewriting.basic import copy_stack_trace, node_rewriter
from pytensor.printing import pprint
from pytensor.scalar import sigmoid as scalar_sigmoid
from pytensor.scalar.math import Sigmoid
from pytensor.tensor.basic import constant
from pytensor.tensor.elemwise import Elemwise
from pytensor.tensor.math import clip, sigmoid
from pytensor.tensor.type import TensorType
class UltraFastScalarSigmoid(aes.UnaryScalarOp):
    """
    Scalar sigmoid computed with a cheap piecewise approximation of tanh.

    This is just speed opt. Not for stability.
    """

    # NumPy-mode evaluation uses the exact scipy.special.expit.
    nfunc_spec = ("scipy.special.expit", 1, 1)

    @staticmethod
    def st_impl(x):
        """Pure-Python reference implementation (mirrors the C code below)."""
        # sigmoid(x) = 0.5 * (tanh(x / 2) + 1), with tanh approximated
        # piecewise on [0, 1.7), [1.7, 3) and [3, inf).
        x = 0.5 * x
        # The if is a tanh approximate.
        if x >= 0:
            if x < 1.7:
                z = 1.5 * x / (1 + x)
            elif x < 3:
                z = 0.935409070603099 + 0.0458812946797165 * (x - 1.7)
            else:
                z = 0.99505475368673
        else:
            # tanh is odd: evaluate on -x and negate the result.
            xx = -x
            if xx < 1.7:
                z = 1.5 * xx / (1 + xx)
            elif xx < 3:
                z = 0.935409070603099 + 0.0458812946797165 * (xx - 1.7)
            else:
                z = 0.99505475368673
            z = -z
        return 0.5 * (z + 1.0)

    def impl(self, x):
        """Scalar Python implementation; delegates to :meth:`st_impl`."""
        return UltraFastScalarSigmoid.st_impl(x)

    def c_code(self, node, name, inp, out, sub):
        """Emit C code computing the same piecewise approximation as st_impl."""
        (x,) = inp
        (z,) = out
        # dtype_specs()[1] gives the C type name of the output.
        dtype = node.outputs[0].type.dtype_specs()[1]

        return (
            """{
        %(dtype)s x = 0.5 * %(x)s;
   // The if is a tanh approximate.
   if(x>=0) {
        %(z)s = (x<1.7 ? (1.5*x/(1+x)) :
                   (x<3 ? (0.935409070603099 + 0.0458812946797165*(x-1.7)):
                   0.99505475368673));
    } else {
        %(dtype)s xx = -x;
        %(z)s = -(xx<1.7 ? (1.5*xx/(1+xx)) :
                   (xx<3 ? (0.935409070603099 + 0.0458812946797165*(xx-1.7)):
                   0.99505475368673));
    }
        //%(z)s = 0.5*(ultrafasttanh(0.5*x)+1.);
        %(z)s = 0.5*(%(z)s+1.);
        }"""
            % locals()
        )

    @staticmethod
    def c_code_cache_version():
        # Bump when the generated C code changes.
        return (5,)
# Scalar op instance; integer inputs are upcast to float.
ultra_fast_scalar_sigmoid = UltraFastScalarSigmoid(
    aes.upgrade_to_float, name="ultra_fast_scalar_sigmoid"
)
# Elementwise wrapper used by the local_ultra_fast_sigmoid rewrite.
ultra_fast_sigmoid = Elemwise(ultra_fast_scalar_sigmoid, name="ultra_fast_sigmoid")
# In-place variant: output 0 overwrites input 0 (inplace_pattern={0: 0}).
ultra_fast_sigmoid_inplace = Elemwise(
    UltraFastScalarSigmoid(aes.transfer_type(0)),
    inplace_pattern={0: 0},
    name="ultra_fast_sigmoid_inplace",
)

# Pretty-print the op under its own name.
pprint.assign(ultra_fast_sigmoid, printing.FunctionPrinter(["ultra_fast_sigmoid"]))
@node_rewriter(None)
def local_ultra_fast_sigmoid(fgraph, node):
    """
    When enabled, replace every sigmoid with ultra_fast_sigmoid.

    Enable it with ``mode.including('local_ultra_fast_sigmoid')`` or the
    PyTensor flag ``optimizer_including=local_ultra_fast_sigmoid``.  This
    speeds up the sigmoid op with an approximation, and is applied after
    the stabilization and specialize phases to avoid interacting with them.
    """
    if not (isinstance(node.op, Elemwise) and isinstance(node.op.scalar_op, Sigmoid)):
        return
    replacement_op = (
        ultra_fast_sigmoid_inplace if node.op.inplace_pattern else ultra_fast_sigmoid
    )
    out = replacement_op(node.inputs[0])
    copy_stack_trace(node.outputs[0], out)

    def values_eq_approx_remove_low_prec(a, b):
        # atol found by trial and error; stricter tolerances would make
        # other tests fail without good reason.
        return TensorType.values_eq_approx(a, b, atol=0.02)

    # Let DebugMode know that this rewrite only approximates the values.
    out.tag.values_eq_approx = values_eq_approx_remove_low_prec
    return [out]
# Registered in the late "uncanonicalize" phase without any speed-mode tags,
# so it only runs when explicitly included (see the rewrite's docstring).
pytensor.compile.optdb["uncanonicalize"].register(
    "local_ultra_fast_sigmoid", local_ultra_fast_sigmoid
)
def hard_sigmoid(x):
    """
    A very coarse, fast approximation of the sigmoid.

    More approximate and faster than ``ultra_fast_sigmoid``: the output is
    piecewise in three parts — 0, a scaled linear segment, and 1.
    Removing the slope and shift does not make it faster.
    """
    # Perform the arithmetic in the dtype that "upgrade_to_float" would
    # select for x's dtype.
    out_dtype = aes.upgrade_to_float(aes.ScalarType(dtype=x.dtype))[0].dtype
    slope = constant(0.2, dtype=out_dtype)
    shift = constant(0.5, dtype=out_dtype)
    linear = x * slope + shift
    return clip(linear, 0, 1)
@node_rewriter([sigmoid])
def local_hard_sigmoid(fgraph, node):
    """Replace ``sigmoid`` with the coarse piecewise-linear ``hard_sigmoid``."""
    if not (isinstance(node.op, Elemwise) and node.op.scalar_op == scalar_sigmoid):
        return
    out = hard_sigmoid(node.inputs[0])
    copy_stack_trace(node.outputs[0], out)

    def values_eq_approx_remove_low_prec(a, b):
        # atol found by trial and error; stricter tolerances would make
        # other tests fail without good reason.
        return TensorType.values_eq_approx(a, b, atol=0.1)

    # Let DebugMode know that this rewrite only approximates the values.
    out.tag.values_eq_approx = values_eq_approx_remove_low_prec
    return [out]
# Like local_ultra_fast_sigmoid: registered in the late "uncanonicalize"
# phase without speed-mode tags, so it is opt-in only.
pytensor.compile.optdb["uncanonicalize"].register(
    "local_hard_sigmoid", local_hard_sigmoid
)
...@@ -13,7 +13,7 @@ from pytensor.scalar import int64 ...@@ -13,7 +13,7 @@ from pytensor.scalar import int64
from pytensor.tensor import blas_headers from pytensor.tensor import blas_headers
from pytensor.tensor.basic import as_tensor_variable from pytensor.tensor.basic import as_tensor_variable
from pytensor.tensor.blas import blas_header_version, ldflags from pytensor.tensor.blas import blas_header_version, ldflags
from pytensor.tensor.nnet.abstract_conv import get_conv_output_shape from pytensor.tensor.conv.abstract_conv import get_conv_output_shape
from pytensor.tensor.type import TensorType from pytensor.tensor.type import TensorType
......
...@@ -13,7 +13,7 @@ from pytensor.scalar import int8, int64 ...@@ -13,7 +13,7 @@ from pytensor.scalar import int8, int64
from pytensor.tensor import blas_headers from pytensor.tensor import blas_headers
from pytensor.tensor.basic import as_tensor_variable from pytensor.tensor.basic import as_tensor_variable
from pytensor.tensor.blas import blas_header_version, ldflags from pytensor.tensor.blas import blas_header_version, ldflags
from pytensor.tensor.nnet.abstract_conv import get_conv_output_shape from pytensor.tensor.conv.abstract_conv import get_conv_output_shape
from pytensor.tensor.type import TensorType from pytensor.tensor.type import TensorType
......
...@@ -6,9 +6,8 @@ import pytensor.tensor as at ...@@ -6,9 +6,8 @@ import pytensor.tensor as at
from pytensor.compile.mode import Mode from pytensor.compile.mode import Mode
from pytensor.configdefaults import config from pytensor.configdefaults import config
from pytensor.graph.rewriting.basic import check_stack_trace from pytensor.graph.rewriting.basic import check_stack_trace
from pytensor.tensor.nnet import abstract_conv as conv from pytensor.tensor.conv import abstract_conv
from pytensor.tensor.nnet import conv2d_transpose, corr, corr3d from pytensor.tensor.conv.abstract_conv import (
from pytensor.tensor.nnet.abstract_conv import (
AbstractConv2d, AbstractConv2d,
AbstractConv2d_gradInputs, AbstractConv2d_gradInputs,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
...@@ -19,18 +18,13 @@ from pytensor.tensor.nnet.abstract_conv import ( ...@@ -19,18 +18,13 @@ from pytensor.tensor.nnet.abstract_conv import (
bilinear_upsampling, bilinear_upsampling,
causal_conv1d, causal_conv1d,
check_conv_gradinputs_shape, check_conv_gradinputs_shape,
conv2d_transpose,
get_conv_gradinputs_shape, get_conv_gradinputs_shape,
get_conv_gradweights_shape, get_conv_gradweights_shape,
get_conv_output_shape, get_conv_output_shape,
separable_conv2d, separable_conv2d,
separable_conv3d, separable_conv3d,
) )
from pytensor.tensor.nnet.corr import CorrMM, CorrMM_gradInputs, CorrMM_gradWeights
from pytensor.tensor.nnet.corr3d import (
Corr3dMM,
Corr3dMMGradInputs,
Corr3dMMGradWeights,
)
from pytensor.tensor.type import ( from pytensor.tensor.type import (
TensorType, TensorType,
ftensor4, ftensor4,
...@@ -41,6 +35,7 @@ from pytensor.tensor.type import ( ...@@ -41,6 +35,7 @@ from pytensor.tensor.type import (
tensor5, tensor5,
) )
from tests import unittest_tools as utt from tests import unittest_tools as utt
from tests.tensor.conv import c_conv3d_corr3d_ref, c_conv_corr_ref
def conv2d_corr( def conv2d_corr(
...@@ -53,7 +48,9 @@ def conv2d_corr( ...@@ -53,7 +48,9 @@ def conv2d_corr(
): ):
if conv_mode == "conv": if conv_mode == "conv":
filters = filters[:, :, ::-1, ::-1] filters = filters[:, :, ::-1, ::-1]
return corr.CorrMM(border_mode, subsample, filter_dilation)(inputs, filters) return c_conv_corr_ref.CorrMM(border_mode, subsample, filter_dilation)(
inputs, filters
)
def conv2d_corr_gw( def conv2d_corr_gw(
...@@ -65,7 +62,7 @@ def conv2d_corr_gw( ...@@ -65,7 +62,7 @@ def conv2d_corr_gw(
conv_mode="conv", conv_mode="conv",
filter_dilation=(1, 1), filter_dilation=(1, 1),
): ):
rval = corr.CorrMM_gradWeights(border_mode, subsample, filter_dilation)( rval = c_conv_corr_ref.CorrMM_gradWeights(border_mode, subsample, filter_dilation)(
inputs, topgrad, filters_shape[2:] inputs, topgrad, filters_shape[2:]
) )
if conv_mode == "conv": if conv_mode == "conv":
...@@ -84,7 +81,7 @@ def conv2d_corr_gi( ...@@ -84,7 +81,7 @@ def conv2d_corr_gi(
): ):
if conv_mode == "conv": if conv_mode == "conv":
filters = filters[:, :, ::-1, ::-1] filters = filters[:, :, ::-1, ::-1]
return corr.CorrMM_gradInputs(border_mode, subsample, filter_dilation)( return c_conv_corr_ref.CorrMM_gradInputs(border_mode, subsample, filter_dilation)(
filters, topgrad, inputs_shape[2:] filters, topgrad, inputs_shape[2:]
) )
...@@ -99,7 +96,9 @@ def conv3d_corr( ...@@ -99,7 +96,9 @@ def conv3d_corr(
): ):
if conv_mode == "conv": if conv_mode == "conv":
filters = filters[:, :, ::-1, ::-1, ::-1] filters = filters[:, :, ::-1, ::-1, ::-1]
return corr3d.Corr3dMM(border_mode, subsample, filter_dilation)(inputs, filters) return c_conv3d_corr3d_ref.Corr3dMM(border_mode, subsample, filter_dilation)(
inputs, filters
)
def conv3d_corr_gw( def conv3d_corr_gw(
...@@ -111,9 +110,9 @@ def conv3d_corr_gw( ...@@ -111,9 +110,9 @@ def conv3d_corr_gw(
conv_mode="conv", conv_mode="conv",
filter_dilation=(1, 1, 1), filter_dilation=(1, 1, 1),
): ):
rval = corr3d.Corr3dMMGradWeights(border_mode, subsample, filter_dilation)( rval = c_conv3d_corr3d_ref.Corr3dMMGradWeights(
inputs, topgrad, filters_shape[2:] border_mode, subsample, filter_dilation
) )(inputs, topgrad, filters_shape[2:])
if conv_mode == "conv": if conv_mode == "conv":
rval = rval[:, :, ::-1, ::-1, ::-1] rval = rval[:, :, ::-1, ::-1, ::-1]
return rval return rval
...@@ -130,9 +129,9 @@ def conv3d_corr_gi( ...@@ -130,9 +129,9 @@ def conv3d_corr_gi(
): ):
if conv_mode == "conv": if conv_mode == "conv":
filters = filters[:, :, ::-1, ::-1, ::-1] filters = filters[:, :, ::-1, ::-1, ::-1]
return corr3d.Corr3dMMGradInputs(border_mode, subsample, filter_dilation)( return c_conv3d_corr3d_ref.Corr3dMMGradInputs(
filters, topgrad, inputs_shape[2:] border_mode, subsample, filter_dilation
) )(filters, topgrad, inputs_shape[2:])
class TestGetConvOutShape: class TestGetConvOutShape:
...@@ -338,7 +337,7 @@ class TestAssertShape: ...@@ -338,7 +337,7 @@ class TestAssertShape:
input = tensor4() input = tensor4()
filters = tensor4() filters = tensor4()
out = conv.abstract_conv2d( out = abstract_conv.abstract_conv2d(
input, filters, input_shape=(3, 5, 7, 11), filter_shape=(7, 5, 3, 3) input, filters, input_shape=(3, 5, 7, 11), filter_shape=(7, 5, 3, 3)
) )
f = pytensor.function([input, filters], out) f = pytensor.function([input, filters], out)
...@@ -361,7 +360,7 @@ class TestAssertShape: ...@@ -361,7 +360,7 @@ class TestAssertShape:
input = tensor5() input = tensor5()
filters = tensor5() filters = tensor5()
out = conv.conv3d( out = abstract_conv.conv3d(
input, filters, input_shape=(3, 5, 7, 11, 13), filter_shape=(7, 5, 3, 3, 3) input, filters, input_shape=(3, 5, 7, 11, 13), filter_shape=(7, 5, 3, 3, 3)
) )
f = pytensor.function([input, filters], out) f = pytensor.function([input, filters], out)
...@@ -383,7 +382,7 @@ class TestAssertShape: ...@@ -383,7 +382,7 @@ class TestAssertShape:
output_grad = tensor4() output_grad = tensor4()
filters = tensor4() filters = tensor4()
out = conv.conv2d_grad_wrt_inputs( out = abstract_conv.conv2d_grad_wrt_inputs(
output_grad, output_grad,
filters, filters,
input_shape=(None, None, 7, 11), input_shape=(None, None, 7, 11),
...@@ -403,7 +402,7 @@ class TestAssertShape: ...@@ -403,7 +402,7 @@ class TestAssertShape:
output_grad = tensor5() output_grad = tensor5()
filters = tensor5() filters = tensor5()
out = conv.conv3d_grad_wrt_inputs( out = abstract_conv.conv3d_grad_wrt_inputs(
output_grad, output_grad,
filters, filters,
input_shape=(None, None, 7, 11, 13), input_shape=(None, None, 7, 11, 13),
...@@ -422,7 +421,7 @@ class TestAssertShape: ...@@ -422,7 +421,7 @@ class TestAssertShape:
input = tensor4() input = tensor4()
output_grad = tensor4() output_grad = tensor4()
out = conv.conv2d_grad_wrt_weights( out = abstract_conv.conv2d_grad_wrt_weights(
input, input,
output_grad, output_grad,
filter_shape=(None, None, 3, 3), filter_shape=(None, None, 3, 3),
...@@ -442,7 +441,7 @@ class TestAssertShape: ...@@ -442,7 +441,7 @@ class TestAssertShape:
input = tensor5() input = tensor5()
output_grad = tensor5() output_grad = tensor5()
out = conv.conv3d_grad_wrt_weights( out = abstract_conv.conv3d_grad_wrt_weights(
input, input,
output_grad, output_grad,
filter_shape=(None, None, 3, 3, 3), filter_shape=(None, None, 3, 3, 3),
...@@ -892,8 +891,8 @@ class BaseTestConv2d(BaseTestConv): ...@@ -892,8 +891,8 @@ class BaseTestConv2d(BaseTestConv):
self, self,
inputs_shape, inputs_shape,
filters_shape, filters_shape,
conv_fn=conv.abstract_conv2d, conv_fn=abstract_conv.abstract_conv2d,
conv_op=conv.AbstractConv2d, conv_op=abstract_conv.AbstractConv2d,
ref=conv2d_corr, ref=conv2d_corr,
**kwargs, **kwargs,
): ):
...@@ -911,7 +910,7 @@ class BaseTestConv2d(BaseTestConv): ...@@ -911,7 +910,7 @@ class BaseTestConv2d(BaseTestConv):
inputs_shape, inputs_shape,
filters_shape, filters_shape,
output_shape, output_shape,
gradWeights_fn=conv.AbstractConv2d_gradWeights, gradWeights_fn=abstract_conv.AbstractConv2d_gradWeights,
ref=conv2d_corr_gw, ref=conv2d_corr_gw,
**kwargs, **kwargs,
): ):
...@@ -929,7 +928,7 @@ class BaseTestConv2d(BaseTestConv): ...@@ -929,7 +928,7 @@ class BaseTestConv2d(BaseTestConv):
inputs_shape, inputs_shape,
filters_shape, filters_shape,
output_shape, output_shape,
gradInputs_fn=conv.AbstractConv2d_gradInputs, gradInputs_fn=abstract_conv.AbstractConv2d_gradInputs,
ref=conv2d_corr_gi, ref=conv2d_corr_gi,
**kwargs, **kwargs,
): ):
...@@ -943,96 +942,6 @@ class BaseTestConv2d(BaseTestConv): ...@@ -943,96 +942,6 @@ class BaseTestConv2d(BaseTestConv):
) )
@pytest.mark.skipif(
not config.cxx or config.mode == "FAST_COMPILE",
reason="Need blas to test conv2d",
)
class TestCorrConv2d(BaseTestConv2d):
@classmethod
def setup_class(cls):
# This tests can run even when config.blas__ldflags is empty.
super().setup_class()
def run_test_case(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
o = self.get_output_shape(i, f, s, b, fd)
self.run_fwd(
inputs_shape=i,
filters_shape=f,
subsample=s,
verify_grad=True,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
target_op=CorrMM,
check_trace=True,
filter_dilation=fd,
)
self.run_gradweight(
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=True,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
target_op=CorrMM_gradWeights,
check_trace=True,
filter_dilation=fd,
)
self.run_gradinput(
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=True,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
target_op=CorrMM_gradInputs,
check_trace=True,
filter_dilation=fd,
)
def run_test_case_gi(
self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False
):
if not expect_error:
self.run_gradinput(
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=True,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
target_op=CorrMM_gradInputs,
check_trace=True,
filter_dilation=fd,
)
else:
with pytest.raises(ValueError):
self.run_gradinput(
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=False,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
target_op=CorrMM_gradInputs,
ref=None,
check_trace=True,
filter_dilation=fd,
)
@pytest.mark.slow
def test_all(self):
super().test_all()
@pytest.mark.skipif( @pytest.mark.skipif(
config.cxx == "", config.cxx == "",
reason="SciPy and cxx needed", reason="SciPy and cxx needed",
...@@ -1252,8 +1161,8 @@ class BaseTestConv3d(BaseTestConv): ...@@ -1252,8 +1161,8 @@ class BaseTestConv3d(BaseTestConv):
self, self,
inputs_shape, inputs_shape,
filters_shape, filters_shape,
conv_fn=conv.conv3d, conv_fn=abstract_conv.conv3d,
conv_op=conv.AbstractConv3d, conv_op=abstract_conv.AbstractConv3d,
ref=conv3d_corr, ref=conv3d_corr,
**kwargs, **kwargs,
): ):
...@@ -1271,7 +1180,7 @@ class BaseTestConv3d(BaseTestConv): ...@@ -1271,7 +1180,7 @@ class BaseTestConv3d(BaseTestConv):
inputs_shape, inputs_shape,
filters_shape, filters_shape,
output_shape, output_shape,
gradWeights_fn=conv.AbstractConv3d_gradWeights, gradWeights_fn=abstract_conv.AbstractConv3d_gradWeights,
ref=conv3d_corr_gw, ref=conv3d_corr_gw,
**kwargs, **kwargs,
): ):
...@@ -1289,7 +1198,7 @@ class BaseTestConv3d(BaseTestConv): ...@@ -1289,7 +1198,7 @@ class BaseTestConv3d(BaseTestConv):
inputs_shape, inputs_shape,
filters_shape, filters_shape,
output_shape, output_shape,
gradInputs_fn=conv.AbstractConv3d_gradInputs, gradInputs_fn=abstract_conv.AbstractConv3d_gradInputs,
ref=conv3d_corr_gi, ref=conv3d_corr_gi,
**kwargs, **kwargs,
): ):
...@@ -1303,94 +1212,6 @@ class BaseTestConv3d(BaseTestConv): ...@@ -1303,94 +1212,6 @@ class BaseTestConv3d(BaseTestConv):
) )
@pytest.mark.skipif(
not config.cxx or config.mode == "FAST_COMPILE",
reason="Need blas to test conv3d",
)
class TestCorrConv3d(BaseTestConv3d):
@classmethod
def setup_class(cls):
# This tests can run even when config.blas__ldflags is empty.
super().setup_class()
def run_test_case(self, i, f, s, b, flip, provide_shape, fd=(1, 1, 1)):
o = self.get_output_shape(i, f, s, b, fd)
# This test can run even when config.blas__ldflags is empty.
self.run_fwd(
inputs_shape=i,
filters_shape=f,
subsample=s,
verify_grad=True,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
target_op=Corr3dMM,
check_trace=True,
filter_dilation=fd,
)
self.run_gradweight(
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=True,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
target_op=Corr3dMMGradWeights,
check_trace=True,
filter_dilation=fd,
)
self.run_gradinput(
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=True,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
target_op=Corr3dMMGradInputs,
check_trace=True,
filter_dilation=fd,
)
def run_test_case_gi(
self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False
):
# This test can run even when config.blas__ldflags is empty.
if not expect_error:
self.run_gradinput(
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=True,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
target_op=Corr3dMMGradInputs,
check_trace=True,
filter_dilation=fd,
)
else:
with pytest.raises(ValueError):
self.run_gradinput(
inputs_shape=i,
filters_shape=f,
output_shape=o,
subsample=s,
verify_grad=False,
provide_shape=provide_shape,
border_mode=b,
filter_flip=flip,
target_op=Corr3dMMGradInputs,
ref=None,
check_trace=True,
filter_dilation=fd,
)
def test_constant_shapes(): def test_constant_shapes():
# Check that the `imshp` and `kshp` parameters of the AbstractConv Ops # Check that the `imshp` and `kshp` parameters of the AbstractConv Ops
# are rejected if not constant or None # are rejected if not constant or None
...@@ -1451,7 +1272,7 @@ class TestConvTypes: ...@@ -1451,7 +1272,7 @@ class TestConvTypes:
out_shape = lvector() out_shape = lvector()
output = conv.abstract_conv2d(input, filters) output = abstract_conv.abstract_conv2d(input, filters)
grad_input, grad_filters = pytensor.grad(output.sum(), wrt=(input, filters)) grad_input, grad_filters = pytensor.grad(output.sum(), wrt=(input, filters))
assert grad_input.type == input.type, ( assert grad_input.type == input.type, (
grad_input, grad_input,
...@@ -1466,7 +1287,9 @@ class TestConvTypes: ...@@ -1466,7 +1287,9 @@ class TestConvTypes:
filters.type, filters.type,
) )
grad_filters = conv.AbstractConv2d_gradWeights()(input, topgrad, out_shape) grad_filters = abstract_conv.AbstractConv2d_gradWeights()(
input, topgrad, out_shape
)
grad_input, grad_topgrad = pytensor.grad( grad_input, grad_topgrad = pytensor.grad(
grad_filters.sum(), wrt=(input, topgrad) grad_filters.sum(), wrt=(input, topgrad)
) )
...@@ -1484,7 +1307,9 @@ class TestConvTypes: ...@@ -1484,7 +1307,9 @@ class TestConvTypes:
topgrad.type, topgrad.type,
) )
grad_input = conv.AbstractConv2d_gradInputs()(filters, topgrad, out_shape) grad_input = abstract_conv.AbstractConv2d_gradInputs()(
filters, topgrad, out_shape
)
grad_filters, grad_topgrad = pytensor.grad( grad_filters, grad_topgrad = pytensor.grad(
grad_input.sum(), wrt=(filters, topgrad) grad_input.sum(), wrt=(filters, topgrad)
) )
...@@ -1511,7 +1336,7 @@ class TestConvTypes: ...@@ -1511,7 +1336,7 @@ class TestConvTypes:
out_shape = lvector() out_shape = lvector()
# Check the forward Op # Check the forward Op
output = conv.abstract_conv2d(constant_tensor, filters) output = abstract_conv.abstract_conv2d(constant_tensor, filters)
grad_filters = pytensor.grad(output.sum(), wrt=filters) grad_filters = pytensor.grad(output.sum(), wrt=filters)
assert filters.type.is_super(grad_filters.type), ( assert filters.type.is_super(grad_filters.type), (
grad_filters, grad_filters,
...@@ -1520,7 +1345,7 @@ class TestConvTypes: ...@@ -1520,7 +1345,7 @@ class TestConvTypes:
filters.type, filters.type,
) )
output = conv.abstract_conv2d(input, constant_tensor) output = abstract_conv.abstract_conv2d(input, constant_tensor)
grad_input = pytensor.grad(output.sum(), wrt=input) grad_input = pytensor.grad(output.sum(), wrt=input)
assert input.type.is_super(grad_input.type), ( assert input.type.is_super(grad_input.type), (
grad_input, grad_input,
...@@ -1530,7 +1355,7 @@ class TestConvTypes: ...@@ -1530,7 +1355,7 @@ class TestConvTypes:
) )
# Check grad wrt weights # Check grad wrt weights
grad_filters = conv.AbstractConv2d_gradWeights()( grad_filters = abstract_conv.AbstractConv2d_gradWeights()(
constant_tensor, topgrad, out_shape constant_tensor, topgrad, out_shape
) )
grad_topgrad = pytensor.grad(grad_filters.sum(), wrt=topgrad) grad_topgrad = pytensor.grad(grad_filters.sum(), wrt=topgrad)
...@@ -1541,7 +1366,7 @@ class TestConvTypes: ...@@ -1541,7 +1366,7 @@ class TestConvTypes:
topgrad.type, topgrad.type,
) )
grad_filters = conv.AbstractConv2d_gradWeights()( grad_filters = abstract_conv.AbstractConv2d_gradWeights()(
input, constant_tensor, out_shape input, constant_tensor, out_shape
) )
grad_input = pytensor.grad(grad_filters.sum(), wrt=input) grad_input = pytensor.grad(grad_filters.sum(), wrt=input)
...@@ -1553,7 +1378,7 @@ class TestConvTypes: ...@@ -1553,7 +1378,7 @@ class TestConvTypes:
) )
# Check grad wrt inputs # Check grad wrt inputs
grad_input = conv.AbstractConv2d_gradInputs()( grad_input = abstract_conv.AbstractConv2d_gradInputs()(
constant_tensor, topgrad, out_shape constant_tensor, topgrad, out_shape
) )
grad_topgrad = pytensor.grad(grad_input.sum(), wrt=topgrad) grad_topgrad = pytensor.grad(grad_input.sum(), wrt=topgrad)
...@@ -1564,7 +1389,7 @@ class TestConvTypes: ...@@ -1564,7 +1389,7 @@ class TestConvTypes:
topgrad.type, topgrad.type,
) )
grad_input = conv.AbstractConv2d_gradInputs()( grad_input = abstract_conv.AbstractConv2d_gradInputs()(
filters, constant_tensor, out_shape filters, constant_tensor, out_shape
) )
grad_filters = pytensor.grad(grad_input.sum(), wrt=filters) grad_filters = pytensor.grad(grad_input.sum(), wrt=filters)
...@@ -1923,13 +1748,13 @@ class TestConv2dGrads: ...@@ -1923,13 +1748,13 @@ class TestConv2dGrads:
self.output_grad = tensor4() self.output_grad = tensor4()
self.output_grad_wrt = tensor4() self.output_grad_wrt = tensor4()
self.x = tensor4("x", config.floatX) # inputs self.x = tensor4("x", dtype=config.floatX) # inputs
self.w = tensor4("w", config.floatX) # filter weights self.w = tensor4("w", dtype=config.floatX) # filter weights
def test_conv2d_grad_wrt_inputs(self): def test_conv2d_grad_wrt_inputs(self):
# Compares calculated abstract grads wrt inputs with the fwd grads # Compares calculated abstract grads wrt inputs with the fwd grads
# This method checks the outputs of `conv2_grad_wrt_inputs` against # This method checks the outputs of `conv2_grad_wrt_inputs` against
# the outputs of `pytensor.tensor.nnet.conv` forward grads to make sure the # the outputs of `pytensor.tensor.conv` forward grads to make sure the
# results are the same. # results are the same.
for (in_shape, fltr_shape) in zip(self.inputs_shapes, self.filters_shapes): for (in_shape, fltr_shape) in zip(self.inputs_shapes, self.filters_shapes):
...@@ -1942,18 +1767,16 @@ class TestConv2dGrads: ...@@ -1942,18 +1767,16 @@ class TestConv2dGrads:
filter_val = self.random_stream.random(fltr_shape).astype( filter_val = self.random_stream.random(fltr_shape).astype(
config.floatX config.floatX
) )
out_grad_shape = ( out_grad_shape = abstract_conv.get_conv_output_shape(
pytensor.tensor.nnet.abstract_conv.get_conv_output_shape( image_shape=in_shape,
image_shape=in_shape, kernel_shape=fltr_shape,
kernel_shape=fltr_shape, border_mode=bm,
border_mode=bm, subsample=ss,
subsample=ss,
)
) )
out_grad_val = self.random_stream.random(out_grad_shape).astype( out_grad_val = self.random_stream.random(out_grad_shape).astype(
config.floatX config.floatX
) )
conv_out = pytensor.tensor.nnet.conv2d( conv_out = abstract_conv.conv2d(
self.x, self.x,
filters=self.w, filters=self.w,
border_mode=bm, border_mode=bm,
...@@ -1971,16 +1794,14 @@ class TestConv2dGrads: ...@@ -1971,16 +1794,14 @@ class TestConv2dGrads:
[self.x, self.w, self.output_grad], conv_grad [self.x, self.w, self.output_grad], conv_grad
) )
conv_wrt_i_out = ( conv_wrt_i_out = abstract_conv.conv2d_grad_wrt_inputs(
pytensor.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( output_grad=self.output_grad_wrt,
output_grad=self.output_grad_wrt, filters=self.w,
filters=self.w, border_mode=bm,
border_mode=bm, subsample=ss,
subsample=ss, input_shape=in_shape,
input_shape=in_shape, filter_shape=fltr_shape,
filter_shape=fltr_shape, filter_flip=ff,
filter_flip=ff,
)
) )
f_new = pytensor.function( f_new = pytensor.function(
[self.w, self.output_grad_wrt], conv_wrt_i_out [self.w, self.output_grad_wrt], conv_wrt_i_out
...@@ -1995,7 +1816,7 @@ class TestConv2dGrads: ...@@ -1995,7 +1816,7 @@ class TestConv2dGrads:
def test_conv2d_grad_wrt_weights(self): def test_conv2d_grad_wrt_weights(self):
# Compares calculated abstract grads wrt weights with the fwd grads # Compares calculated abstract grads wrt weights with the fwd grads
# This method checks the outputs of `conv2_grad_wrt_weights` against # This method checks the outputs of `conv2_grad_wrt_weights` against
# the outputs of `pytensor.tensor.nnet.conv` forward grads to make sure the # the outputs of `pytensor.tensor.conv` forward grads to make sure the
# results are the same. # results are the same.
for (in_shape, fltr_shape) in zip(self.inputs_shapes, self.filters_shapes): for (in_shape, fltr_shape) in zip(self.inputs_shapes, self.filters_shapes):
...@@ -2008,18 +1829,16 @@ class TestConv2dGrads: ...@@ -2008,18 +1829,16 @@ class TestConv2dGrads:
filter_val = self.random_stream.random(fltr_shape).astype( filter_val = self.random_stream.random(fltr_shape).astype(
config.floatX config.floatX
) )
out_grad_shape = ( out_grad_shape = abstract_conv.get_conv_output_shape(
pytensor.tensor.nnet.abstract_conv.get_conv_output_shape( image_shape=in_shape,
image_shape=in_shape, kernel_shape=fltr_shape,
kernel_shape=fltr_shape, border_mode=bm,
border_mode=bm, subsample=ss,
subsample=ss,
)
) )
out_grad_val = self.random_stream.random(out_grad_shape).astype( out_grad_val = self.random_stream.random(out_grad_shape).astype(
config.floatX config.floatX
) )
conv_out = pytensor.tensor.nnet.conv2d( conv_out = abstract_conv.conv2d(
self.x, self.x,
filters=self.w, filters=self.w,
border_mode=bm, border_mode=bm,
...@@ -2037,16 +1856,14 @@ class TestConv2dGrads: ...@@ -2037,16 +1856,14 @@ class TestConv2dGrads:
[self.x, self.w, self.output_grad], conv_grad [self.x, self.w, self.output_grad], conv_grad
) )
conv_wrt_w_out = ( conv_wrt_w_out = abstract_conv.conv2d_grad_wrt_weights(
pytensor.tensor.nnet.abstract_conv.conv2d_grad_wrt_weights( self.x,
self.x, output_grad=self.output_grad_wrt,
output_grad=self.output_grad_wrt, border_mode=bm,
border_mode=bm, subsample=ss,
subsample=ss, input_shape=in_shape,
input_shape=in_shape, filter_shape=fltr_shape,
filter_shape=fltr_shape, filter_flip=ff,
filter_flip=ff,
)
) )
f_new = pytensor.function( f_new = pytensor.function(
[self.x, self.output_grad_wrt], conv_wrt_w_out [self.x, self.output_grad_wrt], conv_wrt_w_out
...@@ -2062,12 +1879,12 @@ class TestConv2dGrads: ...@@ -2062,12 +1879,12 @@ class TestConv2dGrads:
reason="SciPy and cxx needed", reason="SciPy and cxx needed",
) )
class TestGroupedConvNoOptim: class TestGroupedConvNoOptim:
conv = pytensor.tensor.nnet.abstract_conv.AbstractConv2d conv = abstract_conv.AbstractConv2d
conv_gradw = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights conv_gradw = abstract_conv.AbstractConv2d_gradWeights
conv_gradi = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs conv_gradi = abstract_conv.AbstractConv2d_gradInputs
conv_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d conv_op = abstract_conv.AbstractConv2d
conv_gradw_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights conv_gradw_op = abstract_conv.AbstractConv2d_gradWeights
conv_gradi_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs conv_gradi_op = abstract_conv.AbstractConv2d_gradInputs
mode = Mode(optimizer=None) mode = Mode(optimizer=None)
is_dnn = False is_dnn = False
...@@ -2266,12 +2083,12 @@ class TestGroupedConvNoOptim: ...@@ -2266,12 +2083,12 @@ class TestGroupedConvNoOptim:
reason="SciPy and cxx needed", reason="SciPy and cxx needed",
) )
class TestGroupedConv3dNoOptim(TestGroupedConvNoOptim): class TestGroupedConv3dNoOptim(TestGroupedConvNoOptim):
conv = pytensor.tensor.nnet.abstract_conv.AbstractConv3d conv = abstract_conv.AbstractConv3d
conv_gradw = pytensor.tensor.nnet.abstract_conv.AbstractConv3d_gradWeights conv_gradw = abstract_conv.AbstractConv3d_gradWeights
conv_gradi = pytensor.tensor.nnet.abstract_conv.AbstractConv3d_gradInputs conv_gradi = abstract_conv.AbstractConv3d_gradInputs
conv_op = pytensor.tensor.nnet.abstract_conv.AbstractConv3d conv_op = abstract_conv.AbstractConv3d
conv_gradw_op = pytensor.tensor.nnet.abstract_conv.AbstractConv3d_gradWeights conv_gradw_op = abstract_conv.AbstractConv3d_gradWeights
conv_gradi_op = pytensor.tensor.nnet.abstract_conv.AbstractConv3d_gradInputs conv_gradi_op = abstract_conv.AbstractConv3d_gradInputs
mode = Mode(optimizer=None) mode = Mode(optimizer=None)
def setup_method(self): def setup_method(self):
...@@ -2505,12 +2322,12 @@ class TestSeparableConv: ...@@ -2505,12 +2322,12 @@ class TestSeparableConv:
reason="SciPy and cxx needed", reason="SciPy and cxx needed",
) )
class TestUnsharedConv: class TestUnsharedConv:
conv2d = pytensor.tensor.nnet.abstract_conv.AbstractConv2d conv2d = abstract_conv.AbstractConv2d
conv2d_gradw = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights conv2d_gradw = abstract_conv.AbstractConv2d_gradWeights
conv2d_gradi = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs conv2d_gradi = abstract_conv.AbstractConv2d_gradInputs
conv2d_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d conv2d_op = abstract_conv.AbstractConv2d
conv2d_gradw_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights conv2d_gradw_op = abstract_conv.AbstractConv2d_gradWeights
conv2d_gradi_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs conv2d_gradi_op = abstract_conv.AbstractConv2d_gradInputs
mode = Mode(optimizer="None") mode = Mode(optimizer="None")
...@@ -2733,12 +2550,12 @@ class TestUnsharedConv: ...@@ -2733,12 +2550,12 @@ class TestUnsharedConv:
class TestAsymmetricPadding: class TestAsymmetricPadding:
conv2d = pytensor.tensor.nnet.abstract_conv.AbstractConv2d conv2d = abstract_conv.AbstractConv2d
conv2d_gradw = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights conv2d_gradw = abstract_conv.AbstractConv2d_gradWeights
conv2d_gradi = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs conv2d_gradi = abstract_conv.AbstractConv2d_gradInputs
conv2d_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d conv2d_op = abstract_conv.AbstractConv2d
conv2d_gradw_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights conv2d_gradw_op = abstract_conv.AbstractConv2d_gradWeights
conv2d_gradi_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs conv2d_gradi_op = abstract_conv.AbstractConv2d_gradInputs
mode = Mode(optimizer="None") mode = Mode(optimizer="None")
......
import time
import numpy as np
from pytensor import function
from pytensor.compile.mode import Mode
from pytensor.tensor.nnet.conv import ConvOp
from pytensor.tensor.type import TensorType, dmatrix
def flip(kern, kshp):
    """Return `kern` flipped along its last two (spatial) axes.

    This reproduces the kernel flip that ``scipy.signal.convolve2d``
    applies, so correlating with the flipped kernel matches a true
    convolution.

    Parameters
    ----------
    kern : ndarray
        2-, 3- or 4-dimensional kernel array.
    kshp : sequence of int
        Spatial shape (rows, cols) of the kernel; expected to equal
        ``kern.shape[-2:]`` (all callers in this file guarantee this).

    Returns
    -------
    ndarray
        ``float64`` copy of `kern` with the last two axes reversed.

    Raises
    ------
    NotImplementedError
        If `kern` does not have 2, 3 or 4 dimensions.
    """
    if kern.ndim not in (2, 3, 4):
        raise NotImplementedError()
    # The original implementation filled the output row-major from a
    # reversed flattening of the two trailing axes; that is exactly the
    # same as reversing each of those two axes individually.
    return np.asarray(kern, dtype="float64")[..., ::-1, ::-1].copy()
# Module-level RNG with a fixed seed so repeated benchmark runs see the
# same random images and kernels.
global_rng = np.random.default_rng(3423489)

# Shorthand for a 4-d float64 tensor variable type; used below for the
# (batch, channel, row, col) image and kernel inputs.
dmatrix4 = TensorType("float64", shape=(None, None, None, None))
def exec_multilayer_conv_nnet_old(
    conv_mode,
    ss,
    bsize,
    imshp,
    kshps,
    nkerns,
    unroll_batch=0,
    unroll_kern=0,
    img=None,
    validate=True,
    conv_op_py=False,
    do_print=True,
    repeat=1,
    unroll_patch=False,
    unroll_patch_size=False,
    verbose=0,
):
    """Time a stack of `ConvOp` layers and optionally validate the results.

    Each layer is exercised up to three ways: a manual scipy
    ``_convolve2d`` reference (when `validate`), the C-linker-compiled
    `ConvOp` graph, and the same graph under the Python linker (when
    `conv_op_py`).  Outputs of the different paths are asserted equal to
    within 1e-5.

    Parameters
    ----------
    conv_mode : str
        ``"valid"`` or ``"full"`` output mode.
    ss : tuple of int
        Subsampling (stride) per spatial dimension.
    bsize : int
        Batch size.
    imshp : tuple
        Input shape ``(channels, rows, cols)`` of the first layer.
    kshps, nkerns : sequences
        Per-layer kernel spatial shapes and output-filter counts.
    unroll_batch, unroll_kern, unroll_patch, unroll_patch_size
        Loop-unrolling options forwarded to `ConvOp`.
    repeat : int
        Number of timed calls per layer.

    Returns
    -------
    (tctot, tpytot, ntot) : floats
        Cumulative times for the C path, the Python path and the scipy
        reference, respectively.
    """
    if img is None:
        img = dmatrix()
    # build actual input images
    imgval = global_rng.random((bsize, imshp[0], imshp[1], imshp[2]))

    a = dmatrix()
    kerns = [a for i in nkerns]
    inputs4 = dmatrix4()
    kerns4 = dmatrix4()

    # for each layer
    ntot = 0
    tctot = 0
    tpytot = 0
    for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))):
        if do_print:
            print("************* layer %i ***************" % n_layer)
            print(conv_mode, ss, n_layer, kshp, nkern)
        # actual values
        w = global_rng.random(np.r_[nkern, imshp[0], kshp])
        w_flip = flip(w, kshp).reshape(w.shape)

        # manual implementation
        # check first stage
        # `padimg` zero-pads the image for "full" mode so the reference
        # scipy correlation sees the same extent as ConvOp.
        padimg = imgval
        if conv_mode == "full":
            padimg_shp = np.array(imshp[1:]) + 2 * (np.array(kshp) - np.array([1, 1]))
            padimg = np.zeros(np.r_[bsize, imshp[0], padimg_shp])
            padimg[
                :, :, kshp[0] - 1 : -kshp[0] + 1, kshp[1] - 1 : -kshp[1] + 1
            ] = imgval

        outshp = np.hstack(
            (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))
        )

        time1 = time.perf_counter()
        outval = np.zeros(np.r_[bsize, outshp])
        if validate:
            # causes an atexit problem
            # Private scipy helpers; their module moved in newer scipy,
            # hence the ImportError fallback.
            try:
                from scipy.signal.signaltools import _bvalfromboundary, _valfrommode
                from scipy.signal.sigtools import _convolve2d
            except ImportError:
                from scipy.signal._signaltools import _bvalfromboundary, _valfrommode
                from scipy.signal._sigtools import _convolve2d

            val = _valfrommode(conv_mode)
            bval = _bvalfromboundary("fill")
            for b in range(bsize):  # loop over batches
                for n in range(nkern):  # loop over filters
                    for i in range(imshp[0]):  # loop over input feature maps
                        outval[b, n, ...] += _convolve2d(
                            imgval[b, i, ...], w_flip[n, i, ...], 1, val, bval, 0
                        )[0 :: ss[0], 0 :: ss[1]]
            ntot += time.perf_counter() - time1

        # ConvOp
        if unroll_patch and not unroll_patch_size:
            # Patch unrolling without compile-time shapes.
            conv_op = ConvOp(
                dx=ss[0],
                dy=ss[1],
                output_mode=conv_mode,
                unroll_patch=unroll_patch,
                verbose=verbose,
            )(inputs4, kerns4)
        else:
            # Full shape information given at construction time.
            conv_op = ConvOp(
                imshp,
                kshp,
                nkern,
                bsize,
                ss[0],
                ss[1],
                conv_mode,
                unroll_batch=unroll_batch,
                unroll_kern=unroll_kern,
                unroll_patch=unroll_patch,
                verbose=verbose,
            )(inputs4, kerns4)
        # l1shp = np.hstack((nkern,
        #                    ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))
        propup2 = function([inputs4, kerns4], conv_op)
        propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py"))

        time1 = time.perf_counter()
        for i in range(repeat):
            hidval2_ = propup2(imgval, w_flip)
        hidval2 = hidval2_  # [:,:,0::ss[0],0::ss[1]]
        tctot += time.perf_counter() - time1

        if conv_op_py:
            time1 = time.perf_counter()
            for i in range(repeat):
                hidval3_ = propup3(imgval, w_flip)
            hidval3 = hidval3_  # [:,:,0::ss[0],0::ss[1]]
            tpytot += time.perf_counter() - time1
            assert (np.abs(hidval2 - hidval3) < 1e-5).all()
        else:
            tpytot += 0

        if validate:
            temp = np.abs(outval - hidval2)
            assert (temp < 1e-5).all()
        if validate and conv_op_py:
            temp = np.abs(outval - hidval3)
            assert (temp < 1e-5).all()

        # NOTE(review): the next layer's input is the scipy reference
        # output, which stays all-zero when `validate` is False —
        # presumably acceptable for pure timing runs; confirm if reused.
        imshp = tuple(outshp)
        imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2])
    return tctot, tpytot, ntot
def exec_multilayer_conv_nnet(
    conv_mode,
    ss,
    bsize,
    imshp,
    kshps,
    nkerns,
    unroll_batch=0,
    unroll_kern=0,
    img=None,
    do_print=True,
    repeat=1,
    unroll_patch=False,
    unroll_patch_size=False,
    verbose=0,
):
    """Time a stack of `ConvOp` layers compiled with the default linker.

    Timing-only variant of `exec_multilayer_conv_nnet_old`: the scipy
    reference and the Python-linker comparison are removed, so only
    `tctot` is meaningful; `tpytot` and `ntot` are returned as 0 to keep
    the same result shape.

    Parameters
    ----------
    conv_mode : str
        ``"valid"`` or ``"full"`` output mode.
    ss : tuple of int
        Subsampling (stride) per spatial dimension.
    bsize : int
        Batch size.
    imshp : tuple
        Input shape ``(channels, rows, cols)`` of the first layer.
    kshps, nkerns : sequences
        Per-layer kernel spatial shapes and output-filter counts.
    unroll_batch, unroll_kern, unroll_patch, unroll_patch_size
        Loop-unrolling options forwarded to `ConvOp`.
    repeat : int
        Number of timed calls per layer.

    Returns
    -------
    (tctot, tpytot, ntot) : tuple
        Cumulative compiled-graph time, then two zeros.
    """
    if img is None:
        img = dmatrix()
    # build actual input images
    imgval = global_rng.random((bsize, imshp[0], imshp[1], imshp[2]))
    a = dmatrix()
    kerns = [a for i in nkerns]
    inputs4 = dmatrix4()
    kerns4 = dmatrix4()
    # for each layer
    ntot = 0
    tctot = 0
    tpytot = 0
    for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))):
        if do_print:
            print("************* layer %i ***************" % n_layer)
            print(conv_mode, ss, n_layer, kshp, nkern)
        # actual values
        w = global_rng.random(np.r_[nkern, imshp[0], kshp])
        w_flip = flip(w, kshp).reshape(w.shape)
        outshp = np.hstack(
            (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))
        )
        time1 = time.perf_counter()
        # outval = np.zeros(np.r_[bsize, outshp])

        # ConvOp
        if unroll_patch and not unroll_patch_size:
            # Patch unrolling without compile-time shapes.
            conv_op = ConvOp(
                dx=ss[0],
                dy=ss[1],
                output_mode=conv_mode,
                unroll_patch=unroll_patch,
                verbose=verbose,
            )(inputs4, kerns4)
        else:
            # Full shape information given at construction time.
            conv_op = ConvOp(
                imshp,
                kshp,
                nkern,
                bsize,
                ss[0],
                ss[1],
                conv_mode,
                unroll_batch=unroll_batch,
                unroll_kern=unroll_kern,
                unroll_patch=unroll_patch,
                verbose=verbose,
            )(inputs4, kerns4)
        # l1shp = np.hstack((nkern,
        #                    ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))
        propup2 = function([inputs4, kerns4], conv_op)

        time1 = time.perf_counter()
        for i in range(repeat):
            propup2(imgval, w_flip)
        tctot += time.perf_counter() - time1

        imshp = tuple(outshp)
        # imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2])
    return tctot, tpytot, ntot
def speed_multilayer_conv():
    """Benchmark `ConvOp` over a grid of loop-unrolling settings.

    Runs `exec_multilayer_conv_nnet` for every ``unroll_batch`` x
    ``unroll_kern`` combination, then the non-unrolled baseline, then
    the ``unroll_patch`` variants (with and without compile-time
    shapes), and prints the measured times and relative speed-ups.
    Results are printed only; nothing is returned.
    """
    # calculate the speed up of different combination of unroll
    # put the parameter to the same you will try.
    # validate = False  # we don't validate the result to have it much faster!
    repeat = 3
    verbose = 1
    unroll_batch = [1, 2, 3, 4, 5, 6, 10]  # 15, 30, 60 always much slower
    unroll_kern = [1, 2, 3, 4, 5, 6, 10]  # 15, 30, 60 always much slower
    # unroll_batch = [1,4,5]
    # unroll_kern = [1,4,5]
    # unroll_batch = [1,4]
    # unroll_kern = [1,4]
    # unroll_patch = [True, False]

    bsize = 60  # batch size
    imshp_start = (1, 48, 48)  # un square shape to test more corner case.
    kshps = ([11, 12],)  # un square shape to test more corner case.
    nkerns = [60]  # per output pixel
    ssizes = [
        (1, 1),
    ]  # (1,1)]#(2,2) bugged
    convmodes = ["valid", "full"]
    # do_convolve2 = False
    a = dmatrix()
    kerns = [a for i in nkerns]

    assert len(kshps) == len(nkerns) == len(kerns)
    # timing[n_b, n_k] holds (C time, Python time, reference time) per
    # (conv mode, stride) combination.
    timing = np.zeros(
        (len(unroll_batch), len(unroll_kern), 3, len(convmodes) * len(ssizes))
    )
    t_b_k = []
    # calculate the timing with unrolling
    print("time unroll batch kern")
    best = []
    worst = []
    t_ = []
    for unroll_b, n_b in zip(unroll_batch, range(len(unroll_batch))):
        for unroll_k, n_k in zip(unroll_kern, range(len(unroll_kern))):
            t_b_k.append(str(unroll_b) + "/" + str(unroll_k))
            # `t_` starts empty, so this branch always runs; it is a
            # leftover toggle for replaying pre-recorded timings.
            if not t_:
                tctot, tpytot, ntot = [], [], []
                for conv_mode, n_mode in zip(convmodes, range(len(convmodes))):
                    for ss, n_ss in zip(ssizes, range(len(ssizes))):
                        # tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate, verbose=verbose,do_print=False)
                        tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(
                            conv_mode,
                            ss,
                            bsize,
                            imshp_start,
                            kshps,
                            nkerns,
                            unroll_batch=unroll_b,
                            unroll_kern=unroll_k,
                            verbose=verbose,
                            do_print=False,
                            repeat=repeat,
                        )
                        tctot += [tctot_]
                        tpytot += [tpytot_]
                        ntot += [ntot_]
                if unroll_b == 4 and unroll_k == 4:
                    # print "unroll 4/4",tctot
                    best = tctot
                if unroll_b == 1 and unroll_k == 1:
                    # print "unroll 1/1",tctot
                    worst = tctot
                timing[n_b, n_k] = [
                    tctot,
                    tpytot,
                    ntot,
                ]  # [sum(tctot), sum(tpytot), sum(ntot)]
    if not t_:
        t = timing[:, :, 0, :]  # We select only the c timing.
    else:
        t = t_
    t = np.asarray(t)

    # calculate the old timing
    print("time old version")
    tctot, tpytot, ntot = [], [], []
    tctot_ = []
    # `tctot_` was just reset, so this branch always runs as well.
    if not tctot_:
        for conv_mode, n_mode in zip(convmodes, range(len(convmodes))):
            for ss, n_ss in zip(ssizes, range(len(ssizes))):
                # tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate, verbose=verbose,do_print=False)
                tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(
                    conv_mode,
                    ss,
                    bsize,
                    imshp_start,
                    kshps,
                    nkerns,
                    unroll_batch=0,
                    unroll_kern=0,
                    verbose=verbose,
                    do_print=False,
                    repeat=repeat,
                )
                tctot += [tctot_]
                tpytot += [tpytot_]
                ntot += [ntot_]
    else:
        tctot = np.asarray(tctot_)
    print(f"old code timing {sum(tctot):.3f}s", tctot)
    best = np.asarray(best)
    worst = np.asarray(worst)
    print("timing for unrolled version")
    print("unroll_batch/unroll_kern valid_mode full_mode")
    for n_b in range(len(unroll_batch)):
        for n_k in range(len(unroll_kern)):
            print((unroll_batch[n_b], unroll_kern[n_k]) + tuple(t[n_b, n_k]), ",")
    # t_detail = t
    t = t.sum(axis=2)
    print(
        f"max {t.max():.3f}s",
        "max param(batch unloop size/kernel unloop size)",
        t_b_k[t.argmax()],
    )
    print(
        f"min {t.min():.3f}s",
        "min param(batch unloop size/kernel unloop size)",
        t_b_k[t.argmin()],
    )
    print(
        f"speedup vs (1/1){t.max() / t.min():.3f}x, vs old {sum(tctot) / t.min():.3f}x"
    )
    print(worst / best, tctot / best)

    # calculate the timing of unroll_patch
    print("time unroll_patch")
    tctot_patch = []
    tctot_patch_size = []
    for conv_mode, n_mode in zip(convmodes, range(len(convmodes))):
        for ss, n_ss in zip(ssizes, range(len(ssizes))):
            # tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=True,verbose=verbose,do_print=False)
            tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(
                conv_mode,
                ss,
                bsize,
                imshp_start,
                kshps,
                nkerns,
                unroll_batch=0,
                unroll_kern=0,
                unroll_patch=True,
                verbose=verbose,
                do_print=False,
                repeat=repeat,
            )
            tctot_patch += [tctot_]
            # tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=True,verbose=verbose,do_print=False,unroll_patch_size=True)
            tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(
                conv_mode,
                ss,
                bsize,
                imshp_start,
                kshps,
                nkerns,
                unroll_batch=0,
                unroll_kern=0,
                unroll_patch=True,
                verbose=verbose,
                do_print=False,
                unroll_patch_size=True,
                repeat=repeat,
            )
            tctot_patch_size += [tctot_]

    t_patch = sum(tctot_patch)
    print("unroll_patch without shape time", tctot_patch)
    print(
        f"speedup vs (1/1){t.max() / t_patch:.3f}x, vs old {sum(tctot) / t_patch:.3f}x"
    )
    print(best / tctot_patch, worst / tctot_patch)
    t_patch_size = sum(tctot_patch_size)
    print("unroll_patch with shape time", tctot_patch_size)
    print(
        "speedup vs (1/1)%.3fx, vs old %.3fx"
        % (t.max() / t_patch_size, sum(tctot) / t_patch_size)
    )
    print(best / tctot_patch_size, worst / tctot_patch_size)
    return
# Allow running the benchmark directly from the command line.
if __name__ == "__main__":
    speed_multilayer_conv()
from contextlib import ExitStack as does_not_raise
import numpy as np
import pytest
import scipy.special as sp
import pytensor
import pytensor.tensor as at
from pytensor.compile.mode import OPT_FAST_RUN, optdb
from pytensor.configdefaults import config
from pytensor.gradient import grad
from pytensor.graph.fg import FunctionGraph
from pytensor.graph.rewriting.basic import check_stack_trace
from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise
from pytensor.tensor.math import (
Argmax,
add,
argmax,
dot,
exp,
log,
max_and_argmax,
mean,
sigmoid,
)
from pytensor.tensor.math import sum as at_sum
from pytensor.tensor.math import tanh
from pytensor.tensor.nnet.basic import (
CrossentropyCategorical1Hot,
CrossentropyCategorical1HotGrad,
CrossentropySoftmax1HotWithBiasDx,
CrossentropySoftmaxArgmax1HotWithBias,
Prepend_scalar_constant_to_each_row,
Prepend_scalar_to_each_row,
Softmax,
SoftmaxGrad,
SoftmaxWithBias,
binary_crossentropy,
categorical_crossentropy,
confusion_matrix,
crossentropy_categorical_1hot,
crossentropy_softmax_1hot,
crossentropy_softmax_1hot_with_bias,
crossentropy_softmax_1hot_with_bias_dx,
crossentropy_softmax_argmax_1hot_with_bias,
elu,
h_softmax,
relu,
selu,
sigmoid_binary_crossentropy,
softmax,
softmax_grad_legacy,
softmax_legacy,
softmax_with_bias,
softsign,
)
from pytensor.tensor.shape import shape_padleft
from pytensor.tensor.subtensor import AdvancedSubtensor
from pytensor.tensor.type import (
dmatrix,
dvector,
fmatrix,
fvector,
ivector,
lvector,
matrix,
scalar,
tensor3,
tensor4,
vector,
vectors,
)
from tests import unittest_tools as utt
from tests.tensor.utils import (
_good_broadcast_unary_normal_float_no_complex,
check_floatX,
makeBroadcastTester,
upcast_int8_nfunc,
)
def softmax_graph(c):
    """Reference softmax built from elementary ops, normalized over the last axis."""
    denominator = exp(c).sum(axis=-1, keepdims=True)
    return exp(c) / denominator
def valid_axis_tester(Op):
    """Check that `Op` validates its axis argument.

    A non-integer axis must raise ``TypeError``; integer axes outside
    the rank of a 3-d input must raise ``ValueError``, while in-range
    axes (positive or negative) must be accepted.
    """
    with pytest.raises(TypeError):
        Op(1.5)

    inputs = [tensor3()] * Op.nin
    cases = [
        (2, does_not_raise()),
        (3, pytest.raises(ValueError)),
        (-3, does_not_raise()),
        (-4, pytest.raises(ValueError)),
    ]
    for axis, expectation in cases:
        with expectation:
            Op(axis)(*inputs)
class TestSoftmaxWithBias(utt.InferShapeTester):
    """Tests for the `SoftmaxWithBias` op and its rewrites."""

    def test_basic(self):
        """Gradient of each output column of `softmax_with_bias` is correct."""
        rng = np.random.default_rng(utt.fetch_seed())
        for col in range(4):

            def grad_target(a, b, col=col):
                return softmax_with_bias(a, b)[:, col]

            utt.verify_grad(grad_target, [rng.random((3, 4)), rng.random(4)])

    def test_broadcast(self):
        """
        Test that we don't raise an error during rewriting for no good reason
        as `softmax_with_bias` don't support correctly some/all broadcasted
        inputs pattern.
        """
        weights = np.asarray(
            [[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.1, 0.1, 0.1]],
            dtype=config.floatX,
        )
        W = pytensor.shared(value=weights, name="W")
        vbias = pytensor.shared(value=0.1, name="vbias")  # 0.01
        hid = vector("hid")
        f = pytensor.function([hid], softmax_legacy(dot(hid, W.T) + vbias))
        applied_ops = [node.op for node in f.maker.fgraph.toposort()]
        assert softmax_with_bias not in applied_ops
        assert softmax_legacy in applied_ops
        f([0, 1, 0])

    def test_softmax_with_bias_trace(self):
        """The rewritten graph keeps a stack trace on its final node."""
        rng = np.random.default_rng(utt.fetch_seed())
        a = pytensor.shared(rng.standard_normal((3,)).astype(config.floatX))
        b = pytensor.shared(np.float32(rng.standard_normal()))
        fn = pytensor.function([], softmax(a + b))
        assert check_stack_trace(fn, ops_to_check="last")

    def test_infer_shape(self):
        """Inferred shape of `SoftmaxWithBias` matches the computed output."""
        mat = matrix()
        vec = vector()
        rng = np.random.default_rng(utt.fetch_seed())
        mat_val = rng.random((3, 4)).astype(config.floatX)
        vec_val = rng.random(4).astype(config.floatX)
        self._compile_and_check(
            [mat, vec],
            [SoftmaxWithBias()(mat, vec)],
            [mat_val, vec_val],
            SoftmaxWithBias,
        )
class TestCrossEntropySoftmax1Hot:
    """Gradient checks for the fused cross-entropy/softmax helpers."""

    def test_basic(self):
        """NLL gradients, with and without a bias term."""
        targets = [0, 1, 3]
        rng = np.random.default_rng(utt.fetch_seed())

        def nll_with_bias(a, b):
            return crossentropy_softmax_1hot_with_bias(a, b, targets)[0]

        utt.verify_grad(nll_with_bias, [rng.random((3, 4)), rng.random(4)])

        def nll(a):
            return crossentropy_softmax_1hot(a, targets)[0]

        utt.verify_grad(nll, [rng.random((3, 4))])

    def test_vector(self):
        """A 1-d input padded to a batch of one row."""
        rng = np.random.default_rng(utt.fetch_seed())

        def nll(a):
            return crossentropy_softmax_1hot(shape_padleft(a), [3])[0]

        utt.verify_grad(nll, [rng.random((4,))])

    def test_vectors(self):
        """A padded 1-d input combined with a bias vector."""
        rng = np.random.default_rng(utt.fetch_seed())

        def nll(a, b):
            return crossentropy_softmax_1hot(shape_padleft(a) + b, [3])[0]

        utt.verify_grad(nll, [rng.random((4,)), rng.random(4)])
class TestCrossEntropySoftmax1HotWithBiasDx(utt.InferShapeTester):
    """Tests for the `CrossentropySoftmax1HotWithBiasDx` gradient op."""

    def test_basic(self):
        # Verify the gradient w.r.t. the softmax output, for every
        # supported integer dtype of the class-index vector.
        rng = np.random.default_rng(utt.fetch_seed())

        def ff(class_dtype):
            # Bind the index dtype; `verify_grad` differentiates only
            # w.r.t. the single argument `sm`.
            def f(sm):
                # Class indices
                y = rng.integers(low=0, high=5, size=10).astype(class_dtype)
                return crossentropy_softmax_1hot_with_bias_dx(
                    rng.random(10),  # Gradient w.r.t. NLL.
                    sm,  # Softmax output.
                    y,
                )

            return f

        # Build a random softmax output whose rows sum to 1.
        softmax_output = rng.random((10, 5))
        softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
        for dtype in ["uint8", "int8", "uint64", "int64"]:
            utt.verify_grad(ff(dtype), [softmax_output])

    def test_basic_2(self):
        # Gradient w.r.t. the NLL-gradient input `dy`, with the softmax
        # output held fixed.
        rng = np.random.default_rng(utt.fetch_seed())
        softmax_output = rng.random((10, 5))
        softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)

        def f(dy):
            return crossentropy_softmax_1hot_with_bias_dx(
                dy, softmax_output, rng.integers(low=0, high=5, size=10)
            )

        utt.verify_grad(f, [rng.random(10)])

    def test_infer_shape(self):
        # Inferred output shape must agree with the computed output.
        admat = matrix()
        advec = vector()
        alvec = lvector()
        rng = np.random.default_rng(utt.fetch_seed())
        admat_val = rng.random((10, 5)).astype(config.floatX)
        admat_val /= admat_val.sum(axis=1).reshape(10, 1)
        advec_val = rng.random(10).astype(config.floatX)
        alvec_val = rng.integers(low=0, high=5, size=10)
        self._compile_and_check(
            [advec, admat, alvec],
            [CrossentropySoftmax1HotWithBiasDx()(advec, admat, alvec)],
            [advec_val, admat_val, alvec_val],
            CrossentropySoftmax1HotWithBiasDx,
        )

    def test_neg_idx(self):
        # A negative class index is invalid and must raise at run time.
        admat = matrix()
        advec = vector()
        alvec = lvector()
        rng = np.random.default_rng(utt.fetch_seed())
        admat_val = rng.random((10, 5)).astype(config.floatX)
        admat_val /= admat_val.sum(axis=1).reshape(10, 1)
        advec_val = rng.random(10).astype(config.floatX)
        alvec_val = rng.integers(low=0, high=5, size=10)
        alvec_val[1] = -1
        out = CrossentropySoftmax1HotWithBiasDx()(advec, admat, alvec)
        f = pytensor.function([advec, admat, alvec], out)
        with pytest.raises(ValueError):
            f(advec_val, admat_val, alvec_val)
class TestCrossEntropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
    """Tests for the fused `CrossentropySoftmaxArgmax1HotWithBias` op."""

    def setup_method(self):
        # The op under test, as its user-facing helper function.
        self.op = crossentropy_softmax_argmax_1hot_with_bias
        super().setup_method()

    def test_grads(self):
        n_classes = 5
        n_samples = 3

        rng = np.random.default_rng(utt.fetch_seed())

        # First test gradient when getting a gradient on the NLL output.
        def grad_on_nll_dtype(dtype):
            # Bind the class-index dtype for `verify_grad`.
            def grad_on_nll(x, b):
                y_idx = rng.integers(low=0, high=n_classes, size=n_samples).astype(
                    dtype
                )
                return self.op(x, b, y_idx=y_idx)[0]

            return grad_on_nll

        for dtype in ["uint8", "int8", "uint64", "int64"]:
            utt.verify_grad(
                grad_on_nll_dtype(dtype),
                [
                    rng.random((n_samples, n_classes)),
                    rng.random(n_classes),
                ],
            )

        # Then test gradient when getting a gradient on the softmax output.
        def grad_on_softmax(x, b):
            return self.op(
                x,
                b,
                y_idx=rng.integers(low=0, high=n_classes, size=n_samples),
            )[1]

        utt.verify_grad(
            grad_on_softmax,
            [rng.random((n_samples, n_classes)), rng.random(n_classes)],
        )

    def test_infer_shape(self):
        # Inferred shapes of all three outputs must match computed ones.
        admat = matrix()
        advec = vector()
        alvec = lvector()
        rng = np.random.default_rng(utt.fetch_seed())
        admat_val = rng.random((3, 5)).astype(config.floatX)
        advec_val = rng.random(5).astype(config.floatX)
        alvec_val = rng.integers(low=0, high=5, size=3)
        self._compile_and_check(
            [admat, advec, alvec],
            CrossentropySoftmaxArgmax1HotWithBias()(admat, advec, alvec),
            [admat_val, advec_val, alvec_val],
            CrossentropySoftmaxArgmax1HotWithBias,
        )

    def test_neg_idx(self):
        # A negative class index is invalid and must raise at run time.
        admat = matrix()
        advec = vector()
        alvec = lvector()
        rng = np.random.default_rng(utt.fetch_seed())
        admat_val = rng.random((3, 5)).astype(config.floatX)
        advec_val = rng.random(5).astype(config.floatX)
        alvec_val = rng.integers(low=0, high=5, size=3)
        alvec_val[1] = -1
        out = CrossentropySoftmaxArgmax1HotWithBias()(admat, advec, alvec)
        f = pytensor.function([admat, advec, alvec], out)
        with pytest.raises(ValueError):
            f(admat_val, advec_val, alvec_val)
class TestPrepend(utt.InferShapeTester):
    """Tests for the Ops that prepend a scalar column to each row of a matrix."""

    def test_prepend_constant(self):
        # The scalar is fixed at Op-construction time.
        x = matrix("x")
        y = Prepend_scalar_constant_to_each_row(4.0)(x)
        f = pytensor.function([x], y)
        rng = np.random.default_rng(utt.fetch_seed())
        m = rng.random((3, 5)).astype(config.floatX)
        my = f(m)
        # One extra column, filled with the constant.
        assert my.shape == (3, 6)
        assert np.all(my[:, 0] == 4.0)

    def test_prepend_basic(self):
        """Test basic functionality."""
        # Here the scalar is a runtime input rather than a constant.
        x = matrix("x")
        y = Prepend_scalar_to_each_row()(5.0, x)
        f = pytensor.function([x], y)
        m = np.ones((3, 5), dtype="float32")
        my = f(m)
        assert my.shape == (3, 6)
        assert np.all(my[:, 0] == 5.0)

    def test_infer_shape(self):
        admat = matrix()
        adscal = scalar()
        rng = np.random.default_rng(utt.fetch_seed())
        admat_val = rng.random((3, 5)).astype(config.floatX)
        adscal_val = np.asarray(rng.random(), dtype=config.floatX).item()
        self._compile_and_check(
            [admat],
            [Prepend_scalar_constant_to_each_row(adscal_val)(admat)],
            [admat_val],
            Prepend_scalar_constant_to_each_row,
        )
        self._compile_and_check(
            [adscal, admat],
            [Prepend_scalar_to_each_row()(adscal, admat)],
            [adscal_val, admat_val],
            Prepend_scalar_to_each_row,
        )
class TestCrossEntropyCategorical1HotGrad(utt.InferShapeTester):
    """Shape-inference test for the gradient Op of categorical crossentropy."""

    def test_infer_shape(self):
        grad_in = vector()
        probs_in = matrix()
        labels_in = lvector()
        rng = np.random.default_rng(utt.fetch_seed())
        grad_val = rng.random(3).astype(config.floatX)
        probs_val = rng.random((3, 2)).astype(config.floatX)
        labels_val = [0, 1, 0]
        self._compile_and_check(
            [grad_in, probs_in, labels_in],
            [CrossentropyCategorical1HotGrad()(grad_in, probs_in, labels_in)],
            [grad_val, probs_val, labels_val],
            CrossentropyCategorical1HotGrad,
        )
class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
    """Tests for `crossentropy_categorical_1hot` and the graph rewrites that
    replace softmax + crossentropy patterns with the fused Ops."""

    def test_input_validation(self):
        # The Op requires a matrix of probabilities and an integer label vector.
        with pytest.raises(TypeError, match="Matrix.*"):
            crossentropy_categorical_1hot(vector(), lvector())
        with pytest.raises(TypeError, match="Integer.*"):
            crossentropy_categorical_1hot(matrix(), vector())

    def test_grad(self):
        x = matrix("x")
        one_of_n = lvector("one_of_n")
        op = crossentropy_categorical_1hot
        xe = op(x, one_of_n)
        f = pytensor.function([x, one_of_n], xe)
        x_val = np.asarray([[0.4, 0.6, 0.0], [0.1, 0.8, 0.1]], dtype=config.floatX)
        xe_val = f(x_val, [0, 1])
        # Crossentropy against a one-hot target is -log of the selected entry.
        assert np.allclose(xe_val, -np.log([0.4, 0.8]))

        def oplike(x):
            return op(x, [0, 1])

        rng = np.random.default_rng(utt.fetch_seed())
        utt.verify_grad(oplike, [x_val], rng=rng)

    def test_infer_shape(self):
        admat = matrix()
        alvec = lvector()
        rng = np.random.default_rng(utt.fetch_seed())
        admat_val = rng.random((3, 2)).astype(config.floatX)
        alvec_val = [0, 1, 0]
        self._compile_and_check(
            [admat, alvec],
            [CrossentropyCategorical1Hot()(admat, alvec)],
            [admat_val, alvec_val],
            CrossentropyCategorical1Hot,
        )

    def test_softmax_rewrites(self):
        x = matrix("x")
        one_of_n = lvector("one_of_n")
        op = crossentropy_categorical_1hot
        # xe = op(x, one_of_n)
        fgraph = FunctionGraph([x, one_of_n], [op(softmax_legacy(x), one_of_n)])
        assert fgraph.outputs[0].owner.op == op
        optdb.query(OPT_FAST_RUN).rewrite(fgraph)
        # softmax + crossentropy should fuse into the single argmax-with-bias Op.
        assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias

    def test_softmax_rewrites_w_bias(self):
        # Same fusion, with the bias absorbed from the `x + b` addition.
        x = matrix("x")
        b = vector("b")
        one_of_n = lvector("one_of_n")
        op = crossentropy_categorical_1hot
        fgraph = FunctionGraph([x, b, one_of_n], [op(softmax_legacy(x + b), one_of_n)])
        assert fgraph.outputs[0].owner.op == op
        optdb.query(OPT_FAST_RUN).rewrite(fgraph)
        # The whole pattern collapses into a single node.
        assert len(fgraph.toposort()) == 1
        assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias

    def test_softmax_rewrites_w_bias2(self):
        # With two bias vectors, one addition remains plus the fused Op.
        x = matrix("x")
        b = vector("b")
        c = vector("c")
        one_of_n = lvector("one_of_n")
        op = crossentropy_categorical_1hot
        fgraph = FunctionGraph(
            [x, b, c, one_of_n], [op(softmax_legacy(add(x, b, c)), one_of_n)]
        )
        assert fgraph.outputs[0].owner.op == op
        optdb.query(OPT_FAST_RUN).rewrite(fgraph)
        assert len(fgraph.toposort()) == 2
        assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias

    def test_softmax_grad_rewrites(self):
        # The gradient of the fused loss should use the dedicated dx Op
        # rather than the generic softmax gradient.
        x = matrix("x")
        one_of_n = lvector("one_of_n")
        op = crossentropy_categorical_1hot
        xe = op(softmax_legacy(x), one_of_n)
        sum_xe = at_sum(xe)
        g_x = grad(sum_xe, x)
        fgraph = FunctionGraph([x, one_of_n], [g_x])
        assert check_stack_trace(
            fgraph,
            ops_to_check=[crossentropy_softmax_1hot_with_bias_dx, softmax_legacy],
        )
        optdb.query(OPT_FAST_RUN).rewrite(fgraph)
        ops = {node.op for node in fgraph.toposort()}
        assert crossentropy_softmax_argmax_1hot_with_bias not in ops
        assert crossentropy_softmax_1hot_with_bias_dx in ops
        assert softmax_legacy in ops
        assert softmax_grad_legacy not in ops

    def test_get_rid_of_advanced_indexing_version_of_xent(self):
        # Crossentropy written via advanced indexing,
        # e.g. -log(softmax(x))[arange(n), y], should be rewritten into the
        # fused Op with no AdvancedSubtensor left in the graph.
        x = matrix("x")
        b = vector("b")
        y = lvector("y")
        # Basic case
        expressions = [
            at_sum(-log(softmax(x)[at.arange(y.shape[0]), y])),
            -at_sum(log(softmax(x)[at.arange(y.shape[0]), y])),
            -at_sum(log(softmax(x))[at.arange(y.shape[0]), y]),
            at_sum(-log(softmax(x))[at.arange(y.shape[0]), y]),
        ]
        for expr in expressions:
            fgraph = FunctionGraph([x, y], [expr])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 4
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]
            # Also verify the gradient wrt x
            fgraph = FunctionGraph([x, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 2
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops
        # Test that a biased softmax is rewritten correctly
        bias_expressions = [
            at_sum(-log(softmax(x + b)[at.arange(y.shape[0]), y])),
            -at_sum(log(softmax(b + x)[at.arange(y.shape[0]), y])),
            -at_sum(log(softmax(x + b))[at.arange(y.shape[0]), y]),
            at_sum(-log(softmax(b + x))[at.arange(y.shape[0]), y]),
        ]
        for expr in bias_expressions:
            fgraph = FunctionGraph([x, b, y], [expr, x])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 2  # [big_op, sum]
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            fgraph = FunctionGraph([x, b, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 2
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_with_bias in ops
            assert softmax_grad_legacy not in ops
        # Test that using "mean" instead of sum works, too
        mean_expressions = [
            mean(-log(softmax(x)[at.arange(y.shape[0]), y])),
            -mean(log(softmax(x)[at.arange(y.shape[0]), y])),
            -mean(log(softmax(x))[at.arange(y.shape[0]), y]),
            mean(-log(softmax(x))[at.arange(y.shape[0]), y]),
        ]
        for expr in mean_expressions:
            fgraph = FunctionGraph([x, y], [expr])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 6
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]
            fgraph = FunctionGraph([x, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 5
            # there's an extra dimshuffle in there
            # but I can't think of a good rule to get rid of it
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops
        mean_bias_expressions = [
            mean(-log(softmax(x + b)[at.arange(y.shape[0]), y])),
            -mean(log(softmax(b + x)[at.arange(y.shape[0]), y])),
            -mean(log(softmax(x + b))[at.arange(y.shape[0]), y]),
            mean(-log(softmax(b + x))[at.arange(y.shape[0]), y]),
        ]
        for expr in mean_bias_expressions:
            fgraph = FunctionGraph([x, b, y], [expr])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 4
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]
            fgraph = FunctionGraph([x, b, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 5
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_with_bias in ops
            assert softmax_grad_legacy not in ops

    def test_xent_thing_int32(self):
        # Same rewrite must trigger when labels are cast to int32.
        x = matrix("x")
        y = lvector("y")
        yi = at.cast(y, "int32")
        expressions = [
            at_sum(-log(softmax(x)[at.arange(yi.shape[0]), yi])),
            -at_sum(log(softmax(x)[at.arange(yi.shape[0]), yi])),
            -at_sum(log(softmax(x))[at.arange(yi.shape[0]), yi]),
            at_sum(-log(softmax(x))[at.arange(yi.shape[0]), yi]),
        ]
        for expr in expressions:
            fgraph = FunctionGraph([x, y], [expr])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 5
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]
            # Also verify the gradient wrt x
            fgraph = FunctionGraph([x, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 3
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops

    def test_crossentropy_softmax_1hot_with_bias_dxcale_cost(self):
        # Rewrites must also trigger when the loss is scaled by a variable `a`.
        x = matrix("x")
        y = lvector("y")
        a = scalar("a")

        # NOTE(review): defined but never called in this test — kept as-is.
        def validate_grad_graph(func):
            # The graph of the gradient should not have softmaxgrad anymore
            has_cx1hotdx = False
            has_softmax = False
            has_softmaxdx = False
            for node in func.maker.fgraph.toposort():
                if node.op == crossentropy_softmax_1hot_with_bias_dx:
                    has_cx1hotdx = True
                if node.op == softmax_legacy:
                    has_softmax = True
                if node.op == softmax_grad_legacy:
                    has_softmaxdx = True
            assert has_cx1hotdx
            assert has_softmax
            assert not has_softmaxdx

        # Cases to test
        expressions = [
            a * at_sum(-log(softmax(x)[at.arange(y.shape[0]), y])),
            -a * at_sum(log(softmax(x)[at.arange(y.shape[0]), y])),
            a * (-at_sum(log(softmax(x)[at.arange(y.shape[0]), y]))),
            a * at_sum(log(softmax(x)[at.arange(y.shape[0]), y])),
            a * at_sum(-log(softmax(x))[at.arange(y.shape[0]), y]),
            -a * at_sum(log(softmax(x))[at.arange(y.shape[0]), y]),
            a * (-at_sum(log(softmax(x))[at.arange(y.shape[0]), y])),
            a * at_sum(log(softmax(x))[at.arange(y.shape[0]), y]),
            a * mean(-log(softmax(x)[at.arange(y.shape[0]), y])),
            -a * mean(log(softmax(x)[at.arange(y.shape[0]), y])),
            a * (-mean(log(softmax(x)[at.arange(y.shape[0]), y]))),
            a * mean(log(softmax(x)[at.arange(y.shape[0]), y])),
            a * mean(-log(softmax(x))[at.arange(y.shape[0]), y]),
            -a * mean(log(softmax(x))[at.arange(y.shape[0]), y]),
            a * (-mean(log(softmax(x))[at.arange(y.shape[0]), y])),
            a * mean(log(softmax(x))[at.arange(y.shape[0]), y]),
        ]
        for expr in expressions:
            fgraph = FunctionGraph([x, y, a], [expr])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            assert 5 <= len(fgraph.toposort()) <= 10
            ops = {node.op for node in fgraph.toposort()}
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert softmax_legacy not in ops
            # Verify the gradient wrt x
            fgraph = FunctionGraph([x, y, a], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            assert 3 <= len(fgraph.toposort()) <= 6
            ops = {node.op for node in fgraph.toposort()}
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops
            # Verify the gradient when providing output gradient
            fgraph = FunctionGraph(
                [x, y, a], [grad(expr, x, known_grads={expr: a * x.sum()})]
            )
            optdb.query(OPT_FAST_RUN).rewrite(fgraph)
            assert 6 <= len(fgraph.toposort()) <= 8
            ops = {node.op for node in fgraph.toposort()}
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops
def test_argmax_pushdown():
    """The argmax-pushdown rewrite should drop monotonic elemwise wrappers
    (softmax, exp, tanh, sigmoid) when only the argmax is needed."""
    x = matrix()
    for sm in [softmax_graph, softmax_legacy]:
        # test that the max_and_argmax is pushed down if the max is not used
        out = max_and_argmax(sm(exp(tanh(sigmoid(x)))), axis=-1)[1]
        fgraph = FunctionGraph([x], [out])
        optdb.query(OPT_FAST_RUN).rewrite(fgraph)
        # print 'AFTER'
        # for node in fgraph.toposort():
        # print node.op
        # Everything collapses to a single Argmax node.
        assert len(fgraph.toposort()) == 1
        assert isinstance(fgraph.toposort()[0].op, Argmax)
        assert check_stack_trace(fgraph, ops_to_check=Argmax)
        x = matrix()
        # test that the max_and_argmax is not pushed down if the max is used
        out = max_and_argmax(sm(exp(tanh(sigmoid(x)))), axis=-1)[0]
        fgraph = FunctionGraph([x], [out])
        assert hasattr(fgraph.outputs[0].tag, "trace")
        optdb.query(OPT_FAST_RUN).rewrite(fgraph)
        # print 'AFTER'
        # for node in fgraph.toposort():
        # print node.op
        # The elemwise chain, the softmax, and the max reduction all remain.
        assert len(fgraph.toposort()) == 3
        assert isinstance(fgraph.toposort()[0].op, Elemwise)
        assert isinstance(fgraph.toposort()[1].op, Softmax)
        assert isinstance(fgraph.toposort()[2].op, CAReduce)
        assert isinstance(
            fgraph.toposort()[2].op.scalar_op, pytensor.scalar.ScalarMaximum
        )
def test_argmax_pushdown_bias():
    """Argmax pushdown must also work through `softmax_with_bias`: the softmax
    disappears when only the argmax is needed, and stays when the max is used."""
    x = matrix()
    b = vector()
    out = argmax(softmax_with_bias(x, b), axis=-1)
    fgraph = FunctionGraph([x, b], [out])
    optdb.query(OPT_FAST_RUN).rewrite(fgraph)
    types_to_check = (DimShuffle, Elemwise, Argmax)
    assert len(fgraph.toposort()) == 3
    # Renamed the loop variable from `type` to avoid shadowing the builtin.
    for i, expected_type in enumerate(types_to_check):
        assert isinstance(fgraph.toposort()[i].op, expected_type)
    assert check_stack_trace(fgraph, ops_to_check=types_to_check)
    x = matrix()
    b = vector()
    # When the max itself is requested, the softmax computation must remain.
    out = max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
    fgraph = FunctionGraph([x, b], [out])
    optdb.query(OPT_FAST_RUN).rewrite(fgraph)
    assert len(fgraph.toposort()) == 2
    assert isinstance(fgraph.toposort()[0].op, SoftmaxWithBias)
    assert isinstance(fgraph.toposort()[1].op, CAReduce)
    assert isinstance(fgraph.toposort()[1].op.scalar_op, pytensor.scalar.ScalarMaximum)
    assert check_stack_trace(fgraph, ops_to_check=(SoftmaxWithBias, CAReduce))
def test_asymptotic_32():
    """Test that our functions behave sensibly when huge values are present."""
    # TODO: consider adding the rewrite of crossentropy into the current
    # mode for the purpose of running this test
    for dtype in "float32", "float64":
        if dtype == "float32":
            x = fmatrix()
            x2 = fvector()
        else:
            x = dmatrix()
            x2 = dvector()
        y = lvector()
        c = categorical_crossentropy(softmax(x + x2), y)
        f = pytensor.function([x, y, x2], [c.sum(), grad(c.sum(), x)], mode="FAST_RUN")
        xval = np.zeros((5, 5), dtype=dtype).astype(dtype)
        x2val = np.zeros(5, dtype=xval.dtype).astype(dtype)
        for i in range(100):
            # Gradient descent: loss should be driven to exactly zero.
            cval, gxval = f(xval, np.arange(5), x2val)
            xval -= 100.3 * gxval
        assert cval == 0  # no problem going to zero error
        # what about when x gets really big?
        xval = np.zeros((5, 5), dtype=dtype)
        x2val = np.zeros(5, dtype=xval.dtype)
        for i in range(100):
            # Gradient ascent drives the loss to huge values; the gradient
            # must stay finite and saturate at the expected values.
            cval, gxval = f(xval, np.arange(5), x2val)
            xval += 100000.3 * gxval
        assert cval > 61750000
        assert gxval[0, 0] == -1.0
        assert gxval[0, 1] == 0.25
class TestSoftmaxRewrite:
    """
    Test that expressions of softmax in terms of exponentiated things
    divided by row sums are replaced by softmax expressions.

    `Softmax_grad` isn't that interesting as an Op, but it has the signature
    we look for when trying to insert `CrossEntropySoftmax` grad. So, for
    now, we add `softmax_grad` to graphs. In the future, we may modify the
    `CrossEntropySoftmax` grad to look for the more basic pattern.
    """

    def setup_method(self):
        # The softmax rewrite lives in the "canonicalize" pass.
        self.mode = pytensor.compile.mode.get_default_mode()
        self.mode = self.mode.including("canonicalize")

    @pytest.mark.parametrize("axis", [None, 0, 1, -1, (0, 1)])
    def test_basic(self, axis):
        c = matrix()
        # Build exp(c) / sum(exp(c)) with the dimshuffle that restores the
        # summed-out dimensions for the given axis.
        if axis is None:
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", "x")
        elif axis == 0:
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", 0)
        elif axis == (0, 1):
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", "x")
        else:
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle(0, "x")
        # test that function contains softmax and no div.
        f = pytensor.function([c], p_y, mode=self.mode)
        assert check_stack_trace(f, ops_to_check=Softmax)
        f_ops = [n.op for n in f.maker.fgraph.toposort()]
        assert len(f_ops) == 1
        assert isinstance(f_ops[0], Softmax)
        rng = np.random.default_rng(utt.fetch_seed())
        c_val = rng.random((3, 4)).astype(config.floatX)
        assert np.allclose(f(c_val), sp.softmax(c_val, axis=axis))

    @pytest.mark.parametrize("axis", [None, 0, 1, 2, -1, -2, -3, (0, 1, 2)])
    def test_basic_keepdims(self, axis):
        # Same rewrite via sum(..., keepdims=True) instead of a dimshuffle.
        c = tensor3()
        p_y = exp(c) / exp(c).sum(axis=axis, keepdims=True)
        # test that function contains softmax and no div.
        f = pytensor.function([c], p_y, mode=self.mode)
        assert check_stack_trace(f, ops_to_check=Softmax)
        f_ops = [n.op for n in f.maker.fgraph.toposort()]
        assert len(f_ops) == 1
        assert isinstance(f_ops[0], Softmax)
        rng = np.random.default_rng(utt.fetch_seed())
        c_val = rng.random((3, 4, 5)).astype(config.floatX)
        assert np.allclose(f(c_val), sp.softmax(c_val, axis=axis))

    @pytest.mark.skip(reason="Rewrite not enabled for the moment")
    def test_grad(self):
        c = matrix()
        p_y = exp(c) / exp(c).sum(axis=1).dimshuffle(0, "x")
        # test that function contains softmax and softmaxgrad
        w = matrix()
        g = pytensor.function([c, w], grad((p_y * w).sum(), c), mode=self.mode)
        g_ops = [n.op for n in g.maker.fgraph.toposort()]
        assert len(g_ops) == 2, g_ops
        assert isinstance(g_ops[0], Softmax)
        assert isinstance(g_ops[1], SoftmaxGrad)
        rng = np.random.default_rng(utt.fetch_seed())
        g(rng.random((3, 4)), rng.uniform(0.5, 1, (3, 4)))

    def test_transpose_basic(self):
        # this should be a transposed softmax
        c = matrix()
        p_y = exp(c) / exp(c).sum(axis=0)
        # test that function contains softmax and no div.
        f = pytensor.function([c], p_y, mode=self.mode)
        f_ops = [n.op for n in f.maker.fgraph.toposort()]
        assert len(f_ops) == 1
        assert isinstance(f_ops[0], Softmax)

    @pytest.mark.skip(reason="Rewrite not enabled for the moment")
    def test_transpose_grad(self):
        # this should be a transposed softmax
        c = matrix()
        p_y = exp(c) / exp(c).sum(axis=0)
        # test that function contains softmax and no div.
        g = pytensor.function([c], grad(p_y.sum(), c), mode=self.mode)
        g_ops = [n.op for n in g.maker.fgraph.toposort()]
        assert len(g_ops) == 2
        assert isinstance(g_ops[0], Softmax)
        assert isinstance(g_ops[1], SoftmaxGrad)

    def test_1d_basic(self):
        c = vector()
        p_y = exp(c) / exp(c).sum()
        # test that function contains softmax and no div.
        f = pytensor.function([c], p_y, mode=self.mode)
        f_ops = [n.op for n in f.maker.fgraph.toposort()]
        assert len(f_ops) == 1
        assert isinstance(f_ops[0], Softmax)

    @pytest.mark.skip(reason="Rewrite not enabled for the moment")
    def test_1D_grad(self):
        c = vector()
        p_y = exp(c) / exp(c).sum()
        # test that function contains softmax and no div.
        g = pytensor.function([c], grad(p_y.sum(), c), mode=self.mode)
        g_ops = [n.op for n in g.maker.fgraph.toposort()]
        assert len(g_ops) == 2
        assert isinstance(g_ops[0], Softmax)
        assert isinstance(g_ops[1], SoftmaxGrad)

    @pytest.mark.parametrize(
        "f",
        [
            lambda c: exp(c) / exp(c).sum(axis=0).dimshuffle(0, 1, "x"),
            lambda c: exp(c) / exp(c).sum(axis=0).dimshuffle("x", 0, 1, "x"),
            lambda c: exp(c) / exp(c).sum(axis=0).dimshuffle("x", 1, 0),
            lambda c: exp(c) / exp(c).sum(axis=(0, 1), keepdims=True),
        ],
    )
    def test_invalid_softmax_expressions(self, f):
        # Test that graphs are not rewritten into a softmax when a dimshuffle
        # swaps or adds extra dimensions, or when more than one but not all axis
        # are summed over (which is not allowed by the Softmax Op but otherwise
        # valid)
        c = tensor3("c")
        out = f(c)
        f = pytensor.function([c], out, mode=self.mode)
        f_ops = [n.op for n in f.maker.fgraph.toposort()]
        assert len(f_ops) > 1
        assert not any(isinstance(op, Softmax) for op in f_ops)
def test_softmax_graph():
    """Gradient check for `softmax_graph` driven through `known_grads`."""
    rng = np.random.default_rng(utt.fetch_seed())
    x = pytensor.shared(rng.normal(size=(3, 4)))

    def backprop_through_softmax(output_grads):
        sm = softmax_graph(x)
        return pytensor.grad(None, x, known_grads={sm: output_grads})

    utt.verify_grad(backprop_through_softmax, [rng.random((3, 4))])
def test_grad_softmax_grad():
    """Gradient check for `softmax_legacy` driven through `known_grads`."""
    rng = np.random.default_rng(utt.fetch_seed())
    x = pytensor.shared(rng.normal(size=(3, 4)))

    def backprop_through_softmax(output_grads):
        sm = softmax_legacy(x)
        return pytensor.grad(None, x, known_grads={sm: output_grads})

    utt.verify_grad(backprop_through_softmax, [rng.random((3, 4))])
def test_relu():
    """Check `relu` against NumPy references for constant, symbolic, and
    ndarray alpha values."""
    x = matrix("x")
    rng = np.random.default_rng(utt.fetch_seed())
    data = rng.standard_normal((20, 30)).astype(config.floatX)
    # Base case: default alpha behaves as max(x, 0).
    out = relu(x).eval({x: data})
    assert np.allclose(out, np.maximum(data, 0))
    # Constant alpha values, including values outside [0, 1].
    for alpha in [0, 0.3, 1, 2, -0.3, -1, -2]:
        out = relu(x, alpha).eval({x: data})
        assert np.allclose(out, np.where(data > 0, data, alpha * data))
    # Symbolic alpha of increasing rank: scalar, vector, matrix.
    for alpha in [scalar(), vector(), matrix()]:
        # Value for alpha with the right ndim, broadcastable against `data`.
        alpha_val = np.array(
            rng.standard_normal(data.shape[::-1][: alpha.ndim][::-1]),
            dtype=config.floatX,
        )
        out = relu(x, alpha).eval({x: data, alpha: alpha_val})
        assert np.allclose(
            out, np.where(data > 0, data, alpha_val * data), rtol=3e-5
        )
    # An ndarray alpha must not trigger an upcast of the result dtype.
    x = matrix("x", dtype="float32")
    data = rng.standard_normal((20, 30)).astype("float32")
    alpha = np.asarray(0.123, dtype="float32")
    out = relu(x, alpha).eval({x: data})
    assert np.allclose(out, np.where(data > 0, data, alpha * data))
    assert out.dtype == "float32"
def test_h_softmax():
    """Tests the output dimensions of the `h_softmax` when a target is provided or not."""
    input_size = 4
    batch_size = 2
    h_softmax_level1_size = 5
    h_softmax_level2_size = 3
    # The full output space is the product of the two tree levels.
    output_size = h_softmax_level1_size * h_softmax_level2_size
    rng = np.random.default_rng(utt.fetch_seed())
    # First level of h_softmax
    W1 = np.asarray(
        rng.normal(size=(input_size, h_softmax_level1_size)), dtype=config.floatX
    )
    W1 = pytensor.shared(W1)
    b1 = pytensor.shared(
        np.asarray(np.zeros((h_softmax_level1_size,)), dtype=config.floatX)
    )
    # Second level of h_softmax
    W2 = np.asarray(
        rng.normal(size=(h_softmax_level1_size, input_size, h_softmax_level2_size)),
        dtype=config.floatX,
    )
    W2 = pytensor.shared(W2)
    b2 = pytensor.shared(
        np.asarray(
            np.zeros((h_softmax_level1_size, h_softmax_level2_size)),
            dtype=config.floatX,
        )
    )
    x = matrix("x")
    y = ivector("y")
    # This only computes the output corresponding to the target
    y_hat_tg = h_softmax(
        x,
        batch_size,
        output_size,
        h_softmax_level1_size,
        h_softmax_level2_size,
        W1,
        b1,
        W2,
        b2,
        y,
    )
    # This computes all the outputs
    y_hat_all = h_softmax(
        x,
        batch_size,
        output_size,
        h_softmax_level1_size,
        h_softmax_level2_size,
        W1,
        b1,
        W2,
        b2,
    )
    fun_output_tg = pytensor.function([x, y], y_hat_tg)
    fun_output = pytensor.function([x], y_hat_all)
    x_mat = rng.normal(size=(batch_size, input_size)).astype(config.floatX)
    y_mat = rng.integers(0, output_size, batch_size).astype("int32")
    tg_output = fun_output_tg(x_mat, y_mat)
    all_outputs = fun_output(x_mat)
    # With a target: one probability per sample; without: full distribution.
    assert tg_output.shape == (batch_size,)
    assert all_outputs.shape == (batch_size, output_size)
    # Verifies that the outputs computed by fun_output_tg are the same as those
    # computed by fun_output.
    utt.assert_allclose(all_outputs[np.arange(0, batch_size), y_mat], tg_output)
def test_elu():
    """Check `elu` against the NumPy reference for the default and custom alphas."""
    x = matrix("x")
    rng = np.random.default_rng(utt.fetch_seed())
    data = rng.standard_normal((20, 30)).astype(config.floatX)
    # Default alpha (1): elu(x) = x for x > 0, exp(x) - 1 otherwise.
    out = elu(x).eval({x: data})
    utt.assert_allclose(out, np.where(data > 0, data, np.exp(data) - 1))
    # Custom constant alphas, including negative ones.
    for alpha in [1.5, 2, -1, -1.5, -2]:
        out = elu(x, alpha).eval({x: data})
        expected = np.where(data > 0, data, alpha * (np.exp(data) - 1))
        utt.assert_allclose(out, expected)
def test_selu():
    """Check `selu` against the NumPy reference with the canonical constants."""
    # Fixed constants from the SELU paper (self-normalizing networks).
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    x = matrix("x")
    rng = np.random.default_rng(utt.fetch_seed())
    data = rng.standard_normal((20, 30)).astype(config.floatX)
    out = selu(x).eval({x: data})
    expected = np.where(data > 0, scale * data, scale * alpha * (np.exp(data) - 1))
    utt.assert_allclose(out, expected)
def test_binary_crossentropy_reshape():
    # Reported as https://github.com/Theano/Theano/issues/4086
    # The gradient through sigmoid+binary_crossentropy must stay numerically
    # stable when a reshape sits before or after the sigmoid.
    a = tensor4("a")
    for c in (
        binary_crossentropy(sigmoid(a.reshape((-1, 1))), 1).sum(),
        binary_crossentropy(sigmoid(a).reshape((-1, 1)), 1).sum(),
    ):
        ga = pytensor.grad(c, a)
        # This only works when "specialize" options are included
        mode = pytensor.compile.get_default_mode().including("fast_run")
        fga = pytensor.function([a], ga, mode=mode)
        # With target 1 and a large positive logit the gradient is ~0 — the
        # assertion fails if the stabilizing rewrite did not apply.
        utt.assert_allclose(
            fga(np.array([[[[30.0]]]], dtype=config.floatX)),
            np.zeros((1, 1, 1, 1), dtype=config.floatX),
        )
# Broadcast test suite checking `softsign` against the elementwise reference
# x / (1 + |x|) over the standard non-complex float unary-op inputs.
TestSoftsign = makeBroadcastTester(
    op=softsign,
    expected=upcast_int8_nfunc(
        lambda inputs: check_floatX(inputs, inputs / (1.0 + np.fabs(inputs)))
    ),
    good=_good_broadcast_unary_normal_float_no_complex,
    name="SoftsignTester",
)
class TestSigmoidBinaryCrossentropy:
    """`sigmoid_binary_crossentropy(p, t)` must behave like
    `binary_crossentropy(sigmoid(p), t)` in value and gradient."""

    def test_matches_binary_crossentropy(self):
        # Test sigmoid_binary_crossentropy(p, t) ==
        # binary_crossentropy(sigmoid(p), t).
        pred, target = inputs = vectors("pt")
        reference_val = binary_crossentropy(sigmoid(pred), target)
        f_reference = pytensor.function(inputs, reference_val)
        test_val = sigmoid_binary_crossentropy(pred, target)
        f_test = pytensor.function(inputs, test_val)
        rng = np.random.default_rng(utt.fetch_seed())
        pred, target = rng.standard_normal((2, 50)).astype(config.floatX)
        # Squash targets through a sigmoid so they lie in (0, 1).
        test_inputs = [pred, 1 / (1 + np.exp(-target))]
        utt.assert_allclose(f_reference(*test_inputs), f_test(*test_inputs))

    def test_grad(self):
        rng = np.random.default_rng(utt.fetch_seed())
        pred, target = rng.standard_normal((2, 50)).astype(config.floatX)
        test_inputs = [pred, 1 / (1 + np.exp(-target))]
        utt.verify_grad(sigmoid_binary_crossentropy, test_inputs)
def test_confusion_matrix():
    """`confusion_matrix` must match a NumPy reference implementation."""

    # Defining numpy implementation of confusion matrix
    def numpy_conf_mat(actual, pred):
        # Union of observed labels fixes the row/column ordering.
        order = np.union1d(actual, pred)
        # One-hot encode via array broadcasting instead of the deprecated
        # `np.matrix` class (slated for removal by NumPy).
        colA = np.asarray(actual).reshape(-1, 1)
        colP = np.asarray(pred).reshape(-1, 1)
        oneHotA = (colA == order).astype("int64")
        oneHotP = (colP == order).astype("int64")
        # Entry (i, j) counts samples with actual label order[i] predicted
        # as order[j].
        conf_mat = np.dot(oneHotA.T, oneHotP)
        return [conf_mat, order]

    x = vector()
    y = vector()
    f = pytensor.function([x, y], confusion_matrix(x, y))
    list_inputs = [
        [[0, 1, 2, 1, 0], [0, 0, 2, 1, 2]],
        [[2, 0, 2, 2, 0, 1], [0, 0, 2, 2, 0, 2]],
    ]
    for case in list_inputs:
        a = np.asarray(case[0])
        b = np.asarray(case[1])
        out_exp = numpy_conf_mat(a, b)
        outs = f(case[0], case[1])
        for exp_res, out in zip(out_exp, outs):
            utt.assert_allclose(exp_res, out)
from collections import OrderedDict
import numpy as np
import pytest
import pytensor
import pytensor.tensor as at
from pytensor.configdefaults import config
from pytensor.tensor.math import sum as at_sum
from pytensor.tensor.nnet import batchnorm
from pytensor.tensor.shape import specify_broadcastable
from pytensor.tensor.type import (
TensorType,
matrix,
scalar,
tensor3,
tensor4,
tensor5,
vector,
)
from tests import unittest_tools as utt
def test_BNComposite():
    # Run with compute_test_value="raise" so shape mismatches surface at
    # graph-construction time.
    with config.change_flags(compute_test_value="raise"):

        def bn_ref(x, G, B, M, V):
            # Reference batch normalization: normalize by (M, V), then
            # scale by G and shift by B.
            n = (x - M) / V
            return n * G + B

        rng = np.random.default_rng(1234)
        X = 1 + rng.random([10, 20]).astype("float32")
        B = 1 + rng.random([20]).astype("float32")
        G = 1 + rng.random([20]).astype("float32")
        M = 1 + rng.random([20]).astype("float32")
        V = 1 + rng.random([20]).astype("float32")
        x = matrix("x")
        b = vector("b")
        g = vector("g")
        m = vector("m")
        v = vector("v")
        x.tag.test_value = rng.random((2, 2)).astype(pytensor.config.floatX)
        b.tag.test_value = rng.random(2).astype(pytensor.config.floatX)
        g.tag.test_value = rng.random(2).astype(pytensor.config.floatX)
        m.tag.test_value = rng.random(2).astype(pytensor.config.floatX)
        v.tag.test_value = rng.random(2).astype(pytensor.config.floatX)
        bn_ref_op = bn_ref(x, g, b, m, v)
        # NOTE(review): inputs are declared as [x, b, g, ...] but called with
        # (X, G, B, ...), so `b` receives G's value and `g` receives B's.
        # The same swap is applied to both functions below, so the
        # reference-vs-op comparison remains valid.
        f_ref = pytensor.function([x, b, g, m, v], [bn_ref_op])
        res_ref = f_ref(X, G, B, M, V)
        for mode in ["low_mem", "high_mem"]:
            bn_op = batchnorm.batch_normalization(x, g, b, m, v, mode=mode)
            f = pytensor.function([x, b, g, m, v], [bn_op])
            res = f(X, G, B, M, V)
            utt.assert_allclose(res_ref, res)
def test_batch_normalization():
    """Compare `batchnorm.batch_normalization` against a hand-written
    reference, with given and with data-derived mean/std, in both
    memory modes, including gradient checks."""

    def bn_ref(x, G, B, M, V):
        # Reference: normalize by (M, V), then scale by G and shift by B.
        n = (x - M) / V
        return n * G + B

    rng = np.random.default_rng(1234)
    X = 1 + rng.random([10, 20]).astype("float32")
    B = 1 + rng.random([20]).astype("float32")
    G = 1 + rng.random([20]).astype("float32")
    M = 1 + rng.random([20]).astype("float32")
    V = 1 + rng.random([20]).astype("float32")
    x = matrix("x")
    b = vector("b")
    g = vector("g")
    m = vector("m")
    v = vector("v")
    bn_ref_op = bn_ref(x, g, b, m, v)
    f_ref = pytensor.function([x, g, b, m, v], [bn_ref_op])
    res_ref = f_ref(X, G, B, M, V)
    for mode in ["low_mem", "high_mem"]:
        bn_op = batchnorm.batch_normalization(x, g, b, m, v, mode=mode)
        f = pytensor.function([x, g, b, m, v], [bn_op])
        res = f(X, G, B, M, V)
        utt.assert_allclose(res_ref, res)

        def bn_f(inputs, gamma, beta, mean, std):
            # Closure over `mode` for the gradient check.
            return batchnorm.batch_normalization(
                inputs, gamma, beta, mean, std, mode=mode
            )

        utt.verify_grad(bn_f, [X, G, B, M, V])
    # Second pass: mean/std computed from the data instead of supplied.
    bn_ref_op = bn_ref(
        x, g, b, x.mean(axis=0, keepdims=True), x.std(axis=0, keepdims=True)
    )
    f_ref = pytensor.function([x, b, g], [bn_ref_op])
    res_ref = f_ref(X, G, B)
    for mode in ["low_mem", "high_mem"]:
        bn_op = batchnorm.batch_normalization(
            x,
            g,
            b,
            x.mean(axis=0, keepdims=True),
            x.std(axis=0, keepdims=True),
            mode=mode,
        )
        f = pytensor.function([x, b, g], [bn_op])
        res = f(X, G, B)
        utt.assert_allclose(res_ref, res)

        def bn_f(inputs, gamma, beta, mean, std):
            return batchnorm.batch_normalization(
                inputs, gamma, beta, mean, std, mode=mode
            )

        utt.verify_grad(
            bn_f, [X, G, B, X.mean(axis=0)[np.newaxis], X.std(axis=0)[np.newaxis]]
        )
def test_bn_feature_maps():
    """Batch normalization over 4-D feature maps: the per-channel parameter
    vectors are broadcast over batch and spatial dimensions via dimshuffle."""

    def bn_ref(x, G, B, M, V):
        # Reference: normalize by (M, V), then scale by G and shift by B.
        n = (x - M) / V
        return n * G + B

    rng = np.random.default_rng(1234)
    X = 1 + rng.random([2, 3, 4, 4]).astype("float32")
    B = 1 + rng.random([3]).astype("float32")
    G = 1 + rng.random([3]).astype("float32")
    M = 1 + rng.random([3]).astype("float32")
    V = 1 + rng.random([3]).astype("float32")
    x = tensor4("x")
    b = vector("b")
    g = vector("g")
    m = vector("m")
    v = vector("v")
    # dimshuffle("x", 0, "x", "x") maps the channel vector onto axis 1.
    bn_ref_op = bn_ref(
        x,
        g.dimshuffle("x", 0, "x", "x"),
        b.dimshuffle("x", 0, "x", "x"),
        m.dimshuffle("x", 0, "x", "x"),
        v.dimshuffle("x", 0, "x", "x"),
    )
    f_ref = pytensor.function([x, b, g, m, v], [bn_ref_op])
    res_ref = f_ref(X, G, B, M, V)
    for mode in ["low_mem", "high_mem"]:
        bn_op = batchnorm.batch_normalization(
            x,
            g.dimshuffle("x", 0, "x", "x"),
            b.dimshuffle("x", 0, "x", "x"),
            m.dimshuffle("x", 0, "x", "x"),
            v.dimshuffle("x", 0, "x", "x"),
            mode=mode,
        )
        f = pytensor.function([x, b, g, m, v], [bn_op])
        res = f(X, G, B, M, V)
        utt.assert_allclose(res_ref, res)

        def conv_bn(inputs, gamma, beta, mean, std):
            # Closure over `mode` for the gradient check.
            return batchnorm.batch_normalization(
                inputs,
                gamma.dimshuffle("x", 0, "x", "x"),
                beta.dimshuffle("x", 0, "x", "x"),
                mean.dimshuffle("x", 0, "x", "x"),
                std.dimshuffle("x", 0, "x", "x"),
                mode=mode,
            )

        utt.verify_grad(conv_bn, [X, G, B, M, V])
@pytest.mark.slow
def test_batch_normalization_train():
    """End-to-end check of `batch_normalization_train` against a manual
    reference graph.

    For several axis specifications and input ranks this builds both the
    fused op and an explicit reference expression, takes first- and
    second-order gradients of both, compiles everything into one function,
    and compares all results numerically.  The compiled outputs are laid
    out as::

        [0:5]    op forward   (out, mean, invstd, running_mean, running_var)
        [5:10]   reference forward (same order)
        [10:13]  op gradients          [13:16]  reference gradients
        [16:19]  op 2nd-order grads    [19:22]  reference 2nd-order grads
    """
    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
        for vartype in (tensor5, tensor3, vector):
            x, scale, bias, running_mean, running_var = (
                vartype(n)
                for n in ("x", "scale", "bias", "running_mean", "running_var")
            )
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used
            running_average_factor = 0.3
            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
                if len(axes) == 0:
                    continue

            # forward pass
            (
                out,
                x_mean,
                x_invstd,
                out_running_mean,
                out_running_var,
            ) = batchnorm.batch_normalization_train(
                x,
                scale,
                bias,
                axes,
                eps,
                running_average_factor,
                running_mean,
                running_var,
            )
            # reference forward pass
            if axes == "per-activation":
                axes2 = (0,)
            elif axes == "spatial":
                axes2 = (0,) + tuple(range(2, ndim))
            else:
                axes2 = axes
            x_mean2 = x.mean(axis=axes2, keepdims=True)
            x_var2 = x.var(axis=axes2, keepdims=True)
            x_invstd2 = at.reciprocal(at.sqrt(x_var2 + eps))
            scale2 = specify_broadcastable(scale, *axes2)
            bias2 = specify_broadcastable(bias, *axes2)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            # `m` is the number of elements averaged per statistic; the
            # running variance uses the unbiased m/(m-1) correction.
            m = at.cast(at.prod(x.shape) / at.prod(scale.shape), pytensor.config.floatX)
            out_running_mean2 = (
                running_mean * (1 - running_average_factor)
                + x_mean2 * running_average_factor
            )
            out_running_var2 = (
                running_var * (1 - running_average_factor)
                + (m / (m - 1)) * x_var2 * running_average_factor
            )
            # backward pass
            dy = vartype("dy")
            grads = at.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = at.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # second-order backward pass
            dx = vartype("dinputs")
            dscale = vartype("dscale")
            dbias = vartype("dbias")
            grad_grads = at.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict(
                    {grads[0]: dx, grads[1]: dscale, grads[2]: dbias}
                ),
                consider_constant=[
                    x,
                    dy,
                    scale,
                    bias,
                    x_mean,
                    x_invstd,
                    running_mean,
                    running_var,
                ],
                return_disconnected="zero",
            )
            # reference second-order backward pass
            grad_grads2 = at.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict(
                    {grads2[0]: dx, grads2[1]: dscale, grads2[2]: dbias}
                ),
                consider_constant=[
                    x,
                    dy,
                    scale,
                    bias,
                    x_mean2,
                    x_var2,
                    running_mean,
                    running_var,
                ],
                return_disconnected="zero",
            )
            # compile
            f = pytensor.function(
                [x, scale, bias, running_mean, running_var, dy, dx, dscale, dbias],
                [
                    out,
                    x_mean,
                    x_invstd,
                    out_running_mean,
                    out_running_var,
                    out2,
                    x_mean2,
                    x_invstd2,
                    out_running_mean2,
                    out_running_var2,
                ]
                + grads
                + grads2
                + grad_grads
                + grad_grads2,
            )
            # check if the abstract Ops have been replaced
            assert not any(
                isinstance(
                    n.op,
                    (
                        batchnorm.AbstractBatchNormTrain,
                        batchnorm.AbstractBatchNormInference,
                        batchnorm.AbstractBatchNormTrainGrad,
                    ),
                )
                for n in f.maker.fgraph.toposort()
            )
            # run
            for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (2, 3, 5, 5, 5)):
                data_shape = data_shape[:ndim]
                # parameters are size-1 along the normalized axes
                param_shape = tuple(
                    1 if d in axes2 else s for d, s in enumerate(data_shape)
                )
                rng = np.random.default_rng(1234)
                X = 4 + 3 * rng.random(data_shape).astype(pytensor.config.floatX)
                Dy = -1 + 2 * rng.random(data_shape).astype(pytensor.config.floatX)
                Scale = rng.random(param_shape).astype(pytensor.config.floatX)
                Bias = rng.random(param_shape).astype(pytensor.config.floatX)
                Running_mean = rng.random(param_shape).astype(pytensor.config.floatX)
                Running_var = rng.random(param_shape).astype(pytensor.config.floatX)
                Dx = 4 + 3 * rng.random(data_shape).astype(pytensor.config.floatX)
                Dscale = -1 + 2 * rng.random(param_shape).astype(pytensor.config.floatX)
                Dbias = rng.random(param_shape).astype(pytensor.config.floatX)

                outputs = f(
                    X, Scale, Bias, Running_mean, Running_var, Dy, Dx, Dscale, Dbias
                )
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 5])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 5])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 5])  # invstd
                utt.assert_allclose(outputs[3], outputs[3 + 5])  # running_mean
                # nan_to_num guards against NaN/inf from the m/(m-1)
                # correction — presumably when m == 1; confirm if needed.
                utt.assert_allclose(
                    np.nan_to_num(outputs[4]), np.nan_to_num(outputs[4 + 5])
                )  # running_var
                # compare gradients
                utt.assert_allclose(outputs[10], outputs[10 + 3], atol=1e-4)  # dx
                utt.assert_allclose(
                    outputs[11], outputs[11 + 3], rtol=2e-4, atol=1e-4
                )  # dscale
                utt.assert_allclose(outputs[12], outputs[12 + 3])  # dbias
                # compare second-order gradients
                utt.assert_allclose(outputs[16], outputs[16 + 3], atol=1e-4)  # ddx
                utt.assert_allclose(outputs[17], outputs[17 + 3])  # ddy
                utt.assert_allclose(
                    outputs[18], outputs[18 + 3], rtol=3e-4, atol=1e-4
                )  # ddscale
@pytest.mark.slow
def test_batch_normalization_train_grad_grad():
    """Numerically verify the gradients of `AbstractBatchNormTrainGrad`,
    i.e. the second-order gradients of batch-normalization training.

    For each axis specification and input rank, `utt.verify_grad` checks
    the gradient of the train-grad op through each of its three outputs
    (w.r.t. inputs, scale, and bias) on several data shapes.
    """
    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
        for vartype in (tensor5, tensor4, tensor3, matrix, vector):
            # run these experiments with float64 for sufficient numerical stability
            x, dy, scale, x_mean, x_invstd = (
                vartype(n, dtype="float64")
                for n in ("x", "dy", "scale", "x_mean", "x_invstd")
            )
            ndim = x.ndim

            # Resolve the axis specification into an explicit tuple.  Use a
            # separate name (`axes2`, as in the sibling tests) instead of
            # rebinding `axes`: rebinding clobbered the outer loop variable,
            # so later `vartype` iterations silently fell into the tuple
            # branch with the previously-filtered value.
            if axes == "per-activation":
                axes2 = (0,)
            elif axes == "spatial":
                axes2 = (0,) + tuple(range(2, ndim))
            else:
                # remove non-existing axes
                axes2 = tuple(i for i in axes if i < ndim)
                if len(axes2) == 0:
                    continue

            def bn_grad_wrt_inputs_f(x, dy, scale, x_mean, x_invstd):
                # gradient flowing through g_inputs only
                g_inputs, g_scale, g_bias = batchnorm.AbstractBatchNormTrainGrad(
                    axes2
                )(x, dy, scale, x_mean, x_invstd)
                return g_inputs

            def bn_grad_wrt_scale_f(x, dy, scale, x_mean, x_invstd):
                # gradient flowing through g_scale only
                g_inputs, g_scale, g_bias = batchnorm.AbstractBatchNormTrainGrad(
                    axes2
                )(x, dy, scale, x_mean, x_invstd)
                return g_scale

            def bn_grad_wrt_bias_f(x, dy, scale, x_mean, x_invstd):
                # gradient flowing through g_bias only
                g_inputs, g_scale, g_bias = batchnorm.AbstractBatchNormTrainGrad(
                    axes2
                )(x, dy, scale, x_mean, x_invstd)
                return g_bias

            # run
            for data_shape in ((4, 3, 3, 3, 3), (4, 3, 1, 1, 1), (2, 3, 5, 3, 2)):
                data_shape = data_shape[:ndim]
                # parameters are size-1 along the normalized axes
                param_shape = tuple(
                    1 if d in axes2 else s for d, s in enumerate(data_shape)
                )
                rng = np.random.default_rng(1234)
                # force float64 for sufficient numerical stability
                x_val = 4 + 3 * rng.random(data_shape).astype("float64")
                dy_val = -1 + 2 * rng.random(data_shape).astype("float64")
                scale_val = rng.random(param_shape).astype("float64")
                x_mean_val = rng.random(param_shape).astype("float64")
                x_invstd_val = rng.random(param_shape).astype("float64")

                utt.verify_grad(
                    bn_grad_wrt_inputs_f,
                    [x_val, dy_val, scale_val, x_mean_val, x_invstd_val],
                    abs_tol=5e-4,
                    rel_tol=5e-4,
                )
                utt.verify_grad(
                    bn_grad_wrt_scale_f,
                    [x_val, dy_val, scale_val, x_mean_val, x_invstd_val],
                )
                utt.verify_grad(
                    bn_grad_wrt_bias_f,
                    [x_val, dy_val, scale_val, x_mean_val, x_invstd_val],
                )
def test_batch_normalization_train_without_running_averages():
    """`batch_normalization_train` must compile and run in its three-output
    form, i.e. when no running averages are requested."""
    data_shape = (5, 10, 30, 25)
    param_shape = (1, 10, 30, 25)
    x = tensor4("x")
    scale = tensor4("scale")
    bias = tensor4("bias")
    dy = tensor4("dy")

    # forward pass: only (out, mean, invstd), no running statistics
    out, x_mean, x_invstd = batchnorm.batch_normalization_train(
        x, scale, bias, "per-activation"
    )
    # backward pass
    grads = at.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
    # compile
    f = pytensor.function([x, scale, bias, dy], [out, x_mean, x_invstd] + grads)

    # every abstract batch-norm op must have been rewritten away
    abstract_ops = (
        batchnorm.AbstractBatchNormTrain,
        batchnorm.AbstractBatchNormInference,
        batchnorm.AbstractBatchNormTrainGrad,
    )
    assert all(
        not isinstance(node.op, abstract_ops) for node in f.maker.fgraph.toposort()
    )

    # run on random data; success == no exception
    rng = np.random.default_rng(1234)
    x_val = 4 + 3 * rng.random(data_shape).astype(pytensor.config.floatX)
    dy_val = -1 + 2 * rng.random(data_shape).astype(pytensor.config.floatX)
    scale_val = rng.random(param_shape).astype(pytensor.config.floatX)
    bias_val = rng.random(param_shape).astype(pytensor.config.floatX)
    f(x_val, scale_val, bias_val, dy_val)
def test_batch_normalization_train_broadcast():
    """Broadcasted and non-broadcasted parameters must yield equal graphs.

    Builds the train/test ops once with plain parameter tensors and once
    with the same parameters dimshuffled to the full input rank, then
    checks the rewriter collapses the sum of absolute differences of all
    paired outputs to a constant zero.
    """
    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
        for vartype in (tensor5, tensor4, tensor3, matrix, vector):
            x = vartype("x")
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used
            running_average_factor = 0.3

            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
                if len(axes) == 0:
                    continue

            # convert axes to explicit list
            if axes == "per-activation":
                axes2 = (0,)
            elif axes == "spatial":
                axes2 = (0,) + tuple(range(2, ndim))
            else:
                axes2 = axes

            # compute axes for parameter tensors
            non_bc_axes = tuple(i for i in range(ndim) if i not in axes2)
            params_dimshuffle = ["x"] * ndim
            for i, axis in enumerate(non_bc_axes):
                params_dimshuffle[axis] = i

            # construct non-broadcasted parameter variables
            param_type = TensorType(x.dtype, shape=(None,) * len(non_bc_axes))
            scale, bias, running_mean, running_var = (
                param_type(n) for n in ("scale", "bias", "running_mean", "running_var")
            )

            # broadcast parameter variables
            scale_bc = scale.dimshuffle(params_dimshuffle)
            bias_bc = bias.dimshuffle(params_dimshuffle)
            running_mean_bc = running_mean.dimshuffle(params_dimshuffle)
            running_var_bc = running_var.dimshuffle(params_dimshuffle)

            # batch_normalization_train with original, non-broadcasted variables
            train_non_bc = batchnorm.batch_normalization_train(
                x,
                scale,
                bias,
                axes,
                eps,
                running_average_factor,
                running_mean,
                running_var,
            )
            # batch_normalization_train with broadcasted variables
            train_bc = batchnorm.batch_normalization_train(
                x,
                scale_bc,
                bias_bc,
                axes,
                eps,
                running_average_factor,
                running_mean_bc,
                running_var_bc,
            )
            # drop the broadcast dims from the statistics outputs so shapes
            # match the non-broadcasted results pairwise
            train_bc = tuple(
                [train_bc[0]] + [r.dimshuffle(non_bc_axes) for r in train_bc[1:]]  # out
            )

            # batch_normalization_test with original, non-broadcasted variables
            test_non_bc = batchnorm.batch_normalization_test(
                x, scale, bias, running_mean, running_var, axes, eps
            )
            # batch_normalization_test with broadcasted variables
            test_bc = batchnorm.batch_normalization_test(
                x, scale_bc, bias_bc, running_mean_bc, running_var_bc, axes, eps
            )

            # subtract the results of the non-broadcasted and broadcasted calls
            results_non_bc = train_non_bc + (test_non_bc,)
            results_bc = train_bc + (test_bc,)
            results = [abs(r - r_bc) for (r, r_bc) in zip(results_non_bc, results_bc)]

            # compile to compute all differences
            f = pytensor.function(
                [x, scale, bias, running_mean, running_var], at_sum(sum(results))
            )

            # the paired ops are exactly the same, so the optimizer should have
            # collapsed the sum of differences to a constant zero
            nodes = f.maker.fgraph.toposort()
            if pytensor.config.mode != "FAST_COMPILE":
                assert len(nodes) == 1
                assert isinstance(nodes[0].op, pytensor.compile.DeepCopyOp)
            inputs = [
                np.asarray(np.random.random((4,) * n), x.dtype)
                for n in [
                    x.ndim,
                    scale.ndim,
                    bias.ndim,
                    running_mean.ndim,
                    running_var.ndim,
                ]
            ]
            assert 0.0 == f(*inputs)
@pytest.mark.slow
def test_batch_normalization_test():
    """Check `batch_normalization_test` (inference mode) against a manual
    reference expression, including gradients w.r.t. all five inputs.

    The compiled outputs are ``[out, out2] + grads + grads2``; gradient
    index ``k`` (op) pairs with index ``k + 5`` (reference).
    """
    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
        for vartype in (tensor5, tensor3, vector):
            x, scale, bias, mean, var = (
                vartype(n) for n in ("x", "scale", "bias", "mean", "var")
            )
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
                if len(axes) == 0:
                    continue

            # forward pass
            out = batchnorm.batch_normalization_test(
                x, scale, bias, mean, var, axes, eps
            )
            # reference forward pass
            if axes == "per-activation":
                axes2 = (0,)
            elif axes == "spatial":
                axes2 = (0,) + tuple(range(2, ndim))
            else:
                axes2 = axes
            scale2, bias2, mean2, var2 = (
                specify_broadcastable(t, *axes2) for t in (scale, bias, mean, var)
            )
            out2 = (x - mean2) * (scale2 / at.sqrt(var2 + eps)) + bias2
            # backward pass
            dy = vartype("dy")
            grads = at.grad(
                None, wrt=[x, scale, bias, mean, var], known_grads={out: dy}
            )
            # reference backward pass
            grads2 = at.grad(
                None, wrt=[x, scale, bias, mean, var], known_grads={out2: dy}
            )
            # compile
            f = pytensor.function(
                [x, scale, bias, mean, var, dy], [out, out2] + grads + grads2
            )
            # check if the abstract Ops have been replaced
            assert not any(
                isinstance(
                    n.op,
                    (
                        batchnorm.AbstractBatchNormTrain,
                        batchnorm.AbstractBatchNormInference,
                        batchnorm.AbstractBatchNormTrainGrad,
                    ),
                )
                for n in f.maker.fgraph.toposort()
            )
            # run
            for data_shape in ((10, 20, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
                data_shape = data_shape[:ndim]
                # parameters are size-1 along the normalized axes
                param_shape = tuple(
                    1 if d in axes2 else s for d, s in enumerate(data_shape)
                )
                rng = np.random.default_rng(1234)
                X = 4 + 3 * rng.random(data_shape).astype(pytensor.config.floatX)
                Dy = -1 + 2 * rng.random(data_shape).astype(pytensor.config.floatX)
                Scale = rng.random(param_shape).astype(pytensor.config.floatX)
                Bias = rng.random(param_shape).astype(pytensor.config.floatX)
                Mean = rng.random(param_shape).astype(pytensor.config.floatX)
                Var = rng.random(param_shape).astype(pytensor.config.floatX)
                outputs = f(X, Scale, Bias, Mean, Var, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[1])  # out
                # compare gradients
                utt.assert_allclose(outputs[2], outputs[2 + 5], atol=4e-5)  # dx
                utt.assert_allclose(outputs[3], outputs[3 + 5], atol=4e-5)  # dscale
                utt.assert_allclose(outputs[4], outputs[4 + 5])  # dbias
                utt.assert_allclose(outputs[5], outputs[5 + 5])  # dmean
                utt.assert_allclose(
                    outputs[6], outputs[6 + 5], rtol=2e-3, atol=4e-5
                )  # dvar
def test_batch_normalization_broadcastable():
    """A fully-broadcastable (all-size-1) input pattern must survive the
    rewrites that replace the abstract batch-norm ops."""
    # 5d variables whose every dimension is broadcastable
    names = ("x", "dy", "scale", "bias", "mean", "var")
    x, dy, scale, bias, mean, var = [
        scalar(name).dimshuffle(["x"] * 5) for name in names
    ]

    # forward passes (training and inference)
    out_train, x_mean, x_invstd = batchnorm.batch_normalization_train(
        x, scale, bias, "spatial"
    )
    out_test = batchnorm.batch_normalization_test(x, scale, bias, mean, var, "spatial")

    # backward passes
    grads_train = at.grad(None, wrt=[x, scale, bias], known_grads={out_train: dy})
    grads_test = at.grad(None, wrt=[x, scale, bias], known_grads={out_test: dy})

    # compile everything into one function
    f = pytensor.function(
        [x, scale, bias, mean, var, dy],
        [out_train, x_mean, x_invstd, out_test] + grads_train + grads_test,
    )

    # every abstract batch-norm op must have been rewritten away
    abstract_ops = (
        batchnorm.AbstractBatchNormTrain,
        batchnorm.AbstractBatchNormInference,
        batchnorm.AbstractBatchNormTrainGrad,
    )
    assert all(
        not isinstance(node.op, abstract_ops) for node in f.maker.fgraph.toposort()
    )
"""
Tests for block sparse dot
"""
import numpy as np
import pytensor
import pytensor.tensor as at
import tests.unittest_tools as utt
from pytensor.tensor.elemwise import DimShuffle
from pytensor.tensor.nnet.blocksparse import (
SparseBlockGemv,
SparseBlockOuter,
sparse_block_dot,
sparse_block_gemv,
sparse_block_outer,
)
from pytensor.tensor.type import fmatrix, ftensor3, ftensor4, imatrix
class TestBlockSparseGemvAndOuter(utt.InferShapeTester):
    """Tests for the block-sparse gemv/outer ops.

    Each op is compared against a straightforward NumPy reference
    implementation; gradients and shape inference are also checked.
    """

    def setup_method(self):
        mode = None
        if pytensor.config.mode == "FAST_COMPILE":
            mode = "FAST_RUN"
        # Exclude constant_folding so the ops under test stay in the
        # compiled graph even when their index inputs are constants.
        self.mode = pytensor.compile.get_mode(mode).excluding("constant_folding")
        self.gemv_op = sparse_block_gemv
        self.outer_op = sparse_block_outer
        self.gemv_class = SparseBlockGemv
        self.outer_class = SparseBlockOuter
        super().setup_method()

    @staticmethod
    def gemv_data():
        """Return (weight, input, inputIndice, bias, outputIndice) test data."""
        nInputBlock = 8
        nOutputBlock = 7
        inputSize = 6
        outputSize = 5
        inputWindowSize = 4
        outputWindowSize = 3
        batchSize = 2

        rng = np.random.default_rng(230920)
        input = rng.standard_normal((batchSize, inputWindowSize, inputSize)).astype(
            "float32"
        )
        # np.vstack requires a sequence of arrays; passing a generator is an
        # error in modern NumPy, so the per-batch rows are built as lists.
        inputIndice = np.vstack(
            [rng.permutation(nInputBlock)[:inputWindowSize] for _ in range(batchSize)]
        ).astype("int32")
        outputIndice = np.vstack(
            [
                rng.permutation(nOutputBlock)[:outputWindowSize]
                for _ in range(batchSize)
            ]
        ).astype("int32")
        weight = rng.standard_normal(
            (nInputBlock, nOutputBlock, inputSize, outputSize)
        ).astype("float32")
        bias = rng.standard_normal((nOutputBlock, outputSize)).astype("float32")

        return weight, input, inputIndice, bias, outputIndice

    @staticmethod
    def outer_data():
        """Return (o, x, y, xIdx, yIdx) test data for the outer op."""
        nInputBlock = 8
        nOutputBlock = 7
        xSize = 6
        ySize = 5
        xWindowSize = 4
        yWindowSize = 3
        batchSize = 2

        rng = np.random.default_rng(230920)
        o = rng.standard_normal((nInputBlock, nOutputBlock, xSize, ySize)).astype(
            "float32"
        )
        x = rng.standard_normal((batchSize, xWindowSize, xSize)).astype("float32")
        y = rng.standard_normal((batchSize, yWindowSize, ySize)).astype("float32")
        # np.vstack needs a sequence (not a generator) — see gemv_data.
        xIdx = np.vstack(
            [rng.integers(0, nInputBlock, size=xWindowSize) for _ in range(batchSize)]
        ).astype("int32")
        yIdx = np.vstack(
            [rng.integers(0, nOutputBlock, size=yWindowSize) for _ in range(batchSize)]
        ).astype("int32")

        return o, x, y, xIdx, yIdx

    @staticmethod
    def gemv_numpy(o, W, h, iIdx, oIdx):
        """Reference gemv: accumulate the indexed block dot products into `o`."""
        for b in range(o.shape[0]):
            for j in range(o.shape[1]):
                outputIdx = oIdx[b, j]
                for i in range(h.shape[1]):
                    inputIdx = iIdx[b, i]
                    w = W[inputIdx, outputIdx]
                    o[b, j, :] += np.dot(h[b, i], w)
        return o

    @staticmethod
    def gemv_numpy2(o, W, h, iIdx, oIdx):
        """
        Other implementation
        """
        from numpy import ix_

        for b in range(o.shape[0]):
            w = W[ix_(iIdx[b], oIdx[b])].swapaxes(1, 2)
            w = w.reshape((w.shape[0] * w.shape[1], w.shape[2] * w.shape[3]))
            o[b] += np.dot(h[b].ravel(), w).reshape(o.shape[1:])
        return o

    @staticmethod
    def gemv_numpy3(o, W, h, iIdx, oIdx):
        """
        Other implementation
        """
        from numpy import ix_

        for b in range(o.shape[0]):
            w = W[ix_(iIdx[b], oIdx[b])]
            # The next three lines do the same operation. The last one is the
            # fastest
            # o[b] += (h[b][:, None, :, None] * w).sum(axis=(0, 2))
            # o[b] += np.tensordot(h[b], w, [(0,1),(0,2)])
            o[b] += np.einsum("ik,ijkl", h[b], w)
        return o

    @staticmethod
    def outer_numpy(o, x, y, xIdx, yIdx):
        """Reference outer: accumulate outer products into the indexed blocks."""
        for b in range(x.shape[0]):
            for i in range(xIdx.shape[1]):
                for j in range(yIdx.shape[1]):
                    o[xIdx[b, i], yIdx[b, j]] += np.outer(x[b, i, :], y[b, j, :])
        return o

    def test_sparseblockdot(self):
        # Compares the numpy version of sparseblockgemv to sparse_block_dot.
        b = fmatrix()
        W = ftensor4()
        h = ftensor3()
        iIdx = imatrix()
        oIdx = imatrix()

        o = sparse_block_dot(W, h, iIdx, b, oIdx)

        f = pytensor.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val)
        ref_out = self.gemv_numpy(
            b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val
        )

        utt.assert_allclose(ref_out, th_out)

    def test_sparseblockgemv(self):
        # Compares the numpy and pytensor versions of sparseblockgemv.
        b = fmatrix()
        W = ftensor4()
        h = ftensor3()
        iIdx = imatrix()
        oIdx = imatrix()

        o = self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)

        f = pytensor.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val)
        ref_out = self.gemv_numpy(
            b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val
        )

        utt.assert_allclose(ref_out, th_out)

    def test_sparseblockgemvF(self):
        # Test the fortran order for W (which can happen in the grad for some
        # graphs).
        b = fmatrix()
        W = ftensor4()
        h = ftensor3()
        iIdx = imatrix()
        oIdx = imatrix()

        o = self.gemv_op(
            b.take(oIdx, axis=0),
            DimShuffle((False, False, False, False), (0, 1, 3, 2))(
                at.as_tensor_variable(W)
            ),
            h,
            iIdx,
            oIdx,
        )

        f = pytensor.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        th_out = f(np.swapaxes(W_val, 2, 3), h_val, iIdx_val, b_val, oIdx_val)
        ref_out = self.gemv_numpy(
            b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val
        )

        utt.assert_allclose(ref_out, th_out)

    def test_sparseblockgemv_grad(self):
        # Verify the gradients of both the meta-op and the raw gemv op.
        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        iIdx = at.constant(iIdx_val)
        oIdx = at.constant(oIdx_val)

        def metaop(b, h, W):
            return sparse_block_dot(W, h, iIdx, b, oIdx)

        def op(b, h, W):
            return self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)

        eps = 3e-3
        utt.verify_grad(metaop, [b_val, h_val, W_val], mode=self.mode, eps=eps)
        utt.verify_grad(op, [b_val, h_val, W_val], mode=self.mode, eps=eps)

    def test_sparseblockgemv_grad_1(self):
        # Test that we correctly handle cases where dimensions are 1.
        rng = np.random.default_rng(230920)
        h_val = rng.standard_normal((1, 1, 1)).astype("float32")
        iIdx_val = rng.permutation(1)[:1][None, :]
        oIdx_val = rng.permutation(1)[:1][None, :]
        W_val = rng.standard_normal((1, 1, 1, 1)).astype("float32")
        b_val = rng.standard_normal((1, 1)).astype("float32")

        iIdx = at.constant(iIdx_val)
        oIdx = at.constant(oIdx_val)

        def metaop(b, h, W):
            return sparse_block_dot(W, h, iIdx, b, oIdx)

        def op(b, h, W):
            return self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)

        utt.verify_grad(metaop, [b_val, h_val, W_val], mode=self.mode)
        utt.verify_grad(op, [b_val, h_val, W_val], mode=self.mode)

    def test_sparseblockgemv_grad_shape(self):
        # The gradient outputs must match the shapes of the inputs.
        b = fmatrix()
        W = ftensor4()
        h = ftensor3()
        iIdx = imatrix()
        oIdx = imatrix()

        o = self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
        go = pytensor.grad(o.sum(), [b, W, h])

        f = pytensor.function([W, h, iIdx, b, oIdx], go, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        # just make sure that it runs correctly and all the shapes are ok.
        b_g, W_g, h_g = f(W_val, h_val, iIdx_val, b_val, oIdx_val)

        assert b_g.shape == b_val.shape
        assert h_g.shape == h_val.shape
        assert W_g.shape == W_val.shape

    def test_sparseblockouter(self):
        # Compares the numpy and pytensor versions of sparseblockouter.
        o = ftensor4()
        x = ftensor3()
        y = ftensor3()
        xIdx = imatrix()
        yIdx = imatrix()

        out = self.outer_op(o, x, y, xIdx, yIdx)

        f = pytensor.function(
            [o, x, y, xIdx, yIdx], out, on_unused_input="warn", mode=self.mode
        )

        (
            o_val,
            x_val,
            y_val,
            xIdx_val,
            yIdx_val,
        ) = self.outer_data()

        th_out = f(o_val, x_val, y_val, xIdx_val, yIdx_val)
        ref_out = self.outer_numpy(o_val, x_val, y_val, xIdx_val, yIdx_val)

        utt.assert_allclose(ref_out, th_out)

    def test_dot_infershape(self):
        # Shape inference for the sparse_block_dot meta-op.
        b = fmatrix()
        W = ftensor4()
        h = ftensor3()
        iIdx = imatrix()
        oIdx = imatrix()

        self._compile_and_check(
            [W, h, iIdx, b, oIdx],
            [sparse_block_dot(W, h, iIdx, b, oIdx)],
            self.gemv_data(),
            self.gemv_class,
        )

    def test_gemv_infershape(self):
        # Shape inference for the raw gemv op.
        b = fmatrix()
        W = ftensor4()
        h = ftensor3()
        iIdx = imatrix()
        oIdx = imatrix()

        self._compile_and_check(
            [W, h, iIdx, b, oIdx],
            [self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)],
            self.gemv_data(),
            self.gemv_class,
        )

    def test_outer_infershape(self):
        # Shape inference for the outer op.
        o = ftensor4()
        x = ftensor3()
        y = ftensor3()
        xIdx = imatrix()
        yIdx = imatrix()

        self._compile_and_check(
            [o, x, y, xIdx, yIdx],
            [self.outer_op(o, x, y, xIdx, yIdx)],
            self.outer_data(),
            self.outer_class,
        )
import time
import numpy as np
import pytest
import pytensor
import pytensor.tensor as at
from pytensor.compile.mode import Mode
from pytensor.tensor.exceptions import NotScalarConstantError
from pytensor.tensor.math import _allclose, exp
from pytensor.tensor.nnet import conv, conv2d
from pytensor.tensor.type import dmatrix, dtensor3, dtensor4, dvector, scalar, tensor4
from tests import unittest_tools as utt
@pytest.mark.skipif(
pytensor.config.cxx == "",
reason="conv2d tests need SciPy or a c++ compiler",
)
class TestConv2D(utt.InferShapeTester):
# This class contains tests for the legacy 2d convolution,
# but will also be inherited from for other implementations
mode = None
dtype = pytensor.config.floatX
# This will be set to the appropriate function in the inherited classes.
# The call to `staticmethod` is necessary to prevent Python from passing
# `self` as the first argument.
conv2d = staticmethod(conv2d)
def setup_method(self):
    """Create fresh symbolic input/filter variables for every test."""
    inp = tensor4("input", dtype=self.dtype)
    inp.name = "default_V"
    self.input = inp
    flt = tensor4("filters", dtype=self.dtype)
    flt.name = "default_filters"
    self.filters = flt
    super().setup_method()
def validate(
    self,
    image_shape,
    filter_shape,
    border_mode="valid",
    subsample=(1, 1),
    N_image_shape=None,
    N_filter_shape=None,
    input=None,
    filters=None,
    unroll_batch=None,
    unroll_kern=None,
    unroll_patch=None,
    verify_grad=True,
    should_raise=False,
):
    """
    Build a conv2d graph, run it, and compare against a pure-NumPy
    reference convolution; optionally verify the gradient numerically.

    :param image_shape: The constant shape info passed to conv2d.
    :param filter_shape: The constant shape info passed to conv2d.
    :param border_mode: "valid" or "full", forwarded to conv2d.
    :param subsample: output striding, forwarded to conv2d.
    :param N_image_shape: None(default to image_shape) or tuple of
                          4 elements with the shape of the input image
    :param N_filter_shape: None(default to filter_shape) or tuple
                           of 4 elements with the shape of the
                           input filter
    :param verify_grad: if True, also run `utt.verify_grad`.
    :param should_raise: if True, running the compiled function is
                         expected to fail with ValueError; a successful
                         run then raises.
    """
    if N_image_shape is None:
        N_image_shape = [
            at.get_scalar_constant_value(at.as_tensor_variable(x))
            for x in image_shape
        ]
    if N_filter_shape is None:
        N_filter_shape = [
            at.get_scalar_constant_value(at.as_tensor_variable(x))
            for x in filter_shape
        ]

    if input is None:
        input = self.input
    if not filters:
        filters = self.filters

    # PYTENSOR IMPLEMENTATION

    # we create a symbolic function so that verify_grad can work
    def sym_conv2d(input, filters):
        # define pytensor graph and function
        input.name = "input"
        filters.name = "filters"
        # the legacy conv.conv2d is deprecated; assert it warns
        with pytest.warns(DeprecationWarning):
            rval = conv.conv2d(
                input,
                filters,
                image_shape,
                filter_shape,
                border_mode,
                subsample,
                unroll_batch=unroll_batch,
                unroll_kern=unroll_kern,
                unroll_patch=unroll_patch,
            )
        rval.name = "conv_output"
        return rval

    output = sym_conv2d(input, filters)
    output.name = f"conv2d({input.name},{filters.name})"
    pytensor_conv = pytensor.function([input, filters], output, mode=self.mode)

    # initialize input and compute result
    image_data = np.random.random(N_image_shape).astype(self.dtype)
    filter_data = np.random.random(N_filter_shape).astype(self.dtype)
    try:
        pytensor_output = pytensor_conv(image_data, filter_data)
    except ValueError:
        if not should_raise:
            raise
        return
    else:
        if should_raise:
            raise Exception("ConvOp should have generated an error")

    # REFERENCE IMPLEMENTATION
    # output size: valid -> in - filter + 1, full -> in + filter - 1,
    # then divided (ceil) by the subsampling factor.
    s = 1.0
    orig_image_data = image_data
    if border_mode != "full":
        s = -1.0
    out_shape2d = (
        np.array(N_image_shape[-2:]) + s * np.array(N_filter_shape[-2:]) - s
    )
    out_shape2d = np.ceil(out_shape2d / np.array(subsample))
    # avoid numpy deprecation
    out_shape2d = out_shape2d.astype("int32")
    out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape2d)
    ref_output = np.zeros(out_shape)

    # loop over output feature maps
    ref_output.fill(0)
    if border_mode == "full":
        # zero-pad the image so "full" reduces to a "valid" convolution
        image_data2 = np.zeros(
            (
                N_image_shape[0],
                N_image_shape[1],
                N_image_shape[2] + 2 * N_filter_shape[2] - 2,
                N_image_shape[3] + 2 * N_filter_shape[3] - 2,
            )
        )
        image_data2[
            :,
            :,
            N_filter_shape[2] - 1 : N_filter_shape[2] - 1 + N_image_shape[2],
            N_filter_shape[3] - 1 : N_filter_shape[3] - 1 + N_image_shape[3],
        ] = image_data
        image_data = image_data2
        N_image_shape = image_data.shape
    for bb in range(N_image_shape[0]):
        for nn in range(N_filter_shape[0]):
            for im0 in range(N_image_shape[1]):
                filter2d = filter_data[nn, im0, :, :]
                image2d = image_data[bb, im0, :, :]
                for row in range(ref_output.shape[2]):
                    irow = row * subsample[0]  # image row
                    for col in range(ref_output.shape[3]):
                        icol = col * subsample[1]  # image col
                        # true convolution: the kernel is flipped in both dims
                        ref_output[bb, nn, row, col] += (
                            image2d[
                                irow : irow + N_filter_shape[2],
                                icol : icol + N_filter_shape[3],
                            ]
                            * filter2d[::-1, ::-1]
                        ).sum()

    assert _allclose(pytensor_output, ref_output)

    # TEST GRADIENT
    if verify_grad:
        utt.verify_grad(sym_conv2d, [orig_image_data, filter_data])
def test_basic1(self):
    """Smallest smoke test: a 2x2 kernel over a 3x3 image, valid mode."""
    self.validate(
        image_shape=(2, 2, 3, 3),
        filter_shape=(2, 2, 2, 2),
        border_mode="valid",
        verify_grad=False,
    )
def test_basic(self):
    """Basic convolutions for odd/even and rectangular image/filter
    shapes, in both border modes."""
    # (image_shape, filter_shape, border_mode, check_gradient)
    cases = [
        ((3, 2, 8, 8), (4, 2, 5, 5), "valid", False),
        ((3, 2, 7, 5), (5, 2, 2, 3), "valid", True),
        ((3, 2, 7, 5), (5, 2, 3, 2), "valid", False),
        ((3, 2, 8, 8), (4, 2, 5, 5), "full", False),
        ((3, 2, 7, 5), (5, 2, 2, 3), "full", True),
    ]
    for image_shape, filter_shape, border_mode, check_grad in cases:
        self.validate(image_shape, filter_shape, border_mode, verify_grad=check_grad)

# test filter same size as input
def test_uint_image_shape_datatype(self):
    """Unsigned integer entries in image_shape must be accepted."""
    for image_shape in (
        (2, 2, 3, np.uint8(3)),
        (np.uint16(2), 2, 3, 3),
        (2, np.uint32(2), 3, 3),
    ):
        self.validate(image_shape, (3, 2, 3, 3), "valid", verify_grad=False)
def test_uint_filter_shape_datatype(self):
    """Unsigned integer entries in filter_shape must be accepted."""
    for filter_shape in (
        (2, 2, 3, np.uint8(3)),
        (np.uint16(2), 2, 3, 3),
        (2, np.uint32(2), 3, 3),
    ):
        self.validate((3, 2, 3, 3), filter_shape, "valid", verify_grad=False)
def test_img_kernel_same_shape(self):
    """A filter exactly the size of the image, in both border modes."""
    for border_mode in ("full", "valid"):
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), border_mode)
def test_unroll_patch_true(self):
    """Basic convolutions with unroll_patch=True."""
    # (image_shape, filter_shape, border_mode, check_gradient)
    cases = [
        ((3, 2, 7, 5), (5, 2, 2, 3), "valid", True),
        ((3, 2, 7, 5), (5, 2, 2, 3), "full", True),
        ((3, 2, 3, 3), (4, 2, 3, 3), "valid", False),
    ]
    for image_shape, filter_shape, border_mode, check_grad in cases:
        self.validate(
            image_shape,
            filter_shape,
            border_mode,
            unroll_patch=True,
            verify_grad=check_grad,
        )
def test_unroll_patch_false(self):
    """Basic convolutions with unroll_patch explicitly False."""
    # (image_shape, filter_shape, border_mode, check_gradient)
    cases = [
        ((3, 2, 7, 5), (5, 2, 2, 3), "valid", True),
        ((3, 2, 7, 5), (5, 2, 2, 3), "full", True),
        ((3, 2, 3, 3), (4, 2, 3, 3), "valid", False),
    ]
    for image_shape, filter_shape, border_mode, check_grad in cases:
        self.validate(
            image_shape,
            filter_shape,
            border_mode,
            unroll_patch=False,
            verify_grad=check_grad,
        )
def test_unroll_patch_true_fail(self):
    """With unroll_patch=True, run-time shapes contradicting the
    build-time shapes must make the run fail."""
    cases = [
        ((3, 2, 7, 5), (5, 2, 2, 3), "valid"),
        ((3, 2, 7, 5), (5, 2, 2, 3), "full"),
        ((3, 2, 3, 3), (4, 2, 3, 3), "valid"),
    ]
    for image_shape, filter_shape, border_mode in cases:
        self.validate(
            image_shape,
            filter_shape,
            border_mode,
            unroll_patch=True,
            N_image_shape=(1, 3, 3, 3),
            N_filter_shape=(6, 3, 2, 2),
            should_raise=True,
        )
def test_unroll_special(self):
    """(unroll_kern, unroll_batch) in (0,1),(1,0) is special case."""
    self.validate(
        image_shape=(6, 2, 3, 3),
        filter_shape=(3, 2, 2, 2),
        border_mode="valid",
        unroll_batch=1,
    )
def test_unroll_batch(self):
    """Mini-batch unrolling for legal divisors: batch size 6 is a
    multiple of both 2 and 3."""
    for unroll in (2, 3):
        self.validate(
            (6, 2, 3, 3), (3, 2, 2, 2), "valid", unroll_batch=unroll, verify_grad=False
        )
def test_unroll_kern(self):
    """Kernel unrolling for legal divisors: 6 filters is a multiple of
    both 2 and 3."""
    for unroll in (2, 3):
        self.validate(
            (2, 3, 3, 3), (6, 3, 2, 2), "valid", unroll_kern=unroll, verify_grad=False
        )
def test_unroll_batch_kern(self):
    """Combined mini-batch and kernel unrolling for legal divisor values."""
    # (image_shape, filter_shape, unroll_batch, unroll_kern)
    cases = [
        # mini-batch of size 6 is a multiple of 2 and 3
        ((6, 2, 3, 3), (3, 2, 2, 2), 2, 3),
        ((6, 2, 3, 3), (3, 2, 2, 2), 3, 3),
        # 6 filters is a multiple of 2 and 3
        ((2, 3, 3, 3), (6, 3, 2, 2), 2, 2),
        ((2, 3, 3, 3), (6, 3, 2, 2), 2, 3),
    ]
    for image_shape, filter_shape, ub, uk in cases:
        self.validate(
            image_shape,
            filter_shape,
            "valid",
            unroll_batch=ub,
            unroll_kern=uk,
            verify_grad=False,
        )
def test_unroll_batch_kern_fail(self):
    """Legal unroll values but mismatched run-time shapes: every case
    must make the run fail."""
    # (image_shape, filter_shape, unroll_batch, unroll_kern,
    #  N_image_shape, N_filter_shape)
    cases = [
        ((6, 2, 3, 3), (3, 2, 2, 2), 2, 3, (7, 2, 3, 3), (3, 2, 2, 2)),
        ((6, 2, 3, 3), (3, 2, 2, 2), 3, 3, (6, 2, 3, 3), (4, 2, 2, 2)),
        ((2, 3, 3, 3), (6, 3, 2, 2), 2, 2, (1, 3, 3, 3), (6, 3, 2, 2)),
        ((2, 3, 3, 3), (6, 3, 2, 2), 2, 3, (2, 3, 3, 3), (5, 3, 2, 2)),
    ]
    for image_shape, filter_shape, ub, uk, n_image, n_filter in cases:
        self.validate(
            image_shape,
            filter_shape,
            "valid",
            unroll_batch=ub,
            unroll_kern=uk,
            N_image_shape=n_image,
            N_filter_shape=n_filter,
            should_raise=True,
        )
def test_subsample(self):
    """Convolution with subsampling != (1, 1): only "full" (2, 2) works;
    every other combination must raise NotImplementedError."""
    self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", subsample=(2, 2))
    # (image_shape, filter_shape, border_mode, subsample)
    unimplemented = [
        ((1, 1, 6, 6), (1, 1, 3, 3), "full", (3, 3)),  # fails as of 2012-07-11
        ((3, 2, 7, 5), (5, 2, 2, 3), "valid", (2, 2)),  # fails as of 2017-08-10
        ((3, 2, 7, 5), (5, 2, 2, 3), "valid", (2, 1)),
        ((1, 1, 6, 6), (1, 1, 3, 3), "valid", (3, 3)),
    ]
    for image_shape, filter_shape, border_mode, ss in unimplemented:
        with pytest.raises(NotImplementedError):
            self.validate(image_shape, filter_shape, border_mode, subsample=ss)
def test_shape_Constant_tensor(self):
# Tests convolution where the {image,filter}_shape is a Constant tensor.
as_t = at.as_tensor_variable
self.validate((as_t(3), as_t(2), as_t(7), as_t(5)), (5, 2, 2, 3), "valid")
self.validate(as_t([3, 2, 7, 5]), (5, 2, 2, 3), "valid")
self.validate(as_t((3, 2, 7, 5)), (5, 2, 2, 3), "valid")
self.validate((3, 2, 7, 5), (as_t(5), as_t(2), as_t(2), as_t(3)), "valid")
self.validate((3, 2, 7, 5), as_t([5, 2, 2, 3]), "valid")
self.validate((3, 2, 7, 5), as_t((5, 2, 2, 3)), "valid")
self.validate(as_t([3, 2, 7, 5]), as_t([5, 2, 2, 3]), "full")
def test_invalid_filter_shape(self):
# Tests scenario where filter_shape[1] != input_shape[1]
with pytest.raises(AssertionError):
self.validate((3, 2, 8, 8), (4, 3, 5, 5), "valid")
def test_invalid_input_shape(self):
# Tests that when the shape given at build time is not the same as
# run time we raise an error
for unroll_batch in [None, 1, 3]:
for unroll_kern in [None, 2, 4]:
for unroll_patch in [None, True, False]:
for mode in ["valid", "full"]:
with pytest.raises(ValueError):
self.validate(
(3, 2, 8, 8),
(4, 2, 5, 5),
mode,
N_image_shape=(2, 2, 8, 8),
unroll_batch=unroll_batch,
unroll_kern=unroll_kern,
unroll_patch=unroll_patch,
)
with pytest.raises(ValueError):
self.validate(
(3, 2, 8, 8),
(4, 2, 5, 5),
mode,
N_image_shape=(3, 1, 8, 8),
unroll_batch=unroll_batch,
unroll_kern=unroll_kern,
unroll_patch=unroll_patch,
)
with pytest.raises(ValueError):
self.validate(
(3, 2, 8, 8),
(4, 2, 5, 5),
mode,
N_image_shape=(3, 2, 7, 8),
unroll_batch=unroll_batch,
unroll_kern=unroll_kern,
unroll_patch=unroll_patch,
)
with pytest.raises(ValueError):
self.validate(
(3, 2, 8, 8),
(4, 2, 5, 5),
mode,
N_image_shape=(3, 2, 8, 7),
unroll_batch=unroll_batch,
unroll_kern=unroll_kern,
unroll_patch=unroll_patch,
)
with pytest.raises(ValueError):
self.validate(
(3, 2, 8, 8),
(4, 2, 5, 5),
mode,
N_filter_shape=(3, 2, 5, 5),
unroll_batch=unroll_batch,
unroll_kern=unroll_kern,
unroll_patch=unroll_patch,
)
with pytest.raises(ValueError):
self.validate(
(3, 2, 8, 8),
(4, 2, 5, 5),
mode,
N_filter_shape=(4, 1, 5, 5),
unroll_batch=unroll_batch,
unroll_kern=unroll_kern,
unroll_patch=unroll_patch,
)
with pytest.raises(ValueError):
self.validate(
(3, 2, 8, 8),
(4, 2, 5, 5),
mode,
N_filter_shape=(4, 2, 6, 5),
unroll_batch=unroll_batch,
unroll_kern=unroll_kern,
unroll_patch=unroll_patch,
)
with pytest.raises(ValueError):
self.validate(
(3, 2, 8, 8),
(4, 2, 5, 5),
mode,
N_filter_shape=(4, 2, 5, 6),
unroll_batch=unroll_batch,
unroll_kern=unroll_kern,
unroll_patch=unroll_patch,
)
def test_missing_info(self):
# Test convolutions for various pieces of missing info.
self.validate(
None, None, N_image_shape=(3, 2, 8, 8), N_filter_shape=(4, 2, 5, 5)
)
self.validate(
(3, 2, None, None),
None,
N_image_shape=(3, 2, 8, 8),
N_filter_shape=(4, 2, 5, 5),
)
self.validate(
(None, 2, None, None),
(None, 2, 5, 5),
N_image_shape=(3, 2, 8, 8),
N_filter_shape=(4, 2, 5, 5),
)
self.validate(
(3, 2, 8, 8),
(4, 2, None, 5),
N_image_shape=(3, 2, 8, 8),
N_filter_shape=(4, 2, 5, 5),
)
self.validate(
(3, 2, 8, 8),
(4, 2, 5, None),
N_image_shape=(3, 2, 8, 8),
N_filter_shape=(4, 2, 5, 5),
)
def test_wrong_info(self):
# Test convolutions when we don't give a constant as shape information
i = pytensor.scalar.basic.int32()
with pytest.raises(NotScalarConstantError):
self.validate(
(3, 2, 8, i),
(4, 2, 5, 5),
N_image_shape=(3, 2, 8, 8),
N_filter_shape=(4, 2, 5, 5),
)
with pytest.raises(NotScalarConstantError):
self.validate(
(3, 2, 8, 8),
(4, 2, 5, i),
N_image_shape=(3, 2, 8, 8),
N_filter_shape=(4, 2, 5, 5),
)
def test_full_mode(self):
# Tests basic convolution in full mode and case where filter
# is larger than the input image.
self.validate((3, 2, 5, 5), (4, 2, 8, 8), "full")
def f():
self.validate((3, 2, 5, 5), (4, 2, 8, 8), "valid")
with pytest.raises(Exception):
f()
def test_wrong_input(self):
# Make sure errors are raised when image and kernel are not 4D tensors
with pytest.raises(Exception):
self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=dmatrix())
with pytest.raises(Exception):
self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", filters=dvector())
with pytest.raises(Exception):
self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=dtensor3())
    def test_gcc_crash(self):
        # Regression test: gcc 4.3.0 20080428 (Red Hat 4.3.0-8) crashed while
        # compiling the C code generated for this shape combination.  The
        # generated C code was changed to avoid the gcc bug, so this must now
        # compile and run without crashing (gradient check skipped for speed).
        self.validate((1, 10, 213, 129), (46, 10, 212, 1), "valid", verify_grad=False)
    def speed(self):
        """Crude conv2d benchmark (no `test_` prefix, so pytest skips it).

        Times `n_calls` executions of the compiled function for each
        combination of border mode, OpenMP setting, image shape, and filter
        shape, printing elapsed wall-clock seconds.
        """
        n_calls = 20000
        print("n_calls", n_calls)
        for border_mode in ["valid", "full"]:
            print()
            print(border_mode)
            for openmp in [False, True]:
                print("OpenMP", openmp)
                image_shapes = [
                    (1, 5, 6, 6),
                    (10, 5, 6, 6)
                    # (10, 10, 16, 16),
                    # (10, 10, 32, 32)]
                ]
                print("image_shape", image_shapes)
                for image_shape in image_shapes:
                    filter_shapes = [(1, 5, 4, 4), (2, 5, 4, 4), (5, 5, 4, 4)]
                    print("filter_shapes", filter_shapes)
                    for filter_shape in filter_shapes:
                        input = pytensor.shared(np.random.random(image_shape))
                        filters = pytensor.shared(np.random.random(filter_shape))
                        # conv.conv2d is deprecated; the warning is expected.
                        with pytest.warns(DeprecationWarning):
                            output = conv.conv2d(
                                input,
                                filters,
                                image_shape,
                                filter_shape,
                                border_mode,
                                unroll_patch=True,
                                openmp=openmp,
                            )
                        # Disable GC and use the C loop so timings measure the
                        # convolution itself, not the VM overhead.
                        mode = Mode(
                            linker=pytensor.link.vm.VMLinker(
                                allow_gc=False, use_cloop=True
                            )
                        )
                        pytensor_conv = pytensor.function([], output, mode=mode)
                        t1 = time.perf_counter()
                        pytensor_conv.vm(n_calls=n_calls)
                        t2 = time.perf_counter()
                        print(t2 - t1, end=" ")
                print()
def test_infer_shape(self):
# Note: infer_shape is incomplete and thus input and filter shapes
# must be provided explicitly
rng = np.random.default_rng(280284)
def rand(*shape):
r = np.asarray(rng.random(shape), dtype="float64")
return r * 2 - 1
adtens = dtensor4()
bdtens = dtensor4()
aivec_val = [4, 5, 6, 3]
bivec_val = [7, 5, 3, 2]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
with pytest.warns(DeprecationWarning):
self._compile_and_check(
[adtens, bdtens],
[
conv.conv2d(
adtens, bdtens, aivec_val, bivec_val, border_mode="valid"
)
],
[adtens_val, bdtens_val],
conv.ConvOp,
excluding=["conv_gemm"],
)
with pytest.warns(DeprecationWarning):
self._compile_and_check(
[adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val, border_mode="full")],
[adtens_val, bdtens_val],
conv.ConvOp,
excluding=["conv_gemm"],
)
aivec_val = [6, 2, 8, 3]
bivec_val = [4, 2, 5, 3]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
with pytest.warns(DeprecationWarning):
self._compile_and_check(
[adtens, bdtens],
[
conv.conv2d(
adtens, bdtens, aivec_val, bivec_val, border_mode="valid"
)
],
[adtens_val, bdtens_val],
conv.ConvOp,
excluding=["conv_gemm"],
)
with pytest.warns(DeprecationWarning):
self._compile_and_check(
[adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val, border_mode="full")],
[adtens_val, bdtens_val],
conv.ConvOp,
excluding=["conv_gemm"],
)
aivec_val = [3, 6, 7, 5]
bivec_val = [5, 6, 3, 2]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
with pytest.warns(DeprecationWarning):
self._compile_and_check(
[adtens, bdtens],
[
conv.conv2d(
adtens, bdtens, aivec_val, bivec_val, border_mode="valid"
)
],
[adtens_val, bdtens_val],
conv.ConvOp,
excluding=["conv_gemm"],
)
with pytest.warns(DeprecationWarning):
self._compile_and_check(
[adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val, border_mode="full")],
[adtens_val, bdtens_val],
conv.ConvOp,
excluding=["conv_gemm"],
)
aivec_val = [3, 6, 7, 5]
bivec_val = [5, 6, 2, 3]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
with pytest.warns(DeprecationWarning):
self._compile_and_check(
[adtens, bdtens],
[
conv.conv2d(
adtens, bdtens, aivec_val, bivec_val, border_mode="valid"
)
],
[adtens_val, bdtens_val],
conv.ConvOp,
excluding=["conv_gemm"],
)
with pytest.warns(DeprecationWarning):
self._compile_and_check(
[adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val, border_mode="full")],
[adtens_val, bdtens_val],
conv.ConvOp,
excluding=["conv_gemm"],
)
aivec_val = [5, 2, 4, 3]
bivec_val = [6, 2, 4, 3]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
with pytest.warns(DeprecationWarning):
self._compile_and_check(
[adtens, bdtens],
[
conv.conv2d(
adtens, bdtens, aivec_val, bivec_val, border_mode="valid"
)
],
[adtens_val, bdtens_val],
conv.ConvOp,
excluding=["conv_gemm"],
)
with pytest.warns(DeprecationWarning):
self._compile_and_check(
[adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val, border_mode="full")],
[adtens_val, bdtens_val],
conv.ConvOp,
excluding=["conv_gemm"],
)
# Test that broadcasting of gradients works correctly when using the
# nnet.conv2d() interface. This was reported in #3763, and uses the example
# code from that ticket.
def test_broadcast_grad():
    """Gradient through conv2d with a broadcasted (dimshuffled) filter.

    Regression test for issue #3763 (broadcasting of gradients through the
    nnet.conv2d() interface); uses the example code from that ticket.
    """
    x1 = tensor4("x")
    sigma = scalar("sigma")
    window_radius = 3

    # Build a 1-D Gaussian over the taps [-radius, radius] ...
    taps = at.arange(-window_radius, window_radius + 1)
    taps = taps.astype(pytensor.config.floatX)
    kernel_1d = exp(-0.5 * taps**2 / sigma**2)
    kernel_1d = kernel_1d / kernel_1d.sum()
    # ... and broadcast it to a 4-D (1, 1, width, 1) filter bank.
    filter_W = kernel_1d.dimshuffle(["x", "x", 0, "x"])

    y = conv2d(x1, filter_W, border_mode="full", filter_shape=[1, 1, None, None])
    # TODO FIXME: Make this a real test and `assert` something
    pytensor.grad(y.sum(), sigma)
import numpy as np
import pytest
import pytensor
try:
from scipy import ndimage
except ImportError:
ndimage = None
import tests.unittest_tools as utt
from pytensor.compile.sharedvalue import shared
from pytensor.graph.rewriting.basic import check_stack_trace
from pytensor.tensor.nnet.conv3d2d import (
DiagonalSubtensor,
IncDiagonalSubtensor,
conv3d,
get_diagonal_subtensor_view,
)
def test_get_diagonal_subtensor_view(wrap=lambda a: a):
    """Diagonal-subtensor views of 2d and 3d arrays match known values."""
    # 2d case: view over axes (0, 1).
    mat = wrap(np.arange(20).reshape(5, 4).astype("float32"))
    view01 = get_diagonal_subtensor_view(mat, 0, 1)
    assert np.array_equal(np.asarray(view01), [[12, 9, 6, 3], [16, 13, 10, 7]])

    # 3d case: check the (0, 1) and (0, 2) views against hand-computed
    # reference values.
    cube = np.arange(24).reshape(4, 3, 2)
    view01 = get_diagonal_subtensor_view(cube, 0, 1)
    view02 = get_diagonal_subtensor_view(cube, 0, 2)
    view12 = get_diagonal_subtensor_view(cube, 1, 2)
    assert np.array_equal(
        np.asarray(view01),
        [[[12, 13], [8, 9], [4, 5]], [[18, 19], [14, 15], [10, 11]]],
    )
    assert np.array_equal(
        np.asarray(view02),
        [
            [[6, 1], [8, 3], [10, 5]],
            [[12, 7], [14, 9], [16, 11]],
            [[18, 13], [20, 15], [22, 17]],
        ],
    )
    # The diagonal view of each leading matrix equals the corresponding
    # slice of the diagonal view of the whole 3d tensor.
    for leading, view_slice in zip(cube, view12):
        assert np.array_equal(view_slice, get_diagonal_subtensor_view(leading, 0, 1))
def pyconv3d(signals, filters, border_mode="valid"):
Ns, Ts, C, Hs, Ws = signals.shape
Nf, Tf, C, Hf, Wf = filters.shape
# if border_mode is not 'valid', the signals need zero-padding
if border_mode == "full":
Tpad = Tf - 1
Hpad = Hf - 1
Wpad = Wf - 1
elif border_mode == "half":
Tpad = Tf // 2
Hpad = Hf // 2
Wpad = Wf // 2
else:
Tpad = 0
Hpad = 0
Wpad = 0
if Tpad > 0 or Hpad > 0 or Wpad > 0:
# zero-pad signals
signals_padded = np.zeros(
(Ns, Ts + 2 * Tpad, C, Hs + 2 * Hpad, Ws + 2 * Wpad), "float32"
)
signals_padded[
:, Tpad : (Ts + Tpad), :, Hpad : (Hs + Hpad), Wpad : (Ws + Wpad)
] = signals
Ns, Ts, C, Hs, Ws = signals_padded.shape
signals = signals_padded
Tf2 = Tf // 2
Hf2 = Hf // 2
Wf2 = Wf // 2
rval = np.zeros((Ns, Ts - Tf + 1, Nf, Hs - Hf + 1, Ws - Wf + 1))
for ns in range(Ns):
for nf in range(Nf):
for c in range(C):
s_i = signals[ns, :, c, :, :]
f_i = filters[nf, :, c, :, :]
r_i = rval[ns, :, nf, :, :]
o_i = ndimage.convolve(s_i, f_i, mode="constant", cval=1)
o_i_sh0 = o_i.shape[0]
# print s_i.shape, f_i.shape, r_i.shape, o_i.shape
r_i += o_i[Tf2 : o_i_sh0 - Tf2, Hf2:-Hf2, Wf2:-Wf2]
return rval
def check_diagonal_subtensor_view_traces(fn):
    """Assert fn's graph preserves stack traces for the diagonal-subtensor ops."""
    tracked_ops = (DiagonalSubtensor, IncDiagonalSubtensor)
    assert check_stack_trace(fn, ops_to_check=tracked_ops)
@pytest.mark.skipif(
    ndimage is None or not pytensor.config.cxx,
    reason="conv3d2d tests need SciPy and a c++ compiler",
)
@pytest.mark.parametrize("border_mode", ("valid", "full", "half"))
def test_conv3d(border_mode):
    """Compare conv3d against the pure-scipy reference and verify gradients.

    Runs the forward pass and gradient graph on a large deterministic input,
    numerically verifies gradients on a small random input, and repeats the
    forward/gradient check for the special case of filters with Tf == 1.
    """
    if pytensor.config.mode == "FAST_COMPILE":
        mode = pytensor.compile.mode.get_mode("FAST_RUN")
    else:
        mode = pytensor.compile.mode.get_default_mode()

    Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
    Nf, Tf, C, Hf, Wf = 32, 5, 3, 5, 5

    # Deterministic ramp inputs make the forward comparison reproducible.
    signals = (
        np.arange(Ns * Ts * C * Hs * Ws).reshape(Ns, Ts, C, Hs, Ws).astype("float32")
    )
    filters = (
        np.arange(Nf * Tf * C * Hf * Wf).reshape(Nf, Tf, C, Hf, Wf).astype("float32")
    )

    # t0 = time.perf_counter()
    pyres = pyconv3d(signals, filters, border_mode)
    # print(time.perf_counter() - t0)

    s_signals = shared(signals)
    s_filters = shared(filters)
    s_output = shared(signals * 0)

    out = conv3d(
        s_signals,
        s_filters,
        signals_shape=signals.shape,
        filters_shape=filters.shape,
        border_mode=border_mode,
    )

    newconv3d = pytensor.function([], [], updates={s_output: out}, mode=mode)

    # The compiled graphs must keep stack traces for the diagonal ops.
    check_diagonal_subtensor_view_traces(newconv3d)
    # t0 = time.perf_counter()
    newconv3d()
    # print(time.perf_counter() - t0)
    utt.assert_allclose(pyres, s_output.get_value(borrow=True))
    gsignals, gfilters = pytensor.grad(out.sum(), [s_signals, s_filters])
    gnewconv3d = pytensor.function(
        [],
        [],
        updates=[(s_filters, gfilters), (s_signals, gsignals)],
        mode=mode,
        name="grad",
    )
    check_diagonal_subtensor_view_traces(gnewconv3d)
    # t0 = time.perf_counter()
    gnewconv3d()
    # print("grad", time.perf_counter() - t0)

    # Numerical gradient verification on a small random problem.
    Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5
    Nf, Tf, C, Hf, Wf = 4, 2, 3, 2, 2

    rng = np.random.default_rng(280284)
    signals = rng.random((Ns, Ts, C, Hs, Ws)).astype("float32")
    filters = rng.random((Nf, Tf, C, Hf, Wf)).astype("float32")
    utt.verify_grad(
        lambda s, f: conv3d(s, f, border_mode=border_mode),
        [signals, filters],
        eps=1e-1,
        mode=mode,
    )

    # Additional Test that covers the case of patched implementation for filter with Tf=1
    Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
    Nf, Tf, C, Hf, Wf = 32, 1, 3, 5, 5

    signals = (
        np.arange(Ns * Ts * C * Hs * Ws).reshape(Ns, Ts, C, Hs, Ws).astype("float32")
    )
    filters = (
        np.arange(Nf * Tf * C * Hf * Wf).reshape(Nf, Tf, C, Hf, Wf).astype("float32")
    )

    # t0 = time.perf_counter()
    pyres = pyconv3d(signals, filters, border_mode)
    # print(time.perf_counter() - t0)

    s_signals = shared(signals)
    s_filters = shared(filters)
    s_output = shared(signals * 0)

    out = conv3d(
        s_signals,
        s_filters,
        signals_shape=signals.shape,
        filters_shape=filters.shape,
        border_mode=border_mode,
    )

    newconv3d = pytensor.function([], [], updates={s_output: out}, mode=mode)

    # t0 = time.perf_counter()
    newconv3d()
    # print(time.perf_counter() - t0)
    utt.assert_allclose(pyres, s_output.get_value(borrow=True))
    gsignals, gfilters = pytensor.grad(out.sum(), [s_signals, s_filters])
    gnewconv3d = pytensor.function(
        [],
        [],
        updates=[(s_filters, gfilters), (s_signals, gsignals)],
        mode=mode,
        name="grad",
    )
    # t0 = time.perf_counter()
    gnewconv3d()
    # print("grad", time.perf_counter() - t0)

    Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5
    Nf, Tf, C, Hf, Wf = 4, 1, 3, 2, 2

    signals = rng.random((Ns, Ts, C, Hs, Ws)).astype("float32")
    filters = rng.random((Nf, Tf, C, Hf, Wf)).astype("float32")
    utt.verify_grad(
        lambda s, f: conv3d(s, f, border_mode=border_mode),
        [signals, filters],
        eps=1e-1,
        mode=mode,
    )
import numpy as np
import pytest
import pytensor
import pytensor.tensor as at
from pytensor.tensor.nnet import corr
from pytensor.tensor.type import dmatrix, dtensor3, dtensor4, dvector, tensor4
from tests import unittest_tools as utt
from tests.tensor.nnet.test_abstract_conv import (
TestAsymmetricPadding,
TestCausalConv,
TestGroupedConvNoOptim,
TestUnsharedConv,
)
@pytest.mark.skipif(
    pytensor.config.cxx == "",
    reason="SciPy and cxx needed",
)
class TestCorr2D(utt.InferShapeTester):
    """Tests for the CorrMM 2d correlation Op.

    The `validate` helper runs CorrMM on random data and compares the result
    against a slow pure-NumPy reference implementation, optionally verifying
    gradients numerically.
    """

    if pytensor.config.mode == "FAST_COMPILE":
        mode = pytensor.compile.get_mode("FAST_RUN")
    else:
        mode = None
    dtype = pytensor.config.floatX

    def setup_method(self):
        # Default symbolic inputs; individual tests may override them via
        # the `input` / `filters` arguments of `validate`.
        self.input = tensor4("input", dtype=self.dtype)
        self.input.name = "default_V"
        self.filters = tensor4("filters", dtype=self.dtype)
        self.filters.name = "default_filters"
        # These tests can run even when pytensor.config.blas__ldflags is empty.
        super().setup_method()

    def validate(
        self,
        image_shape,
        filter_shape,
        border_mode="valid",
        subsample=(1, 1),
        input=None,
        filters=None,
        verify_grad=True,
        non_contiguous=False,
        filter_dilation=(1, 1),
    ):
        """Run CorrMM and compare against a pure-NumPy reference.

        :param image_shape: The constant shape info passed to corrMM.
        :param filter_shape: The constant shape info passed to corrMM.
        """
        if not pytensor.config.cxx:
            pytest.skip("Need cxx to test conv2d")
        N_image_shape = [
            at.get_scalar_constant_value(at.as_tensor_variable(x)) for x in image_shape
        ]
        N_filter_shape = [
            at.get_scalar_constant_value(at.as_tensor_variable(x)) for x in filter_shape
        ]

        if input is None:
            input = self.input
        if filters is None:
            filters = self.filters

        # PYTENSOR IMPLEMENTATION
        # we create a symbolic function so that verify_grad can work
        def sym_CorrMM(input, filters):
            # define pytensor graph and function
            input.name = "input"
            filters.name = "filters"
            rval = corr.CorrMM(border_mode, subsample, filter_dilation)(input, filters)
            rval.name = "corr_output"
            return rval

        output = sym_CorrMM(input, filters)
        output.name = f"CorrMM()({input.name},{filters.name})"
        pytensor_corr = pytensor.function([input, filters], output, mode=self.mode)

        # initialize input and compute result
        image_data = np.random.random(N_image_shape).astype(self.dtype)
        filter_data = np.random.random(N_filter_shape).astype(self.dtype)
        if non_contiguous:
            # transpose-copy-transpose leaves the data logically unchanged
            # but in non-C-contiguous memory layout
            image_data = np.transpose(image_data, axes=(0, 1, 3, 2))
            image_data = image_data.copy()
            image_data = np.transpose(image_data, axes=(0, 1, 3, 2))
            filter_data = np.transpose(filter_data, axes=(0, 1, 3, 2))
            filter_data = filter_data.copy()
            filter_data = np.transpose(filter_data, axes=(0, 1, 3, 2))
            assert not image_data.flags["CONTIGUOUS"]
            assert not filter_data.flags["CONTIGUOUS"]

        pytensor_output = pytensor_corr(image_data, filter_data)

        # REFERENCE IMPLEMENTATION
        # Testing correlation, not convolution. Reverse filters.
        filter_data_corr = np.array(filter_data[:, :, ::-1, ::-1], copy=True, order="C")
        orig_image_data = image_data
        img_shape2d = np.array(N_image_shape[-2:])
        fil_shape2d = np.array(N_filter_shape[-2:])
        dil_shape2d = np.array(filter_dilation)
        # effective filter extent after dilation
        dil_fil_shape2d = (fil_shape2d - 1) * dil_shape2d + 1
        subsample2d = np.array(subsample)
        if border_mode == "full":
            padHW = dil_fil_shape2d - 1
        elif border_mode == "valid":
            padHW = np.array([0, 0])
        elif border_mode == "half":
            padHW = np.floor(dil_fil_shape2d / 2).astype("int32")
        elif isinstance(border_mode, tuple):
            padHW = np.array(border_mode)
        elif isinstance(border_mode, int):
            padHW = np.array([border_mode, border_mode])
        else:
            raise NotImplementedError(f"Unsupported border_mode {border_mode}")
        out_shape2d = (
            np.floor((img_shape2d + 2 * (padHW) - dil_fil_shape2d) / subsample2d) + 1
        )
        # avoid numpy deprecation
        out_shape2d = out_shape2d.astype("int32")
        out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape2d)
        ref_output = np.zeros(out_shape)

        # loop over output feature maps
        ref_output.fill(0)
        # zero-pad the image according to padHW before correlating
        image_data2 = np.zeros(
            (
                N_image_shape[0],
                N_image_shape[1],
                N_image_shape[2] + 2 * padHW[0],
                N_image_shape[3] + 2 * padHW[1],
            )
        )
        image_data2[
            :,
            :,
            padHW[0] : padHW[0] + N_image_shape[2],
            padHW[1] : padHW[1] + N_image_shape[3],
        ] = image_data
        image_data = image_data2
        N_image_shape = image_data.shape
        for bb in range(N_image_shape[0]):
            for nn in range(N_filter_shape[0]):
                for im0 in range(N_image_shape[1]):
                    filter2d = filter_data_corr[nn, im0, :, :]
                    image2d = image_data[bb, im0, :, :]
                    for row in range(ref_output.shape[2]):
                        irow = row * subsample[0]  # image row
                        for col in range(ref_output.shape[3]):
                            icol = col * subsample[1]  # image col
                            # dilated window, filter flipped back to
                            # correlation orientation
                            ref_output[bb, nn, row, col] += (
                                image2d[
                                    irow : irow
                                    + dil_fil_shape2d[0] : filter_dilation[0],
                                    icol : icol
                                    + dil_fil_shape2d[1] : filter_dilation[1],
                                ]
                                * filter2d[::-1, ::-1]
                            ).sum()

        utt.assert_allclose(ref_output, pytensor_output)

        # TEST GRADIENT
        if verify_grad:
            utt.verify_grad(sym_CorrMM, [orig_image_data, filter_data], mode=self.mode)

    @pytest.mark.slow
    def test_basic(self):
        # Tests that basic correlations work for odd and even
        # dimensions of image and filter shapes, as well as rectangular
        # images and filters.
        border_modes = ["valid", "full", "half", (1, 1), (2, 1), (1, 2), (3, 3), 1]
        img_shapes = [
            (2, 2, 3, 3),
            (3, 2, 8, 8),
            (3, 2, 7, 5),
            (3, 2, 7, 5),
            (3, 2, 8, 8),
            (3, 2, 7, 5),
        ]
        fil_shapes = [
            (2, 2, 2, 2),
            (4, 2, 5, 5),
            (5, 2, 2, 3),
            (5, 2, 3, 2),
            (4, 2, 5, 5),
            (5, 2, 2, 3),
        ]
        for border_mode in border_modes:
            for img, fil in zip(img_shapes, fil_shapes):
                self.validate(img, fil, border_mode, verify_grad=False)
        # Very slow on with 'full' or 'half'
        self.validate((1, 10, 213, 129), (46, 10, 212, 1), "valid", verify_grad=False)

    def test_img_kernel_same_shape(self):
        # Image and kernel with identical spatial extent, all border modes.
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), "full")
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), "valid")
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), "half")
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), (1, 1))
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), 1)

    @pytest.mark.slow
    def test_subsample(self):
        # Tests correlation where subsampling != (1,1)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), "valid", subsample=(3, 3))

        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), "full", subsample=(3, 3))

        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "half", subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "half", subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), "half", subsample=(3, 3))

        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 1), subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1), subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), (1, 2), subsample=(3, 3))

        self.validate((1, 1, 6, 6), (1, 1, 3, 3), 1, subsample=(3, 3))

    def test_filter_dilation(self):
        # Tests correlation where filter dilation != (1,1)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", filter_dilation=(2, 2))
        self.validate((3, 2, 14, 10), (5, 2, 2, 3), "valid", filter_dilation=(3, 1))
        self.validate((1, 1, 14, 14), (1, 1, 3, 3), "valid", filter_dilation=(2, 3))

        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", filter_dilation=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", filter_dilation=(3, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), "full", filter_dilation=(2, 3))

        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "half", filter_dilation=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "half", filter_dilation=(3, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), "half", filter_dilation=(2, 3))

        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 1), filter_dilation=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1), filter_dilation=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), (1, 2), filter_dilation=(1, 2))

        self.validate(
            (1, 1, 6, 6), (1, 1, 3, 3), 1, subsample=(3, 3), filter_dilation=(2, 2)
        )

    @pytest.mark.slow
    def test_shape_Constant_tensor(self):
        # Tests correlation where the {image,filter}_shape is a Constant tensor.
        as_t = at.as_tensor_variable
        border_modes = ["valid", "full", "half", (1, 1), (2, 1), (1, 2), (3, 3), 1]

        for border_mode in border_modes:
            self.validate(
                (as_t(3), as_t(2), as_t(7), as_t(5)), (5, 2, 2, 3), border_mode
            )
            self.validate(as_t([3, 2, 7, 5]), (5, 2, 2, 3), border_mode)
            self.validate(as_t((3, 2, 7, 5)), (5, 2, 2, 3), border_mode)
            self.validate((3, 2, 7, 5), (as_t(5), as_t(2), as_t(2), as_t(3)), "valid")
            self.validate((3, 2, 7, 5), as_t([5, 2, 2, 3]), border_mode)
            self.validate(as_t([3, 2, 7, 5]), as_t([5, 2, 2, 3]), border_mode)

    def test_invalid_filter_shape(self):
        # Tests scenario where filter_shape[1] != input_shape[1]
        with pytest.raises(ValueError):
            self.validate((3, 2, 8, 8), (4, 3, 5, 5), "valid")

    def test_full_mode(self):
        # Tests basic correlation in full mode and case where filter
        # is larger than the input image.
        self.validate((3, 2, 5, 5), (4, 2, 8, 8), "full")

        def f():
            self.validate((3, 2, 5, 5), (4, 2, 8, 8), "valid")

        with pytest.raises(Exception):
            f()

    def test_wrong_input(self):
        # Make sure errors are raised when image and kernel are not 4D tensors
        with pytest.raises(Exception):
            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=dmatrix())
        with pytest.raises(Exception):
            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", filters=dvector())
        with pytest.raises(Exception):
            self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=dtensor3())

    @pytest.mark.skipif(not pytensor.config.cxx, reason="Need cxx for this test")
    def test_dtype_upcast(self):
        # Checks dtype upcast for CorrMM methods.
        rng = np.random.default_rng(280284)

        def rand(shape, dtype="float64"):
            r = np.asarray(rng.random(shape), dtype=dtype)
            return r * 2 - 1

        ops = [corr.CorrMM, corr.CorrMM_gradWeights, corr.CorrMM_gradInputs]
        a_shapes = [[4, 5, 6, 3], [1, 5, 6, 3], [1, 5, 6, 3]]
        b_shapes = [[7, 5, 3, 2], [1, 5, 3, 1], [7, 1, 3, 1]]
        dtypes = ["float32", "float64"]

        for op, a_shape, b_shape in zip(ops, a_shapes, b_shapes):
            for a_dtype in dtypes:
                for b_dtype in dtypes:
                    # output dtype must follow the scalar upcast rules
                    c_dtype = pytensor.scalar.upcast(a_dtype, b_dtype)
                    a_tens = tensor4(dtype=a_dtype)
                    b_tens = tensor4(dtype=b_dtype)
                    a_tens_val = rand(a_shape, dtype=a_dtype)
                    b_tens_val = rand(b_shape, dtype=b_dtype)

                    c_tens = op()(a_tens, b_tens)
                    f = pytensor.function([a_tens, b_tens], c_tens, mode=self.mode)
                    assert f(a_tens_val, b_tens_val).dtype == c_dtype

    @pytest.mark.slow
    @pytest.mark.skipif(
        pytensor.config.cxx == "",
        reason="SciPy and cxx needed",
    )
    def test_infer_shape_forward(self):
        rng = np.random.default_rng(280284)

        def rand(*shape):
            r = np.asarray(rng.random(shape), dtype="float64")
            return r * 2 - 1

        corrMM = corr.CorrMM

        adtens = dtensor4()
        bdtens = dtensor4()
        aivec_vals = [
            [4, 5, 6, 3],
            [6, 2, 8, 3],
            [3, 6, 7, 5],
            [3, 6, 7, 5],
            [5, 2, 4, 3],
        ]
        bivec_vals = [
            [7, 5, 3, 2],
            [4, 2, 5, 3],
            [5, 6, 3, 2],
            [5, 6, 2, 3],
            [6, 2, 4, 3],
        ]
        modes = ["valid", "full", "half", (1, 1), (2, 1), (1, 2), 1]
        subsamples = [(1, 1), (2, 1), (1, 2)]

        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # CorrMM
                    cdtens = corrMM(border_mode=mode, subsample=subsample)(
                        adtens, bdtens
                    )
                    self._compile_and_check(
                        [adtens, bdtens],
                        [cdtens],
                        [adtens_val, bdtens_val],
                        corrMM,
                        warn=False,
                    )

    @pytest.mark.slow
    @pytest.mark.skipif(
        pytensor.config.mode == "FAST_COMPILE" or pytensor.config.cxx == "",
        reason="SciPy and cxx needed",
    )
    def test_infer_shape_gradW(self):
        rng = np.random.default_rng(280284)

        def rand(*shape):
            r = np.asarray(rng.random(shape), dtype="float64")
            return r * 2 - 1

        corrMM = corr.CorrMM
        gradW = corr.CorrMM_gradWeights

        adtens = dtensor4()
        bdtens = dtensor4()
        aivec_vals = [
            [1, 5, 6, 3],
            [8, 2, 7, 3],
            [1, 6, 9, 4],
            [9, 6, 8, 5],
            [9, 1, 6, 8],
        ]
        bivec_vals = [
            [7, 5, 3, 1],
            [4, 2, 5, 3],
            [12, 6, 3, 2],
            [5, 6, 1, 3],
            [11, 1, 3, 3],
        ]
        modes = ["valid", "full", "half", (1, 1), (2, 1), (1, 2), 1]
        subsamples = [(1, 1), (2, 1), (1, 2)]

        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # CorrMM: compute a concrete forward output first, then
                    # infer the weight-gradient shape from it
                    cdtens = corrMM(border_mode=mode, subsample=subsample)(
                        adtens, bdtens
                    )
                    f = pytensor.function([adtens, bdtens], cdtens)
                    cdtens_val = f(adtens_val, bdtens_val)
                    # CorrMM_gradWeights
                    shape = (
                        pytensor.shared(bivec_val[2]),
                        pytensor.shared(bivec_val[3]),
                    )
                    bdtens_g = gradW(border_mode=mode, subsample=subsample)(
                        adtens, cdtens, shape=shape
                    )
                    self._compile_and_check(
                        [adtens, cdtens],
                        [bdtens_g],
                        [adtens_val, cdtens_val],
                        gradW,
                        warn=False,
                    )

    @pytest.mark.slow
    @pytest.mark.skipif(
        pytensor.config.mode == "FAST_COMPILE" or not pytensor.config.cxx,
        reason="Need cxx for this test",
    )
    def test_infer_shape_gradI(self):
        rng = np.random.default_rng(280284)

        def rand(*shape):
            r = np.asarray(rng.random(shape), dtype="float64")
            return r * 2 - 1

        corrMM = corr.CorrMM
        gradI = corr.CorrMM_gradInputs

        adtens = dtensor4()
        bdtens = dtensor4()
        aivec_vals = [
            [1, 5, 6, 3],
            [8, 2, 7, 3],
            [1, 6, 9, 4],
            [9, 6, 8, 5],
            [9, 1, 6, 8],
        ]
        bivec_vals = [
            [7, 5, 3, 1],
            [4, 2, 5, 3],
            [12, 6, 3, 2],
            [5, 6, 1, 3],
            [7, 1, 3, 4],
        ]
        modes = ["valid", "full", "half", (1, 1), (2, 1), (1, 2), 1]
        subsamples = [(1, 1), (2, 1), (1, 2)]

        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # CorrMM: compute a concrete forward output first, then
                    # infer the input-gradient shape from it
                    cdtens = corrMM(border_mode=mode, subsample=subsample)(
                        adtens, bdtens
                    )
                    f = pytensor.function([adtens, bdtens], cdtens)
                    cdtens_val = f(adtens_val, bdtens_val)
                    # CorrMM_gradInputs
                    shape = (
                        pytensor.shared(aivec_val[2]),
                        pytensor.shared(aivec_val[3]),
                    )
                    adtens_g = gradI(border_mode=mode, subsample=subsample)(
                        bdtens, cdtens, shape=shape
                    )
                    self._compile_and_check(
                        [bdtens, cdtens],
                        [adtens_g],
                        [bdtens_val, cdtens_val],
                        gradI,
                        warn=False,
                    )

    def test_non_contiguous(self):
        # Correlation must give the same result on non-C-contiguous inputs.
        self.validate((2, 2, 3, 3), (2, 2, 2, 2), "valid", non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 3, 2), "valid", non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), "full", non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), "half", non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), "half", non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), (1, 1), non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 2), non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1), non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 2, non_contiguous=True)
class TestGroupCorr2d(TestGroupedConvNoOptim):
    """Grouped-convolution tests specialized to the CorrMM family of Ops."""

    mode = pytensor.compile.get_mode("FAST_RUN").excluding("gpuarray")
    conv_op = corr.CorrMM
    conv_gradw_op = corr.CorrMM_gradWeights
    conv_gradi_op = corr.CorrMM_gradInputs

    def test_graph(self):
        """Grouped conv must equal per-group convs concatenated on axis 1."""
        # define common values first
        groups = 3
        rng = np.random.default_rng(280284)
        bottom = rng.random((3, 6, 5, 5)).astype(pytensor.config.floatX)
        kern = rng.random((9, 2, 3, 3)).astype(pytensor.config.floatX)
        bottom_sym = tensor4("bottom")
        kern_sym = tensor4("kern")

        # grouped convolution graph
        # NOTE(review): `self.conv` is presumably provided by
        # TestGroupedConvNoOptim — confirm against the base class.
        conv_group = self.conv(num_groups=groups)(bottom_sym, kern_sym)
        gconv_func = pytensor.function(
            [bottom_sym, kern_sym], conv_group, mode=self.mode
        )

        # Graph for the normal hard way: slice channels/kernels per group,
        # convolve each group separately, then concatenate the outputs.
        kern_offset = kern_sym.shape[0] // groups
        bottom_offset = bottom_sym.shape[1] // groups
        split_conv_output = [
            self.conv()(
                bottom_sym[:, i * bottom_offset : (i + 1) * bottom_offset, :, :],
                kern_sym[i * kern_offset : (i + 1) * kern_offset, :, :, :],
            )
            for i in range(groups)
        ]
        concatenated_output = at.concatenate(split_conv_output, axis=1)
        conv_func = pytensor.function(
            [bottom_sym, kern_sym], concatenated_output, mode=self.mode
        )

        # calculate outputs for each graph
        gconv_output = gconv_func(bottom, kern)
        conv_output = conv_func(bottom, kern)

        # compare values
        utt.assert_allclose(gconv_output, conv_output)
class TestUnsharedCorr2d(TestUnsharedConv):
    """Runs the unshared-convolution test suite with the CorrMM Ops."""

    if pytensor.config.mode == "FAST_COMPILE":
        mode = pytensor.compile.get_mode("FAST_RUN").excluding("gpuarray")
    else:
        mode = None
    conv2d_op = corr.CorrMM
    conv2d_gradw_op = corr.CorrMM_gradWeights
    conv2d_gradi_op = corr.CorrMM_gradInputs
class TestAsymmetricCorr(TestAsymmetricPadding):
    """Runs the asymmetric-padding test suite with the CorrMM Ops."""

    if pytensor.config.mode == "FAST_COMPILE":
        mode = pytensor.compile.get_mode("FAST_RUN").excluding("gpuarray")
    else:
        mode = None
    conv2d_op = corr.CorrMM
    conv2d_gradw_op = corr.CorrMM_gradWeights
    conv2d_gradi_op = corr.CorrMM_gradInputs
class TestCausalCorr(TestCausalConv):
    """Runs the causal-convolution test suite under a FAST_RUN mode.

    NOTE(review): unlike the sibling subclasses, no conv2d_op /
    gradient-op attributes are overridden here — presumably the
    TestCausalConv defaults are intended; confirm.
    """

    if pytensor.config.mode == "FAST_COMPILE":
        mode = pytensor.compile.get_mode("FAST_RUN").excluding("gpuarray")
    else:
        mode = None
import numpy as np
import pytest
import pytensor
import pytensor.tensor as at
from pytensor.tensor.nnet import corr3d
from pytensor.tensor.type import dmatrix, dtensor3, dtensor4, dtensor5, tensor5, vector
from tests import unittest_tools as utt
from tests.tensor.nnet.test_abstract_conv import TestGroupedConv3dNoOptim
@pytest.mark.skipif(
    pytensor.config.cxx == "",
    reason="SciPy and cxx needed",
)
class TestCorr3D(utt.InferShapeTester):
    """Tests for the CPU 3D correlation Ops (``Corr3dMM`` and its gradients).

    Every test ultimately goes through :meth:`validate`, which compares the
    compiled Op output against a naive pure-NumPy reference correlation.
    """

    # Under FAST_COMPILE, switch to FAST_RUN so the Corr3dMM Op is used.
    if pytensor.config.mode == "FAST_COMPILE":
        mode = pytensor.compile.get_mode("FAST_RUN")
    else:
        mode = None
    dtype = pytensor.config.floatX
    def setup_method(self):
        # Default symbolic 5D inputs; individual tests may override them.
        self.input = tensor5("input", dtype=self.dtype)
        self.input.name = "default_V"
        self.filters = tensor5("filters", dtype=self.dtype)
        self.filters.name = "default_filters"
        # These tests can run even when pytensor.config.blas__ldflags is empty.
        super().setup_method()
    def validate(
        self,
        image_shape,
        filter_shape,
        border_mode="valid",
        subsample=(1, 1, 1),
        input=None,
        filters=None,
        verify_grad=True,
        non_contiguous=False,
        filter_dilation=(1, 1, 1),
    ):
        """
        Compare ``Corr3dMM`` output (and optionally its gradient) against a
        naive NumPy reference correlation.

        :param image_shape: The constant shape info passed to corr3dMM.
        :param filter_shape: The constant shape info passed to corr3dMM.
        """
        if not pytensor.config.cxx:
            pytest.skip("Need cxx for this test")
        # Resolve possibly-symbolic shape entries to plain Python ints.
        N_image_shape = [
            at.get_scalar_constant_value(at.as_tensor_variable(x)) for x in image_shape
        ]
        N_filter_shape = [
            at.get_scalar_constant_value(at.as_tensor_variable(x)) for x in filter_shape
        ]
        if input is None:
            input = self.input
        if filters is None:
            filters = self.filters
        # PYTENSOR IMPLEMENTATION
        # we create a symbolic function so that verify_grad can work
        def sym_Corr3dMM(input, filters):
            # define pytensor graph and function
            input.name = "input"
            filters.name = "filters"
            rval = corr3d.Corr3dMM(border_mode, subsample, filter_dilation)(
                input, filters
            )
            rval.name = "corr_output"
            return rval
        output = sym_Corr3dMM(input, filters)
        output.name = f"Corr3dMM()({input.name},{filters.name})"
        pytensor_corr = pytensor.function([input, filters], output, mode=self.mode)
        # initialize input and compute result
        rng = np.random.default_rng(28483)
        image_data = rng.random(N_image_shape).astype(self.dtype)
        filter_data = rng.random(N_filter_shape).astype(self.dtype)
        image_data /= 10
        filter_data /= 10
        if non_contiguous:
            # Transpose, copy, and transpose back to obtain non-contiguous
            # arrays with the original logical layout.
            image_data = np.transpose(image_data, axes=(0, 1, 4, 3, 2))
            image_data = image_data.copy()
            image_data = np.transpose(image_data, axes=(0, 1, 4, 3, 2))
            filter_data = np.transpose(filter_data, axes=(0, 1, 4, 3, 2))
            filter_data = filter_data.copy()
            filter_data = np.transpose(filter_data, axes=(0, 1, 4, 3, 2))
            assert not image_data.flags["CONTIGUOUS"]
            assert not filter_data.flags["CONTIGUOUS"]
        pytensor_output = pytensor_corr(image_data, filter_data)
        # REFERENCE IMPLEMENTATION
        # Testing correlation, not convolution. Reverse filters.
        filter_data_corr = np.array(
            filter_data[:, :, ::-1, ::-1, ::-1], copy=True, order="C"
        )
        orig_image_data = image_data
        img_shape3d = np.array(N_image_shape[-3:])
        fil_shape3d = np.array(N_filter_shape[-3:])
        dil_shape3d = np.array(filter_dilation)
        # Effective (dilated) filter extent along each spatial axis.
        dil_fil_shape3d = (fil_shape3d - 1) * dil_shape3d + 1
        subsample3d = np.array(subsample)
        # Per-axis padding implied by the border mode.
        if border_mode == "full":
            padHWD = dil_fil_shape3d - 1
        elif border_mode == "valid":
            padHWD = np.array([0, 0, 0])
        elif border_mode == "half":
            padHWD = np.floor(dil_fil_shape3d / 2).astype("int32")
        elif isinstance(border_mode, tuple):
            padHWD = np.array(border_mode)
        elif isinstance(border_mode, int):
            padHWD = np.array([border_mode, border_mode, border_mode])
        else:
            raise NotImplementedError(f"Unsupported border_mode {border_mode}")
        out_shape3d = (
            np.floor((img_shape3d + 2 * (padHWD) - dil_fil_shape3d) / subsample3d) + 1
        )
        # avoid numpy deprecation
        out_shape3d = out_shape3d.astype("int32")
        out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape3d)
        ref_output = np.zeros(out_shape)
        # loop over output feature maps
        ref_output.fill(0)
        # Zero-pad the image according to the border mode before correlating.
        image_data2 = np.zeros(
            (
                N_image_shape[0],
                N_image_shape[1],
                N_image_shape[2] + 2 * padHWD[0],
                N_image_shape[3] + 2 * padHWD[1],
                N_image_shape[4] + 2 * padHWD[2],
            )
        )
        image_data2[
            :,
            :,
            padHWD[0] : padHWD[0] + N_image_shape[2],
            padHWD[1] : padHWD[1] + N_image_shape[3],
            padHWD[2] : padHWD[2] + N_image_shape[4],
        ] = image_data
        image_data = image_data2
        N_image_shape = image_data.shape
        for bb in range(N_image_shape[0]):
            for nn in range(N_filter_shape[0]):
                for im0 in range(N_image_shape[1]):
                    filter3d = filter_data_corr[nn, im0, :, :, :]
                    image3d = image_data[bb, im0, :, :, :]
                    for row in range(ref_output.shape[2]):
                        irow = row * subsample[0]  # image row
                        for col in range(ref_output.shape[3]):
                            icol = col * subsample[1]  # image col
                            for slc in range(ref_output.shape[4]):
                                islc = slc * subsample[2]  # image slice
                                # Strided slicing implements filter dilation;
                                # flipping filter3d back turns the flipped
                                # kernel into a correlation.
                                ref_output[bb, nn, row, col, slc] += (
                                    image3d[
                                        irow : irow
                                        + dil_fil_shape3d[0] : filter_dilation[0],
                                        icol : icol
                                        + dil_fil_shape3d[1] : filter_dilation[1],
                                        islc : islc
                                        + dil_fil_shape3d[2] : filter_dilation[2],
                                    ]
                                    * filter3d[::-1, ::-1, ::-1]
                                ).sum()
        utt.assert_allclose(pytensor_output, ref_output)
        # TEST GRADIENT
        if verify_grad:
            utt.verify_grad(
                sym_Corr3dMM, [orig_image_data, filter_data], mode=self.mode
            )
    @pytest.mark.slow
    def test_basic(self):
        # Tests that basic correlations work for odd and even
        # dimensions of image and filter shapes, as well as rectangular
        # images and filters.
        border_modes = [
            "valid",
            "full",
            "half",
            (1, 1, 1),
            (2, 1, 1),
            (1, 2, 1),
            (1, 1, 2),
            (3, 3, 3),
            1,
        ]
        img_shapes = [
            (2, 2, 3, 3, 3),
            (3, 2, 8, 8, 8),
            (3, 2, 7, 5, 5),
            (3, 2, 7, 5, 5),
            (1, 2, 8, 8, 8),
            (1, 2, 7, 5, 5),
        ]
        fil_shapes = [
            (2, 2, 2, 2, 2),
            (1, 2, 5, 5, 5),
            (2, 2, 2, 3, 2),
            (2, 2, 3, 2, 2),
            (1, 2, 5, 5, 5),
            (1, 2, 2, 3, 3),
        ]
        for border_mode in border_modes:
            for img, fil in zip(img_shapes, fil_shapes):
                self.validate(img, fil, border_mode, verify_grad=False)
        # Very slow on with 'full' or 'half'
        self.validate((1, 2, 53, 29, 11), (13, 2, 12, 1, 1), "valid", verify_grad=False)
    def test_img_kernel_same_shape(self):
        """Correlation where the kernel spans the whole image, for every mode."""
        self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), "full")
        self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), "valid")
        self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), "half")
        self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), (1, 1, 1))
        self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), 1)
    @pytest.mark.slow
    def test_subsample(self):
        # Tests correlation where subsampling != (1,1,1)
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "valid", subsample=(2, 2, 2))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "valid", subsample=(2, 1, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), "valid", subsample=(3, 3, 3))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "full", subsample=(2, 2, 2))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "full", subsample=(2, 1, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), "full", subsample=(3, 3, 3))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "half", subsample=(2, 2, 2))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "half", subsample=(2, 1, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), "half", subsample=(3, 3, 3))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (1, 1, 1), subsample=(2, 2, 2))
        self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (2, 1, 1), subsample=(2, 1, 1))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), (1, 2, 2), subsample=(3, 3, 3))
        self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), 1, subsample=(3, 3, 3))
    # Tests correlation where filter dilation != (1,1,1)
    @pytest.mark.parametrize(
        "image_shape, filter_shape, border_mode, filter_dilation",
        [
            ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "valid", (2, 2, 2)),
            ((3, 2, 14, 10, 10), (2, 2, 2, 3, 3), "valid", (3, 1, 1)),
            ((1, 1, 14, 14, 14), (1, 1, 3, 3, 3), "valid", (2, 3, 3)),
            ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "full", (2, 2, 2)),
            ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "full", (3, 1, 1)),
            ((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), "full", (2, 3, 3)),
            ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "half", (2, 2, 2)),
            ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "half", (3, 1, 1)),
            ((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), "half", (2, 3, 3)),
            ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (1, 1, 1), (2, 2, 2)),
            ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (2, 1, 1), (2, 1, 1)),
            ((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), (1, 2, 1), (1, 2, 1)),
            ((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), (1, 1, 2), (1, 1, 2)),
        ],
    )
    def test_filter_dilation(
        self, image_shape, filter_shape, border_mode, filter_dilation
    ):
        self.validate(
            image_shape, filter_shape, border_mode, filter_dilation=filter_dilation
        )
    def test_filter_dilation_subsample(self):
        """Dilation and subsampling combined in a single correlation."""
        self.validate(
            (1, 1, 6, 6, 6),
            (1, 1, 3, 3, 3),
            1,
            subsample=(3, 3, 3),
            filter_dilation=(2, 2, 2),
        )
    @pytest.mark.parametrize(
        "border_mode",
        [
            "valid",
            "full",
            "half",
            (1, 1, 1),
            (2, 1, 1),
            (1, 2, 1),
            (1, 1, 2),
            (3, 3, 3),
            1,
        ],
    )
    def test_shape_Constant_tensor(self, border_mode):
        # Tests correlation where the {image,filter}_shape is a Constant tensor
        as_t = at.as_tensor_variable
        self.validate(
            (as_t(3), as_t(2), as_t(7), as_t(5), as_t(5)), (5, 2, 2, 3, 3), border_mode
        )
        self.validate(as_t([3, 2, 7, 5, 5]), (5, 2, 2, 3, 3), border_mode)
        self.validate(as_t((3, 2, 7, 5, 5)), (5, 2, 2, 3, 3), border_mode)
        self.validate(
            (3, 2, 7, 5, 5), (as_t(5), as_t(2), as_t(2), as_t(3), as_t(3)), "valid"
        )
        self.validate((3, 2, 7, 5, 5), as_t([5, 2, 2, 3, 3]), border_mode)
        self.validate(as_t([3, 2, 7, 5, 5]), as_t([5, 2, 2, 3, 3]), border_mode)
    def test_invalid_filter_shape(self):
        # Tests scenario where filter_shape[1] != input_shape[1]
        with pytest.raises(ValueError):
            self.validate((3, 2, 8, 8, 8), (4, 3, 5, 5, 8), "valid")
    def test_full_mode(self):
        # Tests basic correlation in full mode and case where filter
        # is larger than the input image.
        self.validate((3, 1, 4, 4, 4), (2, 1, 5, 5, 5), "full")
        def f():
            # A too-large filter must fail in "valid" mode.
            self.validate((3, 2, 5, 5, 5), (4, 2, 8, 8, 8), "valid")
        with pytest.raises(Exception):
            f()
    def test_wrong_input(self):
        # Make sure errors are raised when image and kernel are not 5D tensors
        with pytest.raises(Exception):
            self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=dmatrix())
        with pytest.raises(Exception):
            self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=vector())
        with pytest.raises(Exception):
            self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=dtensor3())
        with pytest.raises(Exception):
            self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=dtensor4())
    @pytest.mark.skipif(not pytensor.config.cxx, reason="Need cxx for this test")
    def test_dtype_upcast(self):
        # Checks dtype upcast for Corr3dMM methods.
        rng = np.random.default_rng(28483)
        def rand(shape, dtype="float64"):
            # Uniform values in [-1, 1).
            r = np.asarray(rng.random(shape), dtype=dtype)
            return r * 2 - 1
        ops = [corr3d.Corr3dMM, corr3d.Corr3dMMGradWeights, corr3d.Corr3dMMGradInputs]
        a_shapes = [[4, 5, 6, 3, 3], [1, 5, 6, 3, 3], [1, 5, 6, 3, 3]]
        b_shapes = [[7, 5, 3, 2, 2], [1, 5, 3, 1, 1], [7, 1, 3, 1, 1]]
        dtypes = ["float32", "float64"]
        for op, a_shape, b_shape in zip(ops, a_shapes, b_shapes):
            for a_dtype in dtypes:
                for b_dtype in dtypes:
                    c_dtype = pytensor.scalar.upcast(a_dtype, b_dtype)
                    a_tens = tensor5(dtype=a_dtype)
                    b_tens = tensor5(dtype=b_dtype)
                    a_tens_val = rand(a_shape, dtype=a_dtype)
                    b_tens_val = rand(b_shape, dtype=b_dtype)
                    c_tens = op()(a_tens, b_tens)
                    f = pytensor.function([a_tens, b_tens], c_tens, mode=self.mode)
                    assert f(a_tens_val, b_tens_val).dtype == c_dtype
    @pytest.mark.slow
    @pytest.mark.skipif(
        pytensor.config.mode == "FAST_COMPILE" or not pytensor.config.cxx,
        reason="Need cxx for this test",
    )
    def test_infer_shape_forward(self):
        """Shape inference for the forward Corr3dMM Op."""
        rng = np.random.default_rng(28483)
        def rand(*shape):
            r = np.asarray(rng.random(shape), dtype="float64")
            return r * 2 - 1
        corr3dMM = corr3d.Corr3dMM
        adtens = dtensor5()
        bdtens = dtensor5()
        aivec_vals = [
            [4, 5, 6, 3, 3],
            [6, 2, 8, 3, 3],
            [3, 6, 7, 5, 5],
            [3, 6, 7, 5, 5],
            [5, 2, 4, 3, 3],
        ]
        bivec_vals = [
            [7, 5, 3, 2, 2],
            [4, 2, 5, 3, 3],
            [5, 6, 3, 2, 2],
            [5, 6, 2, 3, 3],
            [6, 2, 4, 3, 3],
        ]
        modes = ["valid", "full", "half", (1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2), 1]
        subsamples = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2)]
        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # Corr3dMM
                    cdtens = corr3dMM(border_mode=mode, subsample=subsample)(
                        adtens, bdtens
                    )
                    self._compile_and_check(
                        [adtens, bdtens],
                        [cdtens],
                        [adtens_val, bdtens_val],
                        corr3dMM,
                        warn=False,
                    )
    @pytest.mark.slow
    @pytest.mark.skipif(
        pytensor.config.mode == "FAST_COMPILE" or not pytensor.config.cxx,
        reason="Need cxx for this test",
    )
    def test_infer_shape_gradW(self):
        """Shape inference for the weight-gradient Op (Corr3dMMGradWeights)."""
        rng = np.random.default_rng(28483)
        def rand(*shape):
            r = np.asarray(rng.random(shape), dtype="float64")
            return r * 2 - 1
        corr3dMM = corr3d.Corr3dMM
        gradW = corr3d.Corr3dMMGradWeights
        adtens = dtensor5()
        bdtens = dtensor5()
        aivec_vals = [
            [1, 5, 6, 3, 3],
            [8, 2, 7, 3, 3],
            [1, 6, 9, 4, 4],
            [9, 6, 8, 5, 5],
            [9, 1, 6, 8, 8],
        ]
        bivec_vals = [
            [7, 5, 3, 1, 1],
            [4, 2, 5, 3, 3],
            [12, 6, 3, 2, 2],
            [5, 6, 1, 3, 3],
            [11, 1, 3, 3, 3],
        ]
        modes = ["valid", "full", "half", (1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2), 1]
        subsamples = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2)]
        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # Corr3dMM
                    cdtens = corr3dMM(border_mode=mode, subsample=subsample)(
                        adtens, bdtens
                    )
                    f = pytensor.function([adtens, bdtens], cdtens)
                    cdtens_val = f(adtens_val, bdtens_val)
                    # Corr3dMM_gradWeights
                    shape = (
                        pytensor.shared(bivec_val[2]),
                        pytensor.shared(bivec_val[3]),
                        pytensor.shared(bivec_val[4]),
                    )
                    bdtens_g = gradW(border_mode=mode, subsample=subsample)(
                        adtens, cdtens, shape=shape
                    )
                    self._compile_and_check(
                        [adtens, cdtens],
                        [bdtens_g],
                        [adtens_val, cdtens_val],
                        gradW,
                        warn=False,
                    )
    @pytest.mark.slow
    @pytest.mark.skipif(
        pytensor.config.mode == "FAST_COMPILE" or not pytensor.config.cxx,
        reason="Need cxx for this test",
    )
    def test_infer_shape_gradI(self):
        """Shape inference for the input-gradient Op (Corr3dMMGradInputs)."""
        rng = np.random.default_rng(28483)
        def rand(*shape):
            r = np.asarray(rng.random(shape), dtype="float64")
            return r * 2 - 1
        corr3dMM = corr3d.Corr3dMM
        gradI = corr3d.Corr3dMMGradInputs
        adtens = dtensor5()
        bdtens = dtensor5()
        aivec_vals = [
            [1, 5, 6, 3, 3],
            [8, 2, 7, 3, 3],
            [1, 6, 9, 4, 4],
            [9, 6, 8, 5, 5],
            [9, 1, 6, 8, 8],
        ]
        bivec_vals = [
            [7, 5, 3, 1, 1],
            [4, 2, 5, 3, 3],
            [12, 6, 3, 2, 2],
            [5, 6, 1, 3, 3],
            [7, 1, 3, 4, 4],
        ]
        modes = ["valid", "full", "half", (1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2), 1]
        subsamples = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2)]
        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # Corr3dMM
                    cdtens = corr3dMM(border_mode=mode, subsample=subsample)(
                        adtens, bdtens
                    )
                    f = pytensor.function([adtens, bdtens], cdtens)
                    cdtens_val = f(adtens_val, bdtens_val)
                    # Corr3dMM_gradInputs
                    shape = (
                        pytensor.shared(aivec_val[2]),
                        pytensor.shared(aivec_val[3]),
                        pytensor.shared(aivec_val[4]),
                    )
                    adtens_g = gradI(border_mode=mode, subsample=subsample)(
                        bdtens, cdtens, shape=shape
                    )
                    self._compile_and_check(
                        [bdtens, cdtens],
                        [adtens_g],
                        [bdtens_val, cdtens_val],
                        gradI,
                        warn=False,
                    )
    def test_non_contiguous(self):
        """Correlation must give the same result on non-contiguous inputs."""
        self.validate((2, 2, 3, 3, 3), (2, 2, 2, 2, 2), "valid", non_contiguous=True)
        self.validate((3, 2, 8, 8, 8), (2, 2, 5, 5, 5), "valid", non_contiguous=True)
        self.validate((3, 2, 7, 5, 5), (3, 2, 2, 3, 3), "valid", non_contiguous=True)
        self.validate((3, 2, 7, 5, 5), (3, 2, 3, 2, 2), "valid", non_contiguous=True)
        self.validate((3, 1, 8, 8, 8), (2, 1, 5, 5, 5), "full", non_contiguous=True)
        self.validate((3, 1, 8, 8, 8), (2, 1, 5, 5, 5), "half", non_contiguous=True)
        self.validate((3, 1, 8, 8, 8), (2, 1, 5, 5, 5), (1, 1, 1), non_contiguous=True)
        self.validate((3, 1, 7, 5, 5), (2, 1, 2, 3, 3), (1, 1, 2), non_contiguous=True)
        self.validate((3, 1, 7, 5, 5), (2, 1, 2, 3, 3), (1, 2, 1), non_contiguous=True)
        self.validate((3, 1, 7, 5, 5), (2, 1, 2, 3, 3), (2, 1, 1), non_contiguous=True)
class TestGroupCorr3d(TestGroupedConv3dNoOptim):
    """Run the grouped 3D convolution tests against the Corr3dMM ops."""

    # Always compile with FAST_RUN so the Corr3dMM ops are present.
    mode = pytensor.compile.get_mode("FAST_RUN")
    # The CPU correlation implementation and its two gradient ops.
    conv_op = corr3d.Corr3dMM
    conv_gradw_op = corr3d.Corr3dMMGradWeights
    conv_gradi_op = corr3d.Corr3dMMGradInputs
    # Correlation flips the filters relative to convolution.
    flip_filter = True
    is_dnn = False
import numpy as np
import pytest
import pytensor
import pytensor.tensor as at
from pytensor.tensor.nnet.ctc import (
ConnectionistTemporalClassification,
ctc,
ctc_available,
)
from tests import unittest_tools as utt
def setup_torch_case():
    """Fixture for the warp-ctc Torch tutorial example.

    Activation layout, from slowest to fastest changing dimension, is
    (time, batchSize, inputLayerSize).  Expected costs and gradients come
    from the tutorial at:
    https://github.com/baidu-research/warp-ctc/blob/master/torch_binding/TUTORIAL.md

    Returns ``[activations, labels, activation_times, expected_costs,
    expected_gradients]``.
    """
    silence = [0, 0, 0, 0, 0]
    activations = np.asarray(
        [
            [silence, [1, 2, 3, 4, 5], [-5, -4, -3, -2, -1]],
            [silence, [6, 7, 8, 9, 10], [-10, -9, -8, -7, -6]],
            [silence, [11, 12, 13, 14, 15], [-15, -14, -13, -12, -11]],
        ],
        dtype=np.float32,
    )
    # Duration of each sequence
    activation_times = np.asarray([1, 3, 3], dtype=np.int32)
    # Labels for each sequence
    labels = np.asarray([[1, -1], [3, 3], [2, 3]], dtype=np.int32)
    expected_costs = np.asarray(
        [1.609437943, 7.355742931, 4.938849926], dtype=np.float32
    )
    # Gradient entries that recur in the table below, named for readability.
    a, b, c = 0.01165623125, 0.03168492019, 0.08612854034
    d, e = -0.02115798369, 0.636408627
    grads = [
        [
            [0.2, -0.8, 0.2, 0.2, 0.2],
            [a, b, c, -0.7658783197, e],
            [d, b, -0.8810571432, 0.2341216654, e],
        ],
        [
            silence,
            [-0.9883437753, b, c, 0.2341216654, e],
            [d, b, -0.1891518533, -0.4577836394, e],
        ],
        [
            silence,
            [a, b, c, -0.7658783197, e],
            [d, b, c, -0.7330639958, e],
        ],
    ]
    expected_gradients = np.asarray(grads, dtype=np.float32)
    return [activations, labels, activation_times, expected_costs, expected_gradients]
def setup_ctc_case():
    """Small two-sequence CTC fixture with precomputed costs and gradients.

    Returns ``[activations, labels, activation_times, expected_costs,
    expected_gradients]``; the expected values were obtained from an
    external C implementation of warp-ctc.
    """
    activations = np.asarray(
        [
            [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]],
            [[0.6, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.5, 0.2, 0.1]],
        ],
        dtype=np.float32,
    )
    activation_times = np.asarray([2, 2], dtype=np.int32)
    labels = np.asarray([[1, 2], [1, 2]], dtype=np.int32)
    expected_costs = np.asarray([2.962858438, 3.053659201], dtype=np.float32)
    # Gradient entries that recur in the table below, named for readability.
    p, q, r = 0.177031219, 0.291875124, 0.1786672771
    grads = [
        [
            [p, -0.7081246376, p, p, p],
            [p, -0.8229685426, q, p, p],
        ],
        [
            [q, p, -0.8229685426, p, p],
            [r, r, -0.7334594727, 0.1974578798, r],
        ],
    ]
    expected_gradients = np.asarray(grads, dtype=np.float32)
    return [activations, labels, activation_times, expected_costs, expected_gradients]
def setup_grad_case():
    """Inputs only (no expected values) for the gradient-verification test.

    Returns ``[activations, labels, activation_times]`` — the same inputs as
    :func:`setup_ctc_case`.
    """
    step0 = [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]]
    step1 = [[0.6, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.5, 0.2, 0.1]]
    activations = np.asarray([step0, step1], dtype=np.float32)
    activation_times = np.asarray([2] * 2, dtype=np.int32)
    labels = np.asarray([[1, 2]] * 2, dtype=np.int32)
    return [activations, labels, activation_times]
@pytest.mark.skipif(
    not ctc_available(), reason="Optional library warp-ctc not available"
)
@pytest.mark.skipif(
    pytensor.config.mode == "FAST_COMPILE" or pytensor.config.cxx == "",
    reason="We need a c compiler",
)
class TestCTC:
    """
    Test Baidu CTC wrapper implementation.
    Expected values for costs and gradients are obtained through an external
    C implementation, that uses the library directly.
    """
    def run_ctc(
        self, activations, labels, input_length, expected_costs, expected_grads
    ):
        """Compile a CTC cost/gradient function and check it against the
        expected values, then verify the grad-disabling rewrite."""
        # Create symbolic variables
        t_activations = pytensor.shared(activations, name="activations")
        t_activation_times = pytensor.shared(input_length, name="activation_times")
        t_labels = pytensor.shared(labels, name="labels")
        t_cost = ctc(t_activations, t_labels, t_activation_times)
        # Symbolic gradient of CTC cost
        t_grad = at.grad(at.mean(t_cost), t_activations)
        # Compile symbolic functions
        train = pytensor.function([], [t_cost, t_grad])
        cost, grad = train()
        # mean() scales the gradient by 1/batch_size, hence the rescaling.
        utt.assert_allclose(expected_grads / cost.shape[0], grad)
        utt.assert_allclose(expected_costs, cost)
        self.check_grads_disabled(t_activations, t_labels, t_activation_times)
    def check_grads_disabled(self, activations, labels, input_length):
        """
        Check if optimization to disable gradients is working
        """
        ctc_cost = ctc(activations, labels, input_length)
        ctc_function = pytensor.function([], [ctc_cost])
        # When only the cost is requested, the Op must not compute gradients.
        for node in ctc_function.maker.fgraph.apply_nodes:
            if isinstance(node.op, ConnectionistTemporalClassification):
                assert node.op.compute_grad is False
    def test_torch_case(self):
        """Check costs/gradients against the warp-ctc Torch tutorial values."""
        (
            activations,
            labels,
            input_length,
            expected_costs,
            expected_grads,
        ) = setup_torch_case()
        self.run_ctc(activations, labels, input_length, expected_costs, expected_grads)
    def test_ctc(self):
        """Check costs/gradients on the small two-sequence fixture."""
        (
            activations,
            labels,
            input_length,
            expected_costs,
            expected_grads,
        ) = setup_ctc_case()
        self.run_ctc(activations, labels, input_length, expected_costs, expected_grads)
    def test_verify_grad(self):
        """Numerically verify the CTC gradient w.r.t. the activations."""
        def ctc_op_functor(labels, in_lengths):
            def wrapper(acts):
                # Create auxiliary symbolic variables
                t_activation_times = pytensor.shared(
                    in_lengths, name="activation_times"
                )
                t_labels = pytensor.shared(labels, name="labels")
                return ctc(acts, t_labels, t_activation_times)
            return wrapper
        activations, labels, activation_times = setup_grad_case()
        ctc_op = ctc_op_functor(labels, activation_times)
        utt.verify_grad(ctc_op, [activations])
import numpy as np
import pytest
import pytensor
import pytensor.tensor as at
from pytensor import function, shared
from pytensor.configdefaults import config
from pytensor.tensor import nnet
from pytensor.tensor.nnet.neighbours import Images2Neibs, images2neibs, neibs2images
from pytensor.tensor.type import dtensor4, ftensor4, ivector, matrix, tensor4
from tests import unittest_tools
# Default compilation mode with GPU rewrites excluded; every test in this
# module compiles with it so the CPU Images2Neibs op is exercised.
mode_without_gpu = pytensor.compile.mode.get_default_mode().excluding("gpu")
class TestImages2Neibs(unittest_tools.InferShapeTester):
    """Tests for the ``Images2Neibs`` op (``images2neibs``/``neibs2images``)."""

    mode = mode_without_gpu
    op = Images2Neibs
    # Every test is repeated for each of these dtypes.
    dtypes = ["int64", "float32", "float64"]
    def test_neibs(self):
        """Round-trip check: images2neibs followed by neibs2images must
        reproduce the input, for several shapes, both border modes, and
        every supported dtype."""
        for shape, pshape in [
            ((10, 7, 18, 18), (2, 2)),
            ((10, 7, 6, 18), (3, 2)),
            ((5, 7, 66, 66), (33, 33)),
            ((5, 7, 68, 66), (34, 33)),
        ]:
            for border in ["valid", "ignore_borders"]:
                for dtype in self.dtypes:
                    images = shared(
                        np.arange(np.prod(shape), dtype=dtype).reshape(shape)
                    )
                    neib_shape = at.as_tensor_variable(pshape)
                    f = function(
                        [],
                        images2neibs(images, neib_shape, mode=border),
                        mode=self.mode,
                    )
                    # print images.get_value(borrow=True)
                    neibs = f()
                    # print neibs
                    g = function(
                        [],
                        neibs2images(neibs, neib_shape, images.shape),
                        mode=self.mode,
                    )
                    # The Images2Neibs op must actually appear in the graph.
                    assert any(
                        isinstance(node.op, self.op)
                        for node in f.maker.fgraph.toposort()
                    )
                    # print g()
                    assert np.allclose(images.get_value(borrow=True), g())
    def test_neibs_manual(self):
        """Check images2neibs output against a hand-computed table for 2x2
        patches of a (2, 3, 4, 4) input, plus the neibs2images round trip."""
        shape = (2, 3, 4, 4)
        for dtype in self.dtypes:
            images = shared(np.arange(np.prod(shape), dtype=dtype).reshape(shape))
            neib_shape = at.as_tensor_variable((2, 2))
            for border in ["valid", "ignore_borders"]:
                f = function(
                    [], images2neibs(images, neib_shape, mode=border), mode=self.mode
                )
                assert any(
                    isinstance(node.op, self.op) for node in f.maker.fgraph.toposort()
                )
                # print images.get_value(borrow=True)
                neibs = f()
                # print neibs
                assert np.allclose(
                    neibs,
                    [
                        [0, 1, 4, 5],
                        [2, 3, 6, 7],
                        [8, 9, 12, 13],
                        [10, 11, 14, 15],
                        [16, 17, 20, 21],
                        [18, 19, 22, 23],
                        [24, 25, 28, 29],
                        [26, 27, 30, 31],
                        [32, 33, 36, 37],
                        [34, 35, 38, 39],
                        [40, 41, 44, 45],
                        [42, 43, 46, 47],
                        [48, 49, 52, 53],
                        [50, 51, 54, 55],
                        [56, 57, 60, 61],
                        [58, 59, 62, 63],
                        [64, 65, 68, 69],
                        [66, 67, 70, 71],
                        [72, 73, 76, 77],
                        [74, 75, 78, 79],
                        [80, 81, 84, 85],
                        [82, 83, 86, 87],
                        [88, 89, 92, 93],
                        [90, 91, 94, 95],
                    ],
                )
                # Inverting the op must reproduce the original images.
                g = function(
                    [], neibs2images(neibs, neib_shape, images.shape), mode=self.mode
                )
                assert np.allclose(images.get_value(borrow=True), g())
    def test_neibs_manual_step(self):
        """3x3 patches with a (2, 2) step, checked against a hand-computed
        table for a (2, 3, 5, 5) input."""
        shape = (2, 3, 5, 5)
        for dtype in self.dtypes:
            images = shared(
                np.asarray(np.arange(np.prod(shape)).reshape(shape), dtype=dtype)
            )
            neib_shape = at.as_tensor_variable((3, 3))
            neib_step = at.as_tensor_variable((2, 2))
            for border in ["valid", "ignore_borders"]:
                f = function(
                    [],
                    images2neibs(images, neib_shape, neib_step, mode=border),
                    mode=self.mode,
                )
                neibs = f()
                assert self.op in [type(node.op) for node in f.maker.fgraph.toposort()]
                assert np.allclose(
                    neibs,
                    [
                        [0, 1, 2, 5, 6, 7, 10, 11, 12],
                        [2, 3, 4, 7, 8, 9, 12, 13, 14],
                        [10, 11, 12, 15, 16, 17, 20, 21, 22],
                        [12, 13, 14, 17, 18, 19, 22, 23, 24],
                        [25, 26, 27, 30, 31, 32, 35, 36, 37],
                        [27, 28, 29, 32, 33, 34, 37, 38, 39],
                        [35, 36, 37, 40, 41, 42, 45, 46, 47],
                        [37, 38, 39, 42, 43, 44, 47, 48, 49],
                        [50, 51, 52, 55, 56, 57, 60, 61, 62],
                        [52, 53, 54, 57, 58, 59, 62, 63, 64],
                        [60, 61, 62, 65, 66, 67, 70, 71, 72],
                        [62, 63, 64, 67, 68, 69, 72, 73, 74],
                        [75, 76, 77, 80, 81, 82, 85, 86, 87],
                        [77, 78, 79, 82, 83, 84, 87, 88, 89],
                        [85, 86, 87, 90, 91, 92, 95, 96, 97],
                        [87, 88, 89, 92, 93, 94, 97, 98, 99],
                        [100, 101, 102, 105, 106, 107, 110, 111, 112],
                        [102, 103, 104, 107, 108, 109, 112, 113, 114],
                        [110, 111, 112, 115, 116, 117, 120, 121, 122],
                        [112, 113, 114, 117, 118, 119, 122, 123, 124],
                        [125, 126, 127, 130, 131, 132, 135, 136, 137],
                        [127, 128, 129, 132, 133, 134, 137, 138, 139],
                        [135, 136, 137, 140, 141, 142, 145, 146, 147],
                        [137, 138, 139, 142, 143, 144, 147, 148, 149],
                    ],
                )
                # neibs2images do not seam to support step != neib_shape
                # g = function([], neibs2images(neibs, neib_shape, images.shape),
                #              mode=self.mode)
                # print g()
                # assert numpy.allclose(images.get_value(borrow=True), g())
@config.change_flags(compute_test_value="off")
def test_neibs_bad_shape(self):
shape = (2, 3, 10, 10)
for dtype in self.dtypes:
images = shared(np.arange(np.prod(shape), dtype=dtype).reshape(shape))
for neib_shape in [(3, 2), (2, 3)]:
neib_shape = at.as_tensor_variable(neib_shape)
f = function([], images2neibs(images, neib_shape), mode=self.mode)
with pytest.raises(TypeError):
f()
# Test that ignore border work in that case.
f = function(
[],
images2neibs(images, neib_shape, mode="ignore_borders"),
mode=self.mode,
)
assert self.op in [type(node.op) for node in f.maker.fgraph.toposort()]
f()
    def test_neibs_wrap_centered_step_manual(self):
        """``wrap_centered`` mode checked against hand-computed tables for
        several patch shapes and steps on 5x5 images; each subsequent image
        in the batch is the previous one shifted by 25, hence the
        ``expected + 25 * i`` comparison."""
        expected1 = [
            [24, 20, 21, 4, 0, 1, 9, 5, 6],
            [21, 22, 23, 1, 2, 3, 6, 7, 8],
            [23, 24, 20, 3, 4, 0, 8, 9, 5],
            [9, 5, 6, 14, 10, 11, 19, 15, 16],
            [6, 7, 8, 11, 12, 13, 16, 17, 18],
            [8, 9, 5, 13, 14, 10, 18, 19, 15],
            [19, 15, 16, 24, 20, 21, 4, 0, 1],
            [16, 17, 18, 21, 22, 23, 1, 2, 3],
            [18, 19, 15, 23, 24, 20, 3, 4, 0],
        ]
        expected2 = [
            [24, 20, 21, 4, 0, 1, 9, 5, 6],
            [22, 23, 24, 2, 3, 4, 7, 8, 9],
            [14, 10, 11, 19, 15, 16, 24, 20, 21],
            [12, 13, 14, 17, 18, 19, 22, 23, 24],
        ]
        expected3 = [
            [19, 15, 16, 24, 20, 21, 4, 0, 1, 9, 5, 6, 14, 10, 11],
            [17, 18, 19, 22, 23, 24, 2, 3, 4, 7, 8, 9, 12, 13, 14],
            [9, 5, 6, 14, 10, 11, 19, 15, 16, 24, 20, 21, 4, 0, 1],
            [7, 8, 9, 12, 13, 14, 17, 18, 19, 22, 23, 24, 2, 3, 4],
        ]
        expected4 = [
            [23, 24, 20, 21, 22, 3, 4, 0, 1, 2, 8, 9, 5, 6, 7],
            [21, 22, 23, 24, 20, 1, 2, 3, 4, 0, 6, 7, 8, 9, 5],
            [13, 14, 10, 11, 12, 18, 19, 15, 16, 17, 23, 24, 20, 21, 22],
            [11, 12, 13, 14, 10, 16, 17, 18, 19, 15, 21, 22, 23, 24, 20],
        ]
        expected5 = [
            [24, 20, 21, 4, 0, 1, 9, 5, 6],
            [22, 23, 24, 2, 3, 4, 7, 8, 9],
            [9, 5, 6, 14, 10, 11, 19, 15, 16],
            [7, 8, 9, 12, 13, 14, 17, 18, 19],
            [19, 15, 16, 24, 20, 21, 4, 0, 1],
            [17, 18, 19, 22, 23, 24, 2, 3, 4],
        ]
        expected6 = [
            [24, 20, 21, 4, 0, 1, 9, 5, 6],
            [21, 22, 23, 1, 2, 3, 6, 7, 8],
            [23, 24, 20, 3, 4, 0, 8, 9, 5],
            [14, 10, 11, 19, 15, 16, 24, 20, 21],
            [11, 12, 13, 16, 17, 18, 21, 22, 23],
            [13, 14, 10, 18, 19, 15, 23, 24, 20],
        ]
        # TODO test discontinuous image
        for shp_idx, (shape, neib_shape, neib_step, expected) in enumerate(
            [
                [(7, 8, 5, 5), (3, 3), (2, 2), expected1],
                [(7, 8, 5, 5), (3, 3), (3, 3), expected2],
                [(7, 8, 5, 5), (5, 3), (3, 3), expected3],
                [(7, 8, 5, 5), (3, 5), (3, 3), expected4],
                [(80, 90, 5, 5), (3, 3), (2, 3), expected5],
                [(1025, 9, 5, 5), (3, 3), (3, 2), expected6],
                [(1, 1, 5, 1035), (3, 3), (3, 3), None],
                [(1, 1, 1045, 5), (3, 3), (3, 3), None],
            ]
        ):
            for dtype in self.dtypes:
                images = shared(
                    np.asarray(np.arange(np.prod(shape)).reshape(shape), dtype=dtype)
                )
                neib_shape = at.as_tensor_variable(neib_shape)
                neib_step = at.as_tensor_variable(neib_step)
                expected = np.asarray(expected)
                f = function(
                    [],
                    images2neibs(images, neib_shape, neib_step, mode="wrap_centered"),
                    mode=self.mode,
                )
                neibs = f()
                # `expected is None` cases only check that compilation/running
                # succeeds; np.asarray(None) has size 1, so they skip this.
                if expected.size > 1:
                    for i in range(shape[0] * shape[1]):
                        assert np.allclose(
                            neibs[
                                i * expected.shape[0] : (i + 1) * expected.shape[0], :
                            ],
                            expected + 25 * i,
                        ), "wrap_centered"
                assert self.op in [type(node.op) for node in f.maker.fgraph.toposort()]
                # g = function([], neibs2images(neibs, neib_shape, images.shape), mode=self.mode)
                # TODO: why this is commented?
                # assert numpy.allclose(images.get_value(borrow=True), g())
    @pytest.mark.slow
    def test_neibs_half_step_by_valid(self):
        """``half`` mode must equal ``valid`` mode applied to an input
        zero-padded by half the patch size on each border."""
        neib_shapes = ((3, 3), (3, 5), (5, 3))
        for shp_idx, (shape, neib_step) in enumerate(
            [
                [(7, 8, 5, 5), (1, 1)],
                [(7, 8, 5, 5), (2, 2)],
                [(7, 8, 5, 5), (4, 4)],
                [(7, 8, 5, 5), (1, 4)],
                [(7, 8, 5, 5), (4, 1)],
                [(80, 90, 5, 5), (1, 2)],
                [(1025, 9, 5, 5), (2, 1)],
                [(1, 1, 5, 1037), (2, 4)],
                [(1, 1, 1045, 5), (4, 2)],
            ]
        ):
            for neib_shape in neib_shapes:
                for dtype in self.dtypes:
                    x = pytensor.shared(np.random.standard_normal(shape).astype(dtype))
                    # Pad by floor(patch/2) on each side, as "half" mode does.
                    extra = (neib_shape[0] // 2, neib_shape[1] // 2)
                    padded_shape = (
                        x.shape[0],
                        x.shape[1],
                        x.shape[2] + 2 * extra[0],
                        x.shape[3] + 2 * extra[1],
                    )
                    padded_x = at.zeros(padded_shape)
                    padded_x = at.set_subtensor(
                        padded_x[:, :, extra[0] : -extra[0], extra[1] : -extra[1]], x
                    )
                    x_using_valid = images2neibs(
                        padded_x, neib_shape, neib_step, mode="valid"
                    )
                    x_using_half = images2neibs(x, neib_shape, neib_step, mode="half")
                    f_valid = pytensor.function([], x_using_valid, mode="FAST_RUN")
                    f_half = pytensor.function([], x_using_half, mode=self.mode)
                    unittest_tools.assert_allclose(f_valid(), f_half())
    @pytest.mark.slow
    def test_neibs_full_step_by_valid(self):
        """``full`` mode must equal ``valid`` mode applied to an input
        zero-padded by (patch - 1) on each border."""
        for shp_idx, (shape, neib_step, neib_shapes) in enumerate(
            [
                [(7, 8, 5, 5), (1, 1), ((3, 3), (3, 5), (5, 3))],
                [(7, 8, 5, 5), (2, 2), ((3, 3), (3, 5), (5, 3))],
                [(7, 8, 6, 6), (3, 3), ((2, 2), (2, 5), (5, 2))],
                [(7, 8, 6, 6), (1, 3), ((2, 2), (2, 5), (5, 2))],
                [(7, 8, 6, 6), (3, 1), ((2, 2), (2, 5), (5, 2))],
                [(80, 90, 5, 5), (1, 2), ((3, 3), (3, 5), (5, 3))],
                [(1025, 9, 5, 5), (2, 1), ((3, 3), (3, 5), (5, 3))],
                [(1, 1, 11, 1037), (2, 3), ((3, 3), (5, 3))],
                [(1, 1, 1043, 11), (3, 2), ((3, 3), (3, 5))],
            ]
        ):
            for neib_shape in neib_shapes:
                for dtype in self.dtypes:
                    x = pytensor.shared(np.random.standard_normal(shape).astype(dtype))
                    # Pad by (patch - 1) on each side, as "full" mode does.
                    extra = (neib_shape[0] - 1, neib_shape[1] - 1)
                    padded_shape = (
                        x.shape[0],
                        x.shape[1],
                        x.shape[2] + 2 * extra[0],
                        x.shape[3] + 2 * extra[1],
                    )
                    padded_x = at.zeros(padded_shape)
                    padded_x = at.set_subtensor(
                        padded_x[:, :, extra[0] : -extra[0], extra[1] : -extra[1]], x
                    )
                    x_using_valid = images2neibs(
                        padded_x, neib_shape, neib_step, mode="valid"
                    )
                    x_using_full = images2neibs(x, neib_shape, neib_step, mode="full")
                    f_valid = pytensor.function([], x_using_valid, mode="FAST_RUN")
                    f_full = pytensor.function([], x_using_full, mode=self.mode)
                    unittest_tools.assert_allclose(f_valid(), f_full())
    @config.change_flags(compute_test_value="off")
    def test_neibs_bad_shape_wrap_centered(self):
        """``wrap_centered`` must raise TypeError when the patch does not
        evenly tile the image, or when the image is smaller than the patch;
        an exactly-matching shape must still work."""
        shape = (2, 3, 10, 10)
        for dtype in self.dtypes:
            images = shared(np.arange(np.prod(shape), dtype=dtype).reshape(shape))
            for neib_shape in [(3, 2), (2, 3)]:
                neib_shape = at.as_tensor_variable(neib_shape)
                f = function(
                    [],
                    images2neibs(images, neib_shape, mode="wrap_centered"),
                    mode=self.mode,
                )
                with pytest.raises(TypeError):
                    f()
            # Image smaller than the 3x3 patch along one axis must also fail.
            for shape in [(2, 3, 2, 3), (2, 3, 3, 2)]:
                images = shared(np.arange(np.prod(shape)).reshape(shape))
                neib_shape = at.as_tensor_variable((3, 3))
                f = function(
                    [],
                    images2neibs(images, neib_shape, mode="wrap_centered"),
                    mode=self.mode,
                )
                with pytest.raises(TypeError):
                    f()
            # Test a valid shapes
            shape = (2, 3, 3, 3)
            images = shared(np.arange(np.prod(shape)).reshape(shape))
            neib_shape = at.as_tensor_variable((3, 3))
            f = function(
                [],
                images2neibs(images, neib_shape, mode="wrap_centered"),
                mode=self.mode,
            )
            f()
def test_grad_wrap_centered(self):
# It is not implemented for now. So test that we raise an error.
shape = (2, 3, 6, 6)
images_val = np.random.random(shape).astype("float32")
def fn(images):
return images2neibs(images, (3, 3), mode="wrap_centered")
with pytest.raises(TypeError):
unittest_tools.verify_grad(fn, [images_val], mode=self.mode)
def test_grad_half(self):
# It is not implemented for now. So test that we raise an error.
shape = (2, 3, 6, 6)
rng = np.random.default_rng(28483)
images_val = rng.random(shape).astype("float32")
def fn(images):
return images2neibs(images, (3, 3), mode="half")
with pytest.raises(TypeError):
unittest_tools.verify_grad(fn, [images_val], mode=self.mode)
def test_grad_full(self):
# It is not implemented for now. So test that we raise an error.
shape = (2, 3, 6, 6)
rng = np.random.default_rng(28483)
images_val = rng.random(shape).astype("float32")
def fn(images):
return images2neibs(images, (3, 3), mode="full")
with pytest.raises(TypeError):
unittest_tools.verify_grad(fn, [images_val], mode=self.mode)
def test_grad_valid(self):
shape = (2, 3, 6, 6)
rng = np.random.default_rng(28483)
images_val = rng.random(shape).astype("float32")
def fn(images):
return images2neibs(images, (2, 2))
unittest_tools.verify_grad(fn, [images_val], mode=self.mode, eps=0.1)
def fn(images):
return images2neibs(images, (3, 2), (1, 2))
unittest_tools.verify_grad(fn, [images_val], mode=self.mode, eps=0.1)
def fn(images):
return images2neibs(images, (1, 2), (5, 2))
unittest_tools.verify_grad(fn, [images_val], mode=self.mode, eps=0.1)
def test_grad_ignore_border(self):
shape = (2, 3, 5, 5)
rng = np.random.default_rng(28483)
images_val = rng.random(shape).astype("float32")
def fn(images):
return images2neibs(images, (2, 2), mode="ignore_borders")
unittest_tools.verify_grad(fn, [images_val], mode=self.mode, eps=0.1)
def test_neibs2images_grad(self):
# say we had images of size (2, 3, 10, 10)
# then we extracted 2x2 neighbors on this, we get (2 * 3 * 5 * 5, 4)
rng = np.random.default_rng(28483)
neibs_val = rng.random((150, 4))
def fn(neibs):
return neibs2images(neibs, (2, 2), (2, 3, 10, 10))
unittest_tools.verify_grad(fn, [neibs_val], mode=self.mode, eps=0.1)
def test_neibs_valid_with_inconsistent_borders(self):
shape = (2, 3, 5, 5)
images = dtensor4()
images_val = np.arange(np.prod(shape), dtype="float32").reshape(shape)
f = pytensor.function(
[images],
at.sqr(images2neibs(images, (2, 2), mode="valid")),
mode=self.mode,
)
with pytest.raises(TypeError):
f(images_val)
def test_neibs_half_with_inconsistent_borders(self):
shape = (2, 3, 5, 5)
images = dtensor4()
images_val = np.arange(np.prod(shape), dtype="float32").reshape(shape)
f = pytensor.function(
[images], at.sqr(images2neibs(images, (2, 2), mode="half")), mode=self.mode
)
with pytest.raises(TypeError):
f(images_val)
def test_neibs_full_with_inconsistent_borders(self):
shape = (2, 3, 5, 5)
images = dtensor4()
images_val = np.arange(np.prod(shape), dtype="float32").reshape(shape)
f = pytensor.function(
[images], at.sqr(images2neibs(images, (2, 2), mode="full")), mode=self.mode
)
with pytest.raises(TypeError):
f(images_val)
def test_can_not_infer_nb_dim(self):
# Was reported in gh-5613. Test that we do not crash
# or that we crash in a few other case found while
# investigating that case
img = tensor4("img")
patches = nnet.neighbours.images2neibs(img, [16, 16])
extractPatches = pytensor.function([img], patches, mode=self.mode)
patsRecovery = matrix("patsRecovery")
original_size = ivector("original_size")
for mode in ["valid", "ignore_borders"]:
out = neibs2images(patsRecovery, (16, 16), original_size, mode=mode)
f = pytensor.function([patsRecovery, original_size], out, mode=self.mode)
im_val = np.ones((1, 3, 320, 320), dtype=np.float32)
neibs = extractPatches(im_val)
# TODO FIXME: Make this a real test and `assert` something
f(neibs, im_val.shape)
# Wrong number of dimensions
with pytest.raises(ValueError):
f(neibs, (1, 1, 3, 320, 320))
# End up with a step of 0
# This can lead to division by zero in DebugMode
with pytest.raises((ValueError, ZeroDivisionError)):
f(neibs, (3, 320, 320, 1))
def speed_neibs(self):
shape = (100, 40, 18, 18)
images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape))
neib_shape = at.as_tensor_variable((3, 3))
f = function([], images2neibs(images, neib_shape), mode=self.mode)
for i in range(1000):
f()
def speed_neibs_wrap_centered(self):
shape = (100, 40, 18, 18)
images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape))
neib_shape = at.as_tensor_variable((3, 3))
f = function(
[], images2neibs(images, neib_shape, mode="wrap_centered"), mode=self.mode
)
for i in range(1000):
f()
def speed_neibs_half(self):
shape = (100, 40, 18, 18)
images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape))
neib_shape = at.as_tensor_variable((3, 3))
f = function([], images2neibs(images, neib_shape, mode="half"), mode=self.mode)
for i in range(1000):
f()
def speed_neibs_full(self):
shape = (100, 40, 18, 18)
images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape))
neib_shape = at.as_tensor_variable((3, 3))
f = function([], images2neibs(images, neib_shape, mode="full"), mode=self.mode)
for i in range(1000):
f()
def test_infer_shape(self):
shape = (100, 40, 6, 3)
images = np.ones(shape).astype("float32")
x = ftensor4()
self._compile_and_check(
[x],
[images2neibs(x, neib_shape=(2, 1), mode="valid")],
[images],
Images2Neibs,
)
self._compile_and_check(
[x],
[images2neibs(x, neib_shape=(2, 3), mode="valid")],
[images],
Images2Neibs,
)
shape = (100, 40, 5, 4)
images = np.ones(shape).astype("float32")
x = ftensor4()
self._compile_and_check(
[x],
[images2neibs(x, neib_shape=(2, 1), mode="ignore_borders")],
[images],
Images2Neibs,
)
shape = (100, 40, 5, 3)
images = np.ones(shape).astype("float32")
x = ftensor4()
self._compile_and_check(
[x],
[images2neibs(x, neib_shape=(2, 3), mode="ignore_borders")],
[images],
Images2Neibs,
)
shape = (100, 40, 6, 7)
images = np.ones(shape).astype("float32")
x = ftensor4()
self._compile_and_check(
[x],
[images2neibs(x, neib_shape=(2, 2), mode="ignore_borders")],
[images],
Images2Neibs,
)
shape = (100, 40, 5, 10)
images = np.ones(shape).astype("float32")
x = ftensor4()
self._compile_and_check(
[x],
[images2neibs(x, neib_shape=(3, 3), mode="wrap_centered")],
[images],
Images2Neibs,
)
shape = (100, 40, 6, 4)
images = np.ones(shape).astype("float32")
x = ftensor4()
self._compile_and_check(
[x],
[images2neibs(x, neib_shape=(2, 1), mode="half")],
[images],
Images2Neibs,
)
self._compile_and_check(
[x],
[images2neibs(x, neib_shape=(2, 3), mode="half")],
[images],
Images2Neibs,
)
shape = (100, 40, 6, 5)
images = np.ones(shape).astype("float32")
x = ftensor4()
self._compile_and_check(
[x],
[images2neibs(x, neib_shape=(2, 1), mode="full")],
[images],
Images2Neibs,
)
self._compile_and_check(
[x],
[images2neibs(x, neib_shape=(2, 3), mode="full")],
[images],
Images2Neibs,
)
import pytensor
from pytensor.graph.rewriting.basic import check_stack_trace
from pytensor.tensor.nnet.blocksparse import (
sparse_block_dot,
sparse_block_gemv,
sparse_block_gemv_inplace,
sparse_block_outer,
sparse_block_outer_inplace,
)
from pytensor.tensor.type import fmatrix, ftensor3, ftensor4, lmatrix
from tests.unittest_tools import assertFailure_fast
def test_blocksparse_inplace_gemv_opt():
    """The block-sparse gemv should be rewritten to its inplace variant
    in every mode except FAST_COMPILE, and keep its stack trace."""
    bias = fmatrix()
    weights = ftensor4()
    hidden = ftensor3()
    in_idx = lmatrix()
    out_idx = lmatrix()

    out = sparse_block_dot(weights, hidden, in_idx, bias, out_idx)
    f = pytensor.function([weights, hidden, in_idx, bias, out_idx], out)

    last_op = f.maker.fgraph.toposort()[-1].op
    if pytensor.config.mode == "FAST_COMPILE":
        # FAST_COMPILE does not apply inplace rewrites.
        assert not last_op.inplace
        assert check_stack_trace(f, ops_to_check=[sparse_block_gemv])
    else:
        assert last_op.inplace
        assert check_stack_trace(f, ops_to_check=[sparse_block_gemv_inplace])
# NOTE(review): `assertFailure_fast` (from tests.unittest_tools) presumably
# adjusts how this test is run/reported outside FAST_COMPILE — confirm its
# behavior in tests/unittest_tools.py.
if pytensor.config.mode != "FAST_COMPILE":
    test_blocksparse_inplace_gemv_opt = assertFailure_fast(
        test_blocksparse_inplace_gemv_opt
    )
def test_blocksparse_inplace_outer_opt():
    """The block-sparse outer product (from the gradient w.r.t. the weights)
    should be rewritten to its inplace variant outside FAST_COMPILE."""
    bias = fmatrix()
    weights = ftensor4()
    hidden = ftensor3()
    in_idx = lmatrix()
    out_idx = lmatrix()

    out = sparse_block_dot(weights, hidden, in_idx, bias, out_idx)
    f = pytensor.function(
        [weights, hidden, in_idx, bias, out_idx],
        [out, pytensor.gradient.grad(out.sum(), wrt=weights)],
    )

    last_op = f.maker.fgraph.toposort()[-1].op
    if pytensor.config.mode == "FAST_COMPILE":
        # FAST_COMPILE does not apply inplace rewrites.
        assert not last_op.inplace
        assert check_stack_trace(f, ops_to_check=sparse_block_outer)
    else:
        assert last_op.inplace
        assert check_stack_trace(f, ops_to_check=sparse_block_outer_inplace)
import numpy as np
import pytest
import pytensor
from pytensor.compile.mode import get_default_mode, get_mode
from pytensor.configdefaults import config
from pytensor.graph.rewriting.basic import check_stack_trace
from pytensor.scalar.basic import Composite
from pytensor.tensor.elemwise import Elemwise
from pytensor.tensor.inplace import sigmoid_inplace
from pytensor.tensor.math import clip, sigmoid
from pytensor.tensor.nnet.sigm import (
hard_sigmoid,
ultra_fast_scalar_sigmoid,
ultra_fast_sigmoid,
ultra_fast_sigmoid_inplace,
)
from pytensor.tensor.type import matrix
from tests.tensor.utils import (
_good_broadcast_unary_normal_no_complex,
check_floatX,
copymod,
makeBroadcastTester,
upcast_int8_nfunc,
)
# Broadcast tests comparing `ultra_fast_sigmoid` against the exact logistic
# function; the op is an approximation, hence the loose tolerance below.
TestUltraFastSigmoidBroadcast = makeBroadcastTester(
    op=ultra_fast_sigmoid,
    expected=upcast_int8_nfunc(
        lambda inputs: check_floatX(inputs, 1 / (1 + np.exp(-inputs)))
    ),
    good=copymod(
        _good_broadcast_unary_normal_no_complex, without=["uint16"]
    ), # numpy function overflows with uint16.
    # grad=_grad_broadcast_unary_normal,
    name="UltraFastSigmoidTester",
    # This is an approximation of the sigmoid; that is why we raise eps.
    eps=5e-2,
)
# Broadcast tests comparing `hard_sigmoid` against the exact logistic
# function; the op is an approximation, hence the loose tolerance below.
TestHardSigmoidBroadcast = makeBroadcastTester(
    op=hard_sigmoid,
    expected=upcast_int8_nfunc(
        lambda inputs: check_floatX(inputs, 1 / (1 + np.exp(-inputs)))
    ),
    good=copymod(
        _good_broadcast_unary_normal_no_complex, without=["uint16"]
    ), # numpy function overflows with uint16.
    # grad=_grad_broadcast_unary_normal,
    name="HardSigmoidTester",
    # This is an approximation of the sigmoid; that is why we raise eps.
    eps=1e-1,
)
class TestSpecialSigmoidOpts:
    """Tests for the rewrites that substitute cheaper sigmoid approximations."""

    def get_mode(self, excluding=None):
        """Return the compilation mode for these tests.

        Parameters
        ----------
        excluding : list of str, optional
            Rewrite names to exclude from the returned mode.

        Returns
        -------
        The default mode — replaced by 'FAST_RUN' when `config.mode` is
        'FAST_COMPILE' — without the rewrites listed in `excluding`.
        """
        if config.mode == "FAST_COMPILE":
            base = get_mode("FAST_RUN")
        else:
            base = get_default_mode()
        return base.excluding(*excluding) if excluding else base

    def test_local_ultra_fast_sigmoid(self):
        x = matrix("x")
        s = sigmoid(x)

        # Without the rewrite, the exact sigmoid remains in the graph.
        f = pytensor.function(
            [x], s, mode=self.get_mode("local_ultra_fast_sigmoid")
        )
        assert check_stack_trace(f, ops_to_check=sigmoid)
        nodes = f.maker.fgraph.toposort()
        assert len(nodes) == 1
        assert nodes[0].op == sigmoid

        # With the rewrite enabled, the approximation is substituted.
        mode = self.get_mode().including("local_ultra_fast_sigmoid")
        f = pytensor.function([x], s, mode=mode)
        assert check_stack_trace(f, ops_to_check=ultra_fast_sigmoid)
        nodes = f.maker.fgraph.toposort()
        assert nodes[0].op == ultra_fast_sigmoid
        assert len(nodes) == 1

        # The inplace variant is rewritten as well.
        f = pytensor.function(
            [x], sigmoid_inplace(x), mode=mode, accept_inplace=True
        )
        assert check_stack_trace(f, ops_to_check=ultra_fast_sigmoid_inplace)
        nodes = f.maker.fgraph.toposort()
        assert nodes[0].op == ultra_fast_sigmoid_inplace
        assert len(nodes) == 1

    @pytest.mark.skipif(config.cxx == "", reason="Needs a C compiler.")
    def test_composite_c_code(self):
        """Make sure this `Op`'s `c_code` works within a `Composite`."""
        x = matrix("x")
        mode = get_mode("FAST_RUN").including("local_ultra_fast_sigmoid")
        f = pytensor.function([x], sigmoid(x) + sigmoid(x + 1), mode=mode)

        nodes = f.maker.fgraph.toposort()
        assert isinstance(nodes[0].op, Elemwise)
        assert isinstance(nodes[0].op.scalar_op, Composite)
        inner_ops = {n.op for n in nodes[0].op.scalar_op.fgraph.toposort()}
        assert ultra_fast_scalar_sigmoid in inner_ops
        assert len(nodes) == 1

    def test_local_hard_sigmoid(self):
        x = matrix("x")
        s = sigmoid(x)

        # Without the rewrite, the exact sigmoid survives.
        f = pytensor.function([x], s, mode=self.get_mode("local_hard_sigmoid"))
        assert check_stack_trace(f, ops_to_check=sigmoid)
        nodes = f.maker.fgraph.toposort()
        assert nodes[0].op == sigmoid
        assert len(nodes) == 1

        # With the rewrite enabled, no sigmoid node remains and the function
        # still evaluates over a wide input range.
        mode = self.get_mode().including("local_hard_sigmoid")
        f = pytensor.function([x], s, mode=mode)
        assert not any(node.op == sigmoid for node in f.maker.fgraph.toposort())
        f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])

        # With fusion and inplace disabled, the graph uses `clip` and keeps
        # its stack trace.
        mode2 = mode.excluding("fusion").excluding("inplace")
        f2 = pytensor.function([x], s, mode=mode2)
        assert check_stack_trace(f2, ops_to_check=clip)
import copy
import numpy as np
from pytensor.compile.function import function
from pytensor.compile.io import Out
from pytensor.tensor.math import dot
from pytensor.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias
from pytensor.tensor.type import dmatrix, dvector, ivector, matrix
def test_bug_2009_07_17_borrowed_output():
    """Regression test for an output that was borrowed by mistake.

    With ``borrow=False``, mutating an input after a call must not alter an
    already-returned output, and repeated calls must return fresh arrays.
    """
    a = dmatrix()
    b = dmatrix()
    g = function([a, b], Out(dot(a, b), borrow=False))

    x = np.zeros((1, 2))
    y = np.ones((2, 5))
    z = g(x, y)
    x.fill(1)
    # `z` was computed from the all-zero `x`, so it must still be zero even
    # though `x` changed afterwards.
    assert np.linalg.norm(z) == 0

    # The code above was supposed to fail when it was written (or, more
    # accurately, on the next revision, i.e. when it was merged with the
    # rest of the code, i.e. on revision cac9c9e9f08e).
    # However, for some reason, it does not fail anymore at this revision,
    # so the check below exhibits the same issue through
    # `crossentropy_softmax_argmax_1hot_with_bias`. If that op turns out to
    # be the cause, this part may belong in its own test file.
    activations = dmatrix()
    biases = dvector()
    targets = ivector()
    nll_softmax_argmax = crossentropy_softmax_argmax_1hot_with_bias(
        activations, biases, targets
    )
    g = function(
        [activations, biases, targets],
        Out(nll_softmax_argmax[1], borrow=False),
    )

    act_val = np.zeros((1, 5))
    bias_val = np.ones(5)
    tgt_val = np.zeros(1, dtype=np.int32)

    z = g(act_val, bias_val, tgt_val)
    z_backup = copy.copy(z)
    id_z = id(z)

    act_val[0, 0] = 1
    id_other = id(g(act_val, bias_val, tgt_val))

    # Calling the function again must return a pointer to a new array...
    assert id_z != id_other
    # ...and, just to be 100% sure, the first output must be unaltered.
    assert (z == z_backup).all()
def test_deepcopied_type_filter():
    """A deep-copied variable's type must still filter values cleanly.

    Regression test: this was failing as of commit
    731e2d2fa68487733320d341d08b454a50c90d12.
    """
    var = copy.deepcopy(matrix())
    var.type.filter(np.ones((2, 2), dtype=var.dtype), strict=True)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论