提交 0d197386 authored 作者: notoraptor's avatar notoraptor

Update. Many of @abergeron's comments have been taken into account.

Tests have been rewritten so that a CPU computation and a GPU computation are always performed (separately) on the same input. This allows running the tests with Theano profiling flags and then comparing the execution times of MaxAndArgmax (CPU) and GpuMaxAndArgmax (GPU). Some code related to MaxAndArgmax has also been modified in theano/tensor/basic.py, to make the API more uniform and to move most of the axis checking into the maxandargmax wrapper instead of the make_node functions of (Gpu)MaxAndArgmax.
上级 37115ad1
...@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant, ...@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
GpuArraySharedVariable, gpuarray_shared_constructor, GpuArraySharedVariable, gpuarray_shared_constructor,
reg_context, get_context, ContextNotDefined, _get_props) reg_context, get_context, ContextNotDefined, _get_props)
from .basic_ops import as_gpuarray_variable from .basic_ops import as_gpuarray_variable
from . import fft, dnn, opt, nerv, extra_ops, multinomial from . import fft, dnn, opt, nerv, extra_ops, multinomial, reduction
def transfer(x, target): def transfer(x, target):
try: try:
......
...@@ -65,6 +65,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor, ...@@ -65,6 +65,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20) GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import gpu_maxandargmax
_logger = logging.getLogger("theano.gpuarray.opt") _logger = logging.getLogger("theano.gpuarray.opt")
......
import os import os
import numpy import numpy
from six import integer_types
import theano import theano
from theano.gof import Variable, Op, Apply from theano.gof import Op, Apply
from theano.tensor.type_other import NoneConst
from theano.tensor.var import TensorConstant from theano.tensor.var import TensorConstant
# from theano.tensor import as_tensor_variable
from .basic_ops import (infer_context_name, as_gpuarray_variable) from .basic_ops import (infer_context_name, as_gpuarray_variable)
from .type import GpuArrayType from .type import GpuArrayType
...@@ -25,26 +23,13 @@ class GpuMaxAndArgmax(Op): ...@@ -25,26 +23,13 @@ class GpuMaxAndArgmax(Op):
def make_node(self, X, axis=None): def make_node(self, X, axis=None):
context_name = infer_context_name(X) context_name = infer_context_name(X)
# Check axis and convert it to Python variable. if axis is None:
if (isinstance(axis, (integer_types, numpy.integer)) or axis = range(X.type.ndim)
(isinstance(axis, numpy.ndarray) and axis.ndim == 0)): elif isinstance(axis, TensorConstant) and isinstance(axis.data, (list, numpy.ndarray)):
axis = [int(axis)]
elif isinstance(axis, (tuple, list, numpy.ndarray)):
axis = [int(a) for a in axis]
elif isinstance(axis, Variable):
if NoneConst.equals(axis):
axis = None
elif not isinstance(axis, TensorConstant):
raise TypeError("MaxAndArgmax needs a constant axis. Got %s" % axis)
else:
assert (axis.dtype.startswith("int") or axis.dtype.startswith("uint"))
if (isinstance(axis.data, (integer_types, numpy.integer)) or
(isinstance(axis.data, numpy.ndarray) and axis.data.ndim == 0)):
axis = [int(axis.data)]
elif isinstance(axis.data, (list, numpy.ndarray)):
axis = [int(i) for i in axis.data] axis = [int(i) for i in axis.data]
elif not isinstance(axis, list):
raise TypeError("Axis must be a list. Got %s" % axis)
# Make axis entries non-negative, and verify that axes are valid. # Make axis entries non-negative, and verify that axes are valid.
if isinstance(axis, list):
for idx in xrange(len(axis)): for idx in xrange(len(axis)):
if axis[idx] < 0: if axis[idx] < 0:
axis[idx] += X.type.ndim axis[idx] += X.type.ndim
...@@ -52,22 +37,10 @@ class GpuMaxAndArgmax(Op): ...@@ -52,22 +37,10 @@ class GpuMaxAndArgmax(Op):
raise ValueError('Invalid axis: %s (the number of dimensions of the ' raise ValueError('Invalid axis: %s (the number of dimensions of the '
'input is: %s)' % (axis[idx], X.type.ndim)) 'input is: %s)' % (axis[idx], X.type.ndim))
# Sort axes and make them unique. # Sort axes and make them unique.
axis_set = set() # used to build "broadcastable" variable below. axis_set = set(axis) # used to build "broadcastable" variable below.
all_axes = [] axis = list(axis_set)
if isinstance(axis, list): axis.sort()
axis_set = set(axis) axis = theano.tensor.as_tensor_variable(axis)
all_axes = list(axis_set)
all_axes.sort()
if all_axes == range(X.type.ndim):
axis = None
else:
all_axes = range(X.ndim)
axis_set = set(all_axes)
if axis is None:
axis = NoneConst.clone()
else:
axis = theano.tensor.as_tensor_variable(all_axes)
# assert axis.ndim == 1
inputs = [as_gpuarray_variable(X, context_name), axis] inputs = [as_gpuarray_variable(X, context_name), axis]
# We keep the original broadcastable flags for dimensions on which # We keep the original broadcastable flags for dimensions on which
# we do not perform the max / argmax. # we do not perform the max / argmax.
...@@ -78,13 +51,11 @@ class GpuMaxAndArgmax(Op): ...@@ -78,13 +51,11 @@ class GpuMaxAndArgmax(Op):
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, node, inputs, outputs): def perform(self, node, inputs, outputs):
X, axes = inputs # NB: I must rewrite this method with pygpu functions instead of numpy functions.
x, axes = inputs
max, max_idx = outputs max, max_idx = outputs
if axes is None: X = numpy.asarray(x)
axes = tuple(range(X.ndim))
else:
axes = tuple(axes) axes = tuple(axes)
# axes = tuple(int(ax) for ax in axes)
max[0] = theano._asarray(numpy.max(X, axes), dtype=node.outputs[0].dtype) max[0] = theano._asarray(numpy.max(X, axes), dtype=node.outputs[0].dtype)
# Numpy does not support multiple axes for argmax # Numpy does not support multiple axes for argmax
# Work around # Work around
...@@ -110,45 +81,33 @@ class GpuMaxAndArgmax(Op): ...@@ -110,45 +81,33 @@ class GpuMaxAndArgmax(Op):
# Recall: fail = sub['fail'] # Recall: fail = sub['fail']
max_typecode = pygpu.gpuarray.dtype_to_typecode(node.inputs[0].dtype) max_typecode = pygpu.gpuarray.dtype_to_typecode(node.inputs[0].dtype)
argmax_typecode = pygpu.gpuarray.dtype_to_typecode(self.argmax_dtype) argmax_typecode = pygpu.gpuarray.dtype_to_typecode(self.argmax_dtype)
# axes_ctype = pygpu.gpuarray.dtype_to_ctype(node.inputs[1].dtype)
axes_ctype = 'int64_t' axes_ctype = 'int64_t'
assert node.inputs[1].ndim == 1
ret = """ ret = """
GpuArray temp;
GpuArray* %(name)s_input = &%(X)s->ga; GpuArray* %(name)s_input = &%(X)s->ga;
size_t %(name)s_input_ndim = PyGpuArray_NDIM(%(X)s); size_t %(name)s_input_ndim = PyGpuArray_NDIM(%(X)s);
"""
if NoneConst.equals(node.inputs[1]):
ret += """
unsigned %(name)s_redux_len = %(name)s_input_ndim;
unsigned* %(name)s_axes_to_reduce = new unsigned[%(name)s_redux_len];
for(unsigned i = 0; i < %(name)s_redux_len; ++i) {
%(name)s_axes_to_reduce[i] = i;
}
"""
else:
assert node.inputs[1].ndim == 1
ret += """
unsigned %(name)s_redux_len = PyArray_DIM(%(axes)s, 0); unsigned %(name)s_redux_len = PyArray_DIM(%(axes)s, 0);
unsigned* %(name)s_axes_to_reduce = new unsigned[%(name)s_redux_len]; unsigned* %(name)s_axes_to_reduce = (unsigned*)malloc(%(name)s_redux_len * sizeof(unsigned));
for(unsigned i = 0; i < %(name)s_redux_len; ++i) { for (unsigned i = 0; i < %(name)s_redux_len; ++i) {
%(name)s_axes_to_reduce[i] = %(name)s_axes_to_reduce[i] = (unsigned) (*(%(axes_ctype)s*)PyArray_GETPTR1(%(axes)s, i));
(unsigned)( ((%(axes_ctype)s*)PyArray_DATA(%(axes)s)) [i * (PyArray_STRIDES(%(axes)s)[0] / sizeof(%(axes_ctype)s))] );
} }
"""
ret += """
size_t %(name)s_output_ndim = %(name)s_input_ndim - %(name)s_redux_len; size_t %(name)s_output_ndim = %(name)s_input_ndim - %(name)s_redux_len;
size_t* %(name)s_output_dims = NULL; size_t* %(name)s_output_dims = NULL;
if(%(name)s_output_ndim == 0) { if (%(name)s_output_ndim == 0) {
/* Current backend function GpuArray_maxandargmax does not work when /* Current backend function GpuArray_maxandargmax does not work when
* all axes need to be reduced. So to handle this case, we create a view * all axes need to be reduced. So to handle this case, we create a view
* of the input as a matrix with 1 row and as many columns as elements * of the input as a matrix with 1 row and as many columns as elements
* in the input, so that the 2nd dimenson of the matrix will be reduced. */ * in the input, so that the 2nd dimenson of the matrix will be reduced. */
size_t total_size = 1; size_t total_size = 1;
for(size_t i = 0; i < %(name)s_input_ndim; ++i) { for (size_t i = 0; i < %(name)s_input_ndim; ++i) {
total_size *= PyGpuArray_DIM(%(X)s, i); total_size *= PyGpuArray_DIM(%(X)s, i);
} }
size_t newdims[2] = {1, total_size}; size_t newdims[2] = {1, total_size};
%(name)s_input = new GpuArray; %(name)s_input = &temp;
if(GA_NO_ERROR != if (GA_NO_ERROR !=
GpuArray_reshape(%(name)s_input, &%(X)s->ga, 2, newdims, GA_ANY_ORDER, 0) GpuArray_reshape(%(name)s_input, &%(X)s->ga, 2, newdims, GA_ANY_ORDER, 0)
) { ) {
%(fail)s %(fail)s
...@@ -156,34 +115,34 @@ class GpuMaxAndArgmax(Op): ...@@ -156,34 +115,34 @@ class GpuMaxAndArgmax(Op):
%(name)s_redux_len = 1; %(name)s_redux_len = 1;
%(name)s_axes_to_reduce[0] = 1; %(name)s_axes_to_reduce[0] = 1;
} else { } else {
%(name)s_output_dims = new size_t[%(name)s_output_ndim]; %(name)s_output_dims = (size_t*)malloc(%(name)s_output_ndim * sizeof(size_t));
if(%(name)s_redux_len == 1) { if (%(name)s_redux_len == 1) {
for(unsigned i = 0; i < %(name)s_axes_to_reduce[0]; ++i) { for (unsigned i = 0; i < %(name)s_axes_to_reduce[0]; ++i) {
%(name)s_output_dims[i] = PyGpuArray_DIM(%(X)s, i); %(name)s_output_dims[i] = PyGpuArray_DIM(%(X)s, i);
} }
for(unsigned i = %(name)s_axes_to_reduce[0] + 1; i < %(name)s_input_ndim; ++i) { for (unsigned i = %(name)s_axes_to_reduce[0] + 1; i < %(name)s_input_ndim; ++i) {
%(name)s_output_dims[i-1] = PyGpuArray_DIM(%(X)s, i); %(name)s_output_dims[i-1] = PyGpuArray_DIM(%(X)s, i);
} }
} else { } else {
int64_t current_input_pos = -1; int64_t current_input_pos = -1;
int64_t current_output_pos = -1; int64_t current_output_pos = -1;
for(unsigned i = 0; i < %(name)s_redux_len; ++i) { for (unsigned i = 0; i < %(name)s_redux_len; ++i) {
for(++current_input_pos; current_input_pos < %(name)s_axes_to_reduce[i]; ++current_input_pos) { for (++current_input_pos; current_input_pos < %(name)s_axes_to_reduce[i]; ++current_input_pos) {
%(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos); %(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos);
} }
} }
for(++current_input_pos; current_input_pos < %(name)s_input_ndim; ++current_input_pos) { for (++current_input_pos; current_input_pos < %(name)s_input_ndim; ++current_input_pos) {
%(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos); %(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos);
} }
} }
} }
if(theano_prep_output(&%(max)s, %(name)s_output_ndim, %(name)s_output_dims, %(max_typecode)s, GA_C_ORDER, %(X)s->context)) { if (theano_prep_output(&%(max)s, %(name)s_output_ndim, %(name)s_output_dims, %(max_typecode)s, GA_C_ORDER, %(X)s->context)) {
%(fail)s %(fail)s
} }
if(theano_prep_output(&%(argmax)s, %(name)s_output_ndim, %(name)s_output_dims, %(argmax_typecode)s, GA_C_ORDER, %(X)s->context)) { if (theano_prep_output(&%(argmax)s, %(name)s_output_ndim, %(name)s_output_dims, %(argmax_typecode)s, GA_C_ORDER, %(X)s->context)) {
%(fail)s %(fail)s
} }
if(GA_NO_ERROR != if (GA_NO_ERROR !=
GpuArray_maxandargmax(&%(max)s->ga, &%(argmax)s->ga, %(name)s_input, %(name)s_redux_len, %(name)s_axes_to_reduce) GpuArray_maxandargmax(&%(max)s->ga, &%(argmax)s->ga, %(name)s_input, %(name)s_redux_len, %(name)s_axes_to_reduce)
) { ) {
%(fail)s %(fail)s
...@@ -200,9 +159,8 @@ class GpuMaxAndArgmax(Op): ...@@ -200,9 +159,8 @@ class GpuMaxAndArgmax(Op):
def c_code_cleanup(self, node, name, inputs, outputs, sub): def c_code_cleanup(self, node, name, inputs, outputs, sub):
return """ return """
delete[] %(name)s_output_dims; free(%(name)s_output_dims);
if(%(name)s_input != &%(X)s->ga) delete %(name)s_input; free(%(name)s_axes_to_reduce);
delete[] %(name)s_axes_to_reduce;
""" % {'name': name, 'X': inputs[0]} """ % {'name': name, 'X': inputs[0]}
......
from unittest import TestCase from unittest import TestCase
from theano.gpuarray import GpuArrayType
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
import numpy as np import numpy as np
import theano import theano
import theano.tensor as T import theano.tensor as T
from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand_gpuarray
test_shape = (1000, 100, 10, 5, 2)
def randomTensor(*shapes): def numpy_random_array(*shapes):
dimlist = shapes dimlist = shapes
size = 1 size = 1
for dimsize in dimlist: for dimsize in dimlist:
size *= dimsize size *= dimsize
return np.random.normal(size=size).astype(np.float32).reshape(dimlist) return np.random.normal(size=size).astype(theano.config.floatX).reshape(dimlist)
def numpyMaxAndArgmax(X, axis=None): def numpy_maxandargmax(X, axis=None):
if axis is None: if axis is None:
axis = range(X.ndim) axis = range(X.ndim)
elif not isinstance(axis, (tuple, list)): elif not isinstance(axis, (tuple, list)):
...@@ -33,107 +40,93 @@ def numpyMaxAndArgmax(X, axis=None): ...@@ -33,107 +40,93 @@ def numpyMaxAndArgmax(X, axis=None):
reshaped_x = transposed_x.reshape(new_shape) reshaped_x = transposed_x.reshape(new_shape)
return (ref_max, np.argmax(reshaped_x, axis=-1)) return (ref_max, np.argmax(reshaped_x, axis=-1))
# We run all tests with 5-D tensors of 10 000 000 elements.
# NB: In each test, any first call of theano function should be ignored
# with Theano config flag profiling.ignore_first_call=True.
def check_if_gpu_maxandargmax_in_graph(theano_function):
assert len([node for node in theano_function.maker.fgraph.apply_nodes
if isinstance(node.op, theano.gpuarray.reduction.GpuMaxAndArgmax)]) > 0
def check_if_gpu_maxandargmax_not_in_graph(theano_function):
assert len([node for node in theano_function.maker.fgraph.apply_nodes
if isinstance(node.op, theano.gpuarray.reduction.GpuMaxAndArgmax)]) == 0
def run_gpu_tensor5(test_matrix=None, axis=None):
M = GpuArrayType(dtype=theano.config.floatX, broadcastable=(False,) * 5)()
f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)], name='GPU-function', mode=mode_with_gpu)
check_if_gpu_maxandargmax_in_graph(f)
if test_matrix is None:
test_matrix = rand_gpuarray(*test_shape)
f(test_matrix)
theano_max, theano_argmax = f(test_matrix)
ref_max, ref_argmax = numpy_maxandargmax(np.asarray(test_matrix), axis=axis)
utt.assert_allclose(ref_max, theano_max)
utt.assert_allclose(ref_argmax, theano_argmax)
class TestGpuMaxAndArgmax(TestCase):
# We run all tests with 5-D tensors of 10 000 000 elements.
# NB: In each test, any first call of theano function should be ignored
# with Theano config flag profiling.ignore_first_call=True.
# To just check if GpuMaxAndArgmax is called:
# $ theano-cache purge && THEANO_FLAGS=floatX=float32,device=cuda,profile=True,profiling.ignore_first_call=True \
# nosetests --verbose theano/gpuarray/tests/test_GpuMaxAndArgmax.py:TestGpuMaxAndArgmax.test_none
def _basic_test_tensor5(self, axis=None): def run_cpu_tensor5(test_matrix=None, axis=None):
M = T.tensor5() M = T.tensor5()
max_M = T.max(M, axis=axis) f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)], name='cpu-function', mode=mode_without_gpu)
argmax_M = T.argmax(M, axis=axis) check_if_gpu_maxandargmax_not_in_graph(f)
f = theano.function([M], [max_M, argmax_M]) if test_matrix is None:
test_matrix = randomTensor(1000, 100, 10, 5, 2) test_matrix = numpy_random_array(*test_shape)
f(test_matrix) f(test_matrix)
theano_max, theano_argmax = f(test_matrix) theano_max, theano_argmax = f(test_matrix)
ref_max, ref_argmax = numpyMaxAndArgmax(test_matrix, axis=axis) ref_max, ref_argmax = numpy_maxandargmax(test_matrix, axis=axis)
utt.assert_allclose(ref_max, theano_max) utt.assert_allclose(ref_max, theano_max)
utt.assert_allclose(ref_argmax, theano_argmax) utt.assert_allclose(ref_argmax, theano_argmax)
def _basic_test_assert_equals(self, axis1, axis2):
M1 = T.tensor5() def run_tensor5(axis=None):
M2 = T.tensor5() test_cpu_matrix = numpy_random_array(*test_shape)
f1 = theano.function([M1], [T.max(M1, axis=axis1), T.argmax(M1, axis=axis1)]) test_gpu_matrix = rand_gpuarray(*test_shape)
f2 = theano.function([M2], [T.max(M2, axis=axis2), T.argmax(M2, axis=axis2)]) run_cpu_tensor5(test_cpu_matrix, axis)
test_matrix = randomTensor(1000, 100, 10, 5, 2) run_gpu_tensor5(test_gpu_matrix, axis)
f1(test_matrix)
f2(test_matrix)
theano1 = f1(test_matrix) def test_none():
theano2 = f2(test_matrix) run_tensor5(None)
ref1 = numpyMaxAndArgmax(test_matrix, axis1)
ref2 = numpyMaxAndArgmax(test_matrix, axis2)
utt.assert_allclose(ref1, ref2) def test_all_axes():
utt.assert_allclose(theano1, theano2) run_tensor5((0, 1, 2, 3, 4))
utt.assert_allclose(ref1, theano1)
def test_none(self): def test_all_axes_unsorted():
self._basic_test_tensor5(None) run_tensor5((4, 1, 3, 0, 2))
def test_all_axes(self):
self._basic_test_tensor5((0, 1, 2, 3, 4)) def test_axis_1():
run_tensor5(0)
def test_1_axe(self):
self._basic_test_tensor5(3)
def test_axis_2():
def test_2_axes(self): run_tensor5(1)
self._basic_test_tensor5((0, 3))
def test_3_axes(self): def test_axis_3():
self._basic_test_tensor5((0, 3, 4)) run_tensor5(2)
def test_4_axes(self):
self._basic_test_tensor5((0, 1, 2, 4)) def test_axis_4():
run_tensor5(3)
def test_simple(self):
self._basic_test_tensor5(None)
self._basic_test_tensor5((0, 1, 2, 3, 4)) def test_axis_5():
self._basic_test_tensor5((4, 1, 3, 2)) run_tensor5(4)
def test_assert_equals(self):
self._basic_test_assert_equals(None, (0, 1, 2, 3, 4)) def test_2_axes():
self._basic_test_assert_equals(0, (0, 0)) run_tensor5((0, 3))
self._basic_test_assert_equals((4, 1, 3, 2), (1, 2, 3, 4))
self._basic_test_assert_equals((4, 3, 2, 1, 0), None)
self._basic_test_assert_equals((1, 3, 4), (1, 4, 4, 1, 3, 1, 3, 4, 3, 1, 1, 3, 1, 4, 1, 4)) def test_3_axes():
run_tensor5((0, 3, 4))
def test_simple_1_axis(self):
self._basic_test_tensor5(0)
self._basic_test_tensor5(1) def test_4_axes():
self._basic_test_tensor5(2) run_tensor5((0, 1, 2, 4))
self._basic_test_tensor5(3)
self._basic_test_tensor5(4)
def test_simple_2_axis(self):
self._basic_test_tensor5((0, 0))
self._basic_test_tensor5((0, 1))
self._basic_test_tensor5((0, 2))
self._basic_test_tensor5((0, 3))
self._basic_test_tensor5((0, 4))
self._basic_test_tensor5((1, 0))
self._basic_test_tensor5((1, 1))
self._basic_test_tensor5((1, 2))
self._basic_test_tensor5((1, 3))
self._basic_test_tensor5((1, 4))
self._basic_test_tensor5((2, 0))
self._basic_test_tensor5((2, 1))
self._basic_test_tensor5((2, 2))
self._basic_test_tensor5((2, 3))
self._basic_test_tensor5((2, 4))
self._basic_test_tensor5((3, 0))
self._basic_test_tensor5((3, 1))
self._basic_test_tensor5((3, 2))
self._basic_test_tensor5((3, 3))
self._basic_test_tensor5((3, 4))
self._basic_test_tensor5((4, 0))
self._basic_test_tensor5((4, 1))
self._basic_test_tensor5((4, 2))
self._basic_test_tensor5((4, 3))
self._basic_test_tensor5((4, 4))
...@@ -1186,32 +1186,12 @@ class MaxAndArgmax(Op): ...@@ -1186,32 +1186,12 @@ class MaxAndArgmax(Op):
def make_node(self, x, axis=None): def make_node(self, x, axis=None):
x = _as_tensor_variable(x) x = _as_tensor_variable(x)
if isinstance(axis, (integer_types, numpy.integer)): if axis is None:
axis = [int(axis)] axis = range(x.type.ndim)
elif isinstance(axis, numpy.ndarray) and axis.ndim == 0: elif not isinstance(axis, list):
axis = [int(axis)] raise TypeError("Axis must be a list. Got %s" % axis)
elif isinstance(axis, (tuple, list, numpy.ndarray)):
axis = [int(a) for a in axis]
if axis == list(range(x.type.ndim)):
axis = None
elif isinstance(axis, Variable):
if NoneConst.equals(axis):
axis = None
elif not isinstance(axis, TensorConstant):
raise TypeError(
"MaxAndArgmax needs a constant axis. Got %s" % axis)
else:
assert (axis.dtype.startswith("int") or
axis.dtype.startswith("uint"))
if isinstance(axis.data, (integer_types, numpy.integer)) or \
(isinstance(axis.data, numpy.ndarray) and
axis.data.ndim == 0):
axis = [int(axis.data)]
elif isinstance(axis.data, (list, numpy.ndarray)):
axis = [int(i) for i in axis.data]
# Make axis entries non-negative, and sort them # Make axis entries non-negative, and sort them
if isinstance(axis, list):
for idx in xrange(len(axis)): for idx in xrange(len(axis)):
if axis[idx] < 0: if axis[idx] < 0:
axis[idx] += x.type.ndim axis[idx] += x.type.ndim
...@@ -1219,7 +1199,6 @@ class MaxAndArgmax(Op): ...@@ -1219,7 +1199,6 @@ class MaxAndArgmax(Op):
# Verify that axes are valid # Verify that axes are valid
all_axes = [] all_axes = []
if isinstance(axis, list):
for ax in axis: for ax in axis:
if ax < 0 or ax >= x.type.ndim: if ax < 0 or ax >= x.type.ndim:
raise ValueError( raise ValueError(
...@@ -1227,12 +1206,6 @@ class MaxAndArgmax(Op): ...@@ -1227,12 +1206,6 @@ class MaxAndArgmax(Op):
'input is: %s)' % (ax, x.type.ndim)) 'input is: %s)' % (ax, x.type.ndim))
if ax not in all_axes: if ax not in all_axes:
all_axes.append(ax) all_axes.append(ax)
else:
all_axes = list(range(x.ndim))
if axis is None or axis == list(range(x.type.ndim)):
axis = NoneConst.clone()
else:
axis = _as_tensor_variable(all_axes) axis = _as_tensor_variable(all_axes)
assert axis.ndim == 1 assert axis.ndim == 1
inputs = [x, axis] inputs = [x, axis]
...@@ -1272,7 +1245,7 @@ class MaxAndArgmax(Op): ...@@ -1272,7 +1245,7 @@ class MaxAndArgmax(Op):
x, axis = inp x, axis = inp
max, argmax = out max, argmax = out
fail = sub["fail"] fail = sub["fail"]
if NoneConst.equals(node.inputs[1]): if NoneConst.equals(node.inputs[1]) or len(node.inputs[1].data) == node.inputs[0].ndim:
axis_code = "axis = NPY_MAXDIMS;" axis_code = "axis = NPY_MAXDIMS;"
else: else:
assert node.inputs[1].ndim == 1 assert node.inputs[1].ndim == 1
...@@ -1637,6 +1610,26 @@ def max_and_argmax(a, axis=None, keepdims=False): ...@@ -1637,6 +1610,26 @@ def max_and_argmax(a, axis=None, keepdims=False):
will broadcast correctly against the original tensor. will broadcast correctly against the original tensor.
""" """
# Check axis and convert it to a Python list of integers.
if axis is None:
axis = range(a.type.ndim)
elif (isinstance(axis, (integer_types, numpy.integer)) or
(isinstance(axis, numpy.ndarray) and axis.ndim == 0)):
axis = [int(axis)]
elif isinstance(axis, (tuple, list, numpy.ndarray)):
axis = [int(i) for i in axis]
elif isinstance(axis, Variable):
if NoneConst.equals(axis):
axis = range(a.type.ndim)
elif not isinstance(axis, TensorConstant):
raise TypeError("max and argmax computation needs a constant axis. Got %s" % axis)
else:
assert (axis.dtype.startswith("int") or axis.dtype.startswith("uint"))
if (isinstance(axis.data, (integer_types, numpy.integer)) or
(isinstance(axis.data, numpy.ndarray) and axis.data.ndim == 0)):
axis = [int(axis.data)]
elif isinstance(axis.data, (list, numpy.ndarray)):
axis = [int(i) for i in axis.data]
out, argout = _max_and_argmax(a, axis) out, argout = _max_and_argmax(a, axis)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论