Commit 3347480a, authored by Pascal Lamblin, committed via GitHub

Merge pull request #5168 from notoraptor/master

This is my proposal for GpuMaxAndArgmax (issue #1399).
......@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
GpuArraySharedVariable, gpuarray_shared_constructor,
reg_context, get_context, ContextNotDefined, _get_props)
from .basic_ops import as_gpuarray_variable
from . import fft, dnn, opt, nerv, extra_ops, multinomial
from . import fft, dnn, opt, nerv, extra_ops, multinomial, reduction
def transfer(x, target):
try:
......
......@@ -65,6 +65,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax
_logger = logging.getLogger("theano.gpuarray.opt")
......@@ -1775,6 +1776,14 @@ def _scan_type_infer(node):
context_name=context_name)
return typebuild
# Add optimization : maxandargmax (CPU -> GPU)
@register_opt('fast_compile')
@op_lifter([tensor.MaxAndArgmax])
@register_opt2([tensor.MaxAndArgmax], 'fast_compile')
def local_gpu_maxandargmax(op, context_name, inputs, outputs):
    # Lift the CPU MaxAndArgmax op to its GPU counterpart.
    # op.get_params(None) returns the op's axis tuple (get_params ignores
    # its node argument), which GpuMaxAndArgmax accepts directly.
    return GpuMaxAndArgmax(op.get_params(None))
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
optdb.register('gpua_scanOp_make_inplace',
......
from __future__ import print_function, absolute_import, division
import os
import theano
from theano.gof import Op, Apply
from theano.gof.type import Generic
from .basic_ops import (infer_context_name, as_gpuarray_variable)
from .type import GpuArrayType
try:
    import pygpu
except ImportError as e:
    # NOTE(review): the ImportError is swallowed here, but `pygpu` is used
    # unconditionally in GpuMaxAndArgmax.c_header_dirs()/c_code() below, so
    # a missing pygpu surfaces later as a NameError — confirm this lazy
    # failure mode is intended.
    pass
class GpuMaxAndArgmax(Op):
    """
    GPU version of MaxAndArgmax.

    Computes, in a single pass, both the maximum and the index of the
    maximum of its input over the axes given at construction time, by
    delegating to libgpuarray's ``GpuArray_maxandargmax``.

    Parameters
    ----------
    axis : list or tuple of int
        Axes to reduce over. Stored as a tuple and passed to the C code
        through the op params mechanism.

    """

    # The axis tuple is handed to the C code as a plain Python object,
    # hence the Generic params type.
    params_type = Generic()
    __props__ = ('axis',)
    # dtype of the argmax output.
    argmax_dtype = "int64"

    def __init__(self, axis):
        assert isinstance(axis, (list, tuple))
        self.axis = tuple(axis)

    def get_params(self, node):
        # The params object is the axis tuple itself; `node` is unused.
        return self.axis

    def make_node(self, X):
        context_name = infer_context_name(X)
        # We keep the original broadcastable flags for dimensions on which
        # we do not perform the max / argmax.
        all_axes = set(self.axis)
        broadcastable = [b for i, b in enumerate(X.type.broadcastable)
                         if i not in all_axes]
        inputs = [as_gpuarray_variable(X, context_name)]
        outputs = [GpuArrayType(X.type.dtype, broadcastable,
                                context_name=context_name, name='max')(),
                   GpuArrayType(self.argmax_dtype, broadcastable,
                                context_name=context_name, name='argmax')()]
        return Apply(self, inputs, outputs)

    def c_headers(self):
        return ['<numpy_compat.h>', '<gpuarray_helper.h>']

    def c_header_dirs(self):
        return [pygpu.get_include(), os.path.dirname(__file__)]

    def c_code(self, node, name, input_names, output_names, sub):
        """Generate the C code computing max and argmax on the GPU.

        Raises (at C runtime) RuntimeError on allocation or kernel
        failure.
        """
        # Recall: X = input_names[0]
        # Recall: axes = sub['params']
        # Recall: max, argmax = output_names
        # Recall: fail = sub['fail']
        max_typecode = pygpu.gpuarray.dtype_to_typecode(node.inputs[0].dtype)
        argmax_typecode = pygpu.gpuarray.dtype_to_typecode(self.argmax_dtype)
        ret = """
        #if PY_MAJOR_VERSION >= 3
            #ifndef PyInt_AS_LONG
                #define PyInt_AS_LONG PyLong_AS_LONG
            #endif
        #endif

        unsigned %(name)s_redux_len = PyTuple_GET_SIZE(%(axes)s);
        size_t %(name)s_input_ndim = PyGpuArray_NDIM(%(X)s);
        size_t %(name)s_output_ndim = %(name)s_input_ndim - %(name)s_redux_len;
        /* Both buffers are allocated before any %(fail)s is possible, so
         * no goto crosses an initialization (required when compiled as
         * C++) and c_code_cleanup() can always call free() safely.
         * A zero-sized malloc (0-d input: redux_len == 0) may legally
         * return NULL, so only treat NULL as an error when a non-empty
         * allocation was requested. */
        unsigned* %(name)s_axes_to_reduce = (unsigned*)malloc(%(name)s_redux_len * sizeof(unsigned));
        size_t* %(name)s_output_dims = (size_t*)malloc(%(name)s_output_ndim * sizeof(size_t));
        if ((%(name)s_axes_to_reduce == NULL && %(name)s_redux_len > 0) ||
            (%(name)s_output_dims == NULL && %(name)s_output_ndim > 0)) {
            PyErr_NoMemory();
            %(fail)s
        }
        for (unsigned i = 0; i < %(name)s_redux_len; ++i) {
            /* PyTuple_GET_ITEM returns a *borrowed* reference: it must
             * not be decref'd. */
            PyObject* axis_object = PyTuple_GET_ITEM(%(axes)s, i);
            %(name)s_axes_to_reduce[i] = (unsigned) PyInt_AS_LONG(axis_object);
        }
        if (%(name)s_redux_len == 1) {
            /* Single reduced axis: copy every other dimension. */
            for (unsigned i = 0; i < %(name)s_axes_to_reduce[0]; ++i) {
                %(name)s_output_dims[i] = PyGpuArray_DIM(%(X)s, i);
            }
            for (unsigned i = %(name)s_axes_to_reduce[0] + 1; i < %(name)s_input_ndim; ++i) {
                %(name)s_output_dims[i-1] = PyGpuArray_DIM(%(X)s, i);
            }
        } else {
            /* Several reduced axes: keep the dimensions not listed in
             * axes_to_reduce. NOTE(review): this walk assumes the axes
             * are sorted in ascending order — callers build them sorted;
             * confirm if constructing this op directly. */
            int64_t current_input_pos = -1;
            int64_t current_output_pos = -1;
            for (unsigned i = 0; i < %(name)s_redux_len; ++i) {
                for (++current_input_pos; current_input_pos < %(name)s_axes_to_reduce[i]; ++current_input_pos) {
                    %(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos);
                }
            }
            for (++current_input_pos; current_input_pos < %(name)s_input_ndim; ++current_input_pos) {
                %(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos);
            }
        }
        if (theano_prep_output(&%(max)s, %(name)s_output_ndim, %(name)s_output_dims, %(max_typecode)s, GA_C_ORDER, %(X)s->context)) {
            PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to prepare max output.");
            %(fail)s
        }
        if (theano_prep_output(&%(argmax)s, %(name)s_output_ndim, %(name)s_output_dims, %(argmax_typecode)s, GA_C_ORDER, %(X)s->context)) {
            PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to prepare argmax output.");
            %(fail)s
        }
        if (%(name)s_input_ndim == 0) {
            /* GpuArray_maxandargmax can't handle a 0-d array
             * because it expects that 1 <= redux_len <= input_ndim.
             * As input_ndim == 0, then 1 <= redux_len <= 0 is false.
             * To handle this case we copy input to max and we set argmax to 0.
             */
            if (GA_NO_ERROR != GpuArray_setarray(&%(max)s->ga, &%(X)s->ga)) {
                PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to copy input to max when input is a scalar.");
                %(fail)s
            }
            if (GA_NO_ERROR != GpuArray_memset(&%(argmax)s->ga, 0)) {
                PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to set argmax to 0 when input is a scalar.");
                %(fail)s
            }
        } else if (GA_NO_ERROR !=
            GpuArray_maxandargmax(&%(max)s->ga, &%(argmax)s->ga, &%(X)s->ga, %(name)s_redux_len, %(name)s_axes_to_reduce)
        ) {
            PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to compute gpuarray maxandargmax.");
            %(fail)s
        }
        """
        if theano.config.gpuarray.sync:
            ret += """
        GpuArray_sync(&%(max)s->ga);
        GpuArray_sync(&%(argmax)s->ga);
        """
        return ret % {'X': input_names[0], 'axes': sub['params'],
                      'max': output_names[0], 'argmax': output_names[1],
                      'max_typecode': max_typecode,
                      'argmax_typecode': argmax_typecode,
                      'name': name, 'fail': sub['fail']}

    def c_code_cleanup(self, node, name, inputs, outputs, sub):
        # free(NULL) is a no-op, so this is safe even when c_code failed
        # before either allocation succeeded.
        return """
        free(%(name)s_output_dims);
        free(%(name)s_axes_to_reduce);
        """ % {'name': name, 'X': inputs[0]}
from __future__ import print_function, absolute_import, division
from unittest import TestCase
import numpy as np
import theano
import theano.tensor as T
from theano.tests import unittest_tools as utt
from theano.tests.unittest_tools import SkipTest
from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand_gpuarray
from .. import GpuArrayType
import math
# Number of values to be used in test tensors (except with 0-D tensors!).
test_size = 10000000
# NB: This order of "unsorted axes" is arbitrary and is here
# just to have the same information on profile output
# from one test to another.
unsorted_axes = (2, 4, 0, 3, 1)
# NOTE(review): seeding without an argument draws the seed from OS
# entropy, so test data differs between runs — confirm that this
# non-reproducibility is intended.
np.random.seed()
def numpy_random_array(shapes):
    """Return a tensor of the given shape filled with standard-normal
    values, cast to theano.config.floatX."""
    flat_size = int(np.prod(shapes, dtype='int64'))
    flat = np.random.normal(size=flat_size).astype(theano.config.floatX)
    return flat.reshape(shapes)
def numpy_maxandargmax(X, axis=None):
    """NumPy reference for max+argmax over one or several axes.

    Returns a pair ``(max, argmax)`` where ``argmax`` indexes into the
    flattened reduced dimensions, mirroring MaxAndArgmax.perform().
    """
    # Normalize axis to a sorted tuple of unique ints.
    if axis is None:
        axis = range(X.ndim)
    elif not isinstance(axis, (tuple, list)):
        axis = [int(axis)]
    axis = tuple(sorted(set(axis)))
    ref_max = np.max(X, axis=axis)
    # NumPy's argmax handles only one axis, so move the kept axes to the
    # front, collapse the reduced axes into a single trailing one, and
    # take argmax over it (same workaround as MaxAndArgmax.perform()).
    kept = np.array([d for d in range(X.ndim) if d not in axis],
                    dtype='int64')
    shuffled = np.transpose(X, np.concatenate((kept, axis)))
    n_kept = len(kept)
    flat_reduced = int(np.prod(shuffled.shape[n_kept:], dtype='int64'))
    collapsed = shuffled.reshape(shuffled.shape[:n_kept] + (flat_reduced,))
    return (ref_max, np.argmax(collapsed, axis=-1))
def check_if_gpu_maxandargmax_in_graph(theano_function):
    """Assert that the compiled function's graph contains at least one
    GpuMaxAndArgmax node."""
    nodes = theano_function.maker.fgraph.apply_nodes
    assert any(isinstance(node.op, theano.gpuarray.reduction.GpuMaxAndArgmax)
               for node in nodes)
def check_if_gpu_maxandargmax_not_in_graph(theano_function):
    """Assert that no GpuMaxAndArgmax node is present in the compiled
    function's graph."""
    nodes = theano_function.maker.fgraph.apply_nodes
    assert not any(isinstance(node.op,
                              theano.gpuarray.reduction.GpuMaxAndArgmax)
                   for node in nodes)
class BaseTest:
    """Shared test machinery: build one random tensor, compute max/argmax
    with Theano on CPU and on GPU, and check both against the NumPy
    reference implementation (numpy_maxandargmax).

    Subclasses must set `tensor_size` (the tensor ndim) and may override
    `shape` and `dtype`.
    """
    # This attribute must be set in subclasses.
    tensor_size = None
    # Explicit shape; when None, computed from tensor_size in setUp().
    shape = None
    dtype = theano.config.floatX
    def get_shape(self):
        # Each dimension gets the ndim-th root of test_size, so the total
        # number of elements is approximately test_size.
        if self.tensor_size == 0:
            return []
        return [int(math.ceil(math.pow(test_size, 1 / self.tensor_size)))] * self.tensor_size
    def setUp(self):
        # Skip (rather than fail) when the subclass configuration is
        # missing or outside the supported 0..5-D range.
        if not isinstance(self.tensor_size, int):
            raise SkipTest("No tensor ndim defined.")
        if self.tensor_size < 0 or self.tensor_size > 5:
            raise SkipTest("We allow from 0 (included) to 5 (inclued) dimensons for these tests.")
        if self.shape is None:
            self.shape = self.get_shape()
    def get_host_tensor(self):
        # Symbolic CPU tensor with no broadcastable dimensions.
        broadcastable = (False,) * self.tensor_size
        return T.tensor(self.dtype, broadcastable)
    def get_gpu_tensor(self):
        # Symbolic GPU tensor with no broadcastable dimensions.
        broadcastable = (False,) * self.tensor_size
        return GpuArrayType(self.dtype, broadcastable)()
    def get_host_value(self):
        # NOTE(review): appears unused in this file; compute() draws its
        # host value from the GPU value instead.
        return numpy_random_array(self.shape)
    def get_gpu_value(self):
        return rand_gpuarray(*self.shape)
    # NB: In compute_host() and compute_gpu(),
    # the first call of the theano function should be ignored in profiling,
    # with Theano config flag profiling.ignore_first_call=True.
    def compute_host(self, test_tensor, axis):
        # Compile for CPU, verify the GPU op is NOT in the graph, then
        # compare against the NumPy reference.
        M = self.get_host_tensor()
        f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)],
                            name='shape:' + str(test_tensor.shape) + '/axis:' + str(axis) + '/HOST', mode=mode_without_gpu)
        check_if_gpu_maxandargmax_not_in_graph(f)
        # Warm-up call; see NB above about profiling.
        f(test_tensor)
        theano_max, theano_argmax = f(test_tensor)
        ref_max, ref_argmax = numpy_maxandargmax(test_tensor, axis=axis)
        utt.assert_allclose(ref_max, theano_max)
        utt.assert_allclose(ref_argmax, theano_argmax)
    def compute_gpu(self, test_gpu_tensor, test_host_tensor, axis):
        # Compile for GPU, verify GpuMaxAndArgmax IS in the graph, then
        # compare against the NumPy reference.
        M = self.get_gpu_tensor()
        f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)],
                            name='shape:' + str(test_gpu_tensor.shape) + '/axis:' + str(axis) + '/GPU', mode=mode_with_gpu)
        check_if_gpu_maxandargmax_in_graph(f)
        # Warm-up call; see NB above about profiling.
        f(test_gpu_tensor)
        theano_max, theano_argmax = f(test_gpu_tensor)
        ref_max, ref_argmax = numpy_maxandargmax(test_host_tensor, axis=axis)
        utt.assert_allclose(ref_max, theano_max)
        utt.assert_allclose(ref_argmax, theano_argmax)
    def compute(self, axis=None):
        # We want to run CPU op and GPU op on the same tensor randomly generated.
        test_gpu_tensor = self.get_gpu_value()
        test_host_tensor = np.asarray(test_gpu_tensor)
        self.compute_host(test_host_tensor, axis)
        self.compute_gpu(test_gpu_tensor, test_host_tensor, axis)
    def compute_axis(self, pos):
        # Run the test for one axis when it is valid for this ndim.
        # NOTE(review): 1-D tensors are skipped here (tensor_size != 1),
        # presumably because test_none() already covers their only axis —
        # confirm.
        if self.tensor_size != 1 and 0 <= pos < self.tensor_size:
            self.compute(pos)
    def compute_some_axes(self, count):
        # Reduce over `count` axes taken from unsorted_axes, restricted
        # to axes valid for this ndim.
        if 0 <= count < self.tensor_size:
            self.compute([i for i in unsorted_axes if i < self.tensor_size][:count])
    # Equivalent to test reduction on all axes.
    def test_none(self):
        self.compute(None)
    def test_axis_1(self):
        self.compute_axis(0)
    def test_axis_2(self):
        self.compute_axis(1)
    def test_axis_3(self):
        self.compute_axis(2)
    def test_axis_4(self):
        self.compute_axis(3)
    def test_axis_5(self):
        self.compute_axis(4)
    # For the tests below, we expect CPU op to run with Python implementation.
    def test_2_axes(self):
        self.compute_some_axes(2)
    def test_3_axes(self):
        self.compute_some_axes(3)
    def test_4_axes(self):
        self.compute_some_axes(4)
class TestScalar(BaseTest, TestCase):
    # 0-D tensor: exercises the scalar special case of the GPU op.
    tensor_size = 0
class TestVector(BaseTest, TestCase):
    # 1-D tensor.
    tensor_size = 1
# Special case
class TestRow(BaseTest, TestCase):
    # 2-D tensor with a first dimension of size 1 (a single row).
    tensor_size = 2
    shape = [1, test_size]
# Special case
class TestColumn(BaseTest, TestCase):
    # 2-D tensor with a second dimension of size 1 (a single column).
    tensor_size = 2
    shape = [test_size, 1]
class TestMatrix(BaseTest, TestCase):
    # 2-D tensor with shape computed from test_size.
    tensor_size = 2
class TestTensor5(BaseTest, TestCase):
    # 5-D tensor: the highest ndim supported by these tests.
    tensor_size = 5
......@@ -15,6 +15,7 @@ from theano.compat import izip
from theano.configparser import config
from theano import gof
from theano.gof import Apply, Constant, Op, Variable
from theano.gof.type import Generic
from theano.tensor import elemwise
from theano.tensor.var import (AsTensorError, TensorVariable,
......@@ -1181,72 +1182,32 @@ class MaxAndArgmax(Op):
nin = 2 # tensor, axis
nout = 2 # max val, max idx
E_axis = 'invalid axis'
__props__ = ()
params_type = Generic()
__props__ = ('axis',)
def make_node(self, x, axis=None):
x = _as_tensor_variable(x)
def __init__(self, axis):
assert isinstance(axis, list)
self.axis = tuple(axis)
if isinstance(axis, (integer_types, numpy.integer)):
axis = [int(axis)]
elif isinstance(axis, numpy.ndarray) and axis.ndim == 0:
axis = [int(axis)]
elif isinstance(axis, (tuple, list, numpy.ndarray)):
axis = [int(a) for a in axis]
if axis == list(range(x.type.ndim)):
axis = None
elif isinstance(axis, Variable):
if NoneConst.equals(axis):
axis = None
elif not isinstance(axis, TensorConstant):
raise TypeError(
"MaxAndArgmax needs a constant axis. Got %s" % axis)
else:
assert (axis.dtype.startswith("int") or
axis.dtype.startswith("uint"))
if isinstance(axis.data, (integer_types, numpy.integer)) or \
(isinstance(axis.data, numpy.ndarray) and
axis.data.ndim == 0):
axis = [int(axis.data)]
elif isinstance(axis.data, (list, numpy.ndarray)):
axis = [int(i) for i in axis.data]
def get_params(self, node):
return self.axis
# Make axis entries non-negative, and sort them
if isinstance(axis, list):
for idx in xrange(len(axis)):
if axis[idx] < 0:
axis[idx] += x.type.ndim
axis.sort()
# Verify that axes are valid
all_axes = []
if isinstance(axis, list):
for ax in axis:
if ax < 0 or ax >= x.type.ndim:
raise ValueError(
'Invalid axis: %s (the number of dimensions of the '
'input is: %s)' % (ax, x.type.ndim))
if ax not in all_axes:
all_axes.append(ax)
else:
all_axes = list(range(x.ndim))
if axis is None or axis == list(range(x.type.ndim)):
axis = NoneConst.clone()
else:
axis = _as_tensor_variable(all_axes)
assert axis.ndim == 1
inputs = [x, axis]
def make_node(self, x):
x = _as_tensor_variable(x)
# We keep the original broadcastable flags for dimensions on which
# we do not perform the max / argmax.
all_axes = set(self.axis)
broadcastable = [b for i, b in enumerate(x.type.broadcastable)
if i not in all_axes]
inputs = [x]
outputs = [tensor(x.type.dtype, broadcastable, name='max'),
tensor('int64', broadcastable, name='argmax')]
return Apply(self, inputs, outputs)
def perform(self, node, inp, outs):
x, axes = inp
def perform(self, node, inp, outs, params):
x = inp[0]
axes = params
max, max_idx = outs
if axes is None:
axes = tuple(range(x.ndim))
......@@ -1269,35 +1230,46 @@ class MaxAndArgmax(Op):
dtype='int64')
def c_code(self, node, name, inp, out, sub):
x, axis = inp
if len(self.axis) != 1 and len(self.axis) != node.inputs[0].ndim:
raise NotImplementedError("NumPy C-API can compute max and argmax only for 1 axis or for all axes.")
x = inp[0]
axis = sub['params']
max, argmax = out
fail = sub["fail"]
if NoneConst.equals(node.inputs[1]):
axis_code = "axis = NPY_MAXDIMS;"
else:
assert node.inputs[1].ndim == 1
# Fall back to perform() if there are multiple axes
if len(node.inputs[1].data) > 1:
raise NotImplementedError()
axis_code = """
axis = ((dtype_%(axis)s*)PyArray_DATA(%(axis)s))[0];
if(axis > PyArray_NDIM(%(x)s)-1 || axis < -PyArray_NDIM(%(x)s)){
ret = """
#if PY_MAJOR_VERSION >= 3
#ifndef PyInt_AS_LONG
#define PyInt_AS_LONG PyLong_AS_LONG
#endif
#endif
int axis;
if (PyTuple_GET_SIZE(%(axis)s) == PyArray_NDIM(%(x)s)) {
axis = NPY_MAXDIMS;
} else if(PyTuple_GET_SIZE(%(axis)s) == 1) {
PyObject* axis_object = PyTuple_GET_ITEM(%(axis)s, 0);
axis = (int)PyInt_AS_LONG(axis_object);
Py_XDECREF(axis_object);
if (axis > PyArray_NDIM(%(x)s)-1 || axis < -PyArray_NDIM(%(x)s)) {
PyErr_SetString(PyExc_ValueError,
"MaxAndArgmax, bad axis argument");
"MaxAndArgmax: bad axis argument");
%(fail)s
}
""" % locals()
ret = """
int axis;
} else {
PyErr_SetString(PyExc_NotImplementedError,
"MaxAndArgmax: NumPy C-API can compute max and argmax only for 1 axis or for all axes.");
%(fail)s
}
Py_CLEAR(%(max)s);
Py_CLEAR(%(argmax)s);//todo pass them as out parameter.
%(axis_code)s
%(max)s = (PyArrayObject*)PyArray_Max(%(x)s, axis, NULL);
if(%(max)s == NULL){
if (%(max)s == NULL) {
%(fail)s;
}
if(!PyArray_CheckExact(%(max)s)){
if (!PyArray_CheckExact(%(max)s)) {
%(max)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(max)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
if(%(max)s == NULL){
%(fail)s;
......@@ -1305,17 +1277,17 @@ class MaxAndArgmax(Op):
}
%(argmax)s = (PyArrayObject*)PyArray_ArgMax(%(x)s, axis, NULL);
if(%(argmax)s == NULL){
if (%(argmax)s == NULL) {
Py_CLEAR(%(max)s);
%(fail)s;
}
if(!PyArray_CheckExact(%(argmax)s)){
if (!PyArray_CheckExact(%(argmax)s)) {
%(argmax)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(argmax)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
if(%(argmax)s == NULL){
%(fail)s;
}
}
if(PyArray_TYPE(%(argmax)s) != NPY_INT64){
if (PyArray_TYPE(%(argmax)s) != NPY_INT64) {
PyObject * tmp = PyArray_Cast(%(argmax)s, NPY_INT64);
if (NULL == tmp){
%(fail)s;
......@@ -1330,28 +1302,25 @@ class MaxAndArgmax(Op):
return (4,)
def infer_shape(self, node, shapes):
ishape, axis_shape = shapes
axis = node.inputs[1]
if axis.data is None:
return [(), ()]
rval = tuple([ishape[i] for (i, b) in enumerate(
node.inputs[0].type.broadcastable) if i not in axis.data])
ishape = shapes[0]
rval = tuple(ishape[i] for (i, b) in enumerate(
node.inputs[0].type.broadcastable) if i not in self.axis)
return [rval, rval]
def R_op(self, inputs, eval_points):
if eval_points[0] is None:
return [None, None]
if not isinstance(inputs[1], theano.Constant):
if len(self.axis) != 1:
raise ValueError(('R_op supported for arg_max only for '
'constant axis!'))
if inputs[1].data > 1:
'one axis!'))
if self.axis[0] > 1:
raise ValueError(('R_op supported for arg_max only when '
' axis is 0 or 1'))
if inputs[0].ndim != 2:
raise ValueError(('R_op supported for arg_max only when '
' input is a matrix'))
max_vals, max_pos = self.make_node(*inputs).outputs
if inputs[1].data == 0:
if self.axis[0] == 0:
return [eval_points[0][max_pos,
arange(eval_points[0].shape[1])], None]
else:
......@@ -1372,7 +1341,8 @@ class MaxAndArgmax(Op):
# g_max has one less dimension than x, so you need to complete
# g_max to x's shape when axis=0 the broadcasting mechanism
# does it automatically
x, axis = inp
x = inp[0]
axis = _as_tensor_variable(self.axis)
g_max, g_max_idx = grads
g_max_disconnected = isinstance(g_max.type, DisconnectedType)
......@@ -1382,15 +1352,10 @@ class MaxAndArgmax(Op):
if g_max_disconnected and g_max_idx_disconnected:
return [DisconnectedType()(), DisconnectedType()()]
axis_grad = grad_undefined(
self, 1, axis,
"argmax is not defined for non-integer axes so"
" argmax(x, axis+eps) is undefined")
# if the max is disconnected but the argmax is not,
# the gradient on its inputs is zero
if g_max_disconnected:
return [x.zeros_like(), axis_grad]
return [x.zeros_like()]
if NoneConst.equals(axis):
axis_ = list(range(x.ndim))
else:
......@@ -1414,9 +1379,7 @@ class MaxAndArgmax(Op):
# Set the grad to the correct position.
g_x = eq(xmax_pad, x) * g_max_pad
return g_x, axis_grad
_max_and_argmax = MaxAndArgmax()
return g_x,
class Argmax(Op):
......@@ -1637,8 +1600,39 @@ def max_and_argmax(a, axis=None, keepdims=False):
will broadcast correctly against the original tensor.
"""
out, argout = _max_and_argmax(a, axis)
# Check axis and convert it to a Python list of integers.
# Axis will be used as an op param of MaxAndArgmax.
if axis is None:
axis = list(range(a.type.ndim))
elif (isinstance(axis, (integer_types, numpy.integer)) or
(isinstance(axis, numpy.ndarray) and axis.ndim == 0)):
axis = [int(axis)]
elif isinstance(axis, (tuple, list, numpy.ndarray)):
axis = [int(i) for i in axis]
elif isinstance(axis, Variable):
if NoneConst.equals(axis):
axis = list(range(a.type.ndim))
elif not isinstance(axis, TensorConstant):
raise TypeError("max and argmax computation needs a constant axis. Got %s" % axis)
else:
assert (axis.dtype.startswith("int") or axis.dtype.startswith("uint"))
if (isinstance(axis.data, (integer_types, numpy.integer)) or
(isinstance(axis.data, numpy.ndarray) and axis.data.ndim == 0)):
axis = [int(axis.data)]
elif isinstance(axis.data, (list, numpy.ndarray)):
axis = [int(i) for i in axis.data]
if len(axis) == 0:
axis = list(range(a.type.ndim))
else:
for i in range(len(axis)):
if axis[i] < 0:
axis[i] += a.type.ndim
if axis[i] < 0 or axis[i] >= a.type.ndim:
raise ValueError("max and argmax computation needs a valid axis number for %d-D tensor. Got %d"
% (a.type.ndim, axis[i]))
axis = list(set(axis))
axis.sort()
out, argout = MaxAndArgmax(axis)(a)
if keepdims:
out = makeKeepDims(a, out, axis)
......
......@@ -1568,9 +1568,9 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
@opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([tensor._max_and_argmax])
@gof.local_optimizer([tensor.MaxAndArgmax])
def local_argmax_pushdown(node):
if node.op == tensor._max_and_argmax and node.inputs[0].owner and \
if isinstance(node.op, tensor.MaxAndArgmax) and node.inputs[0].owner and \
len(node.outputs[0].clients) > 0 and node.inputs[0].owner.op in \
(softmax_op, softplus, tensor.exp, tensor.log, tensor.tanh, sigmoid,
softmax_with_bias):
......@@ -1584,23 +1584,24 @@ def local_argmax_pushdown(node):
"warning set the Theano flags 'warn.argmax_pushdown_bug' "
"to False")
if (node.op == tensor._max_and_argmax and
if (isinstance(node.op, tensor.MaxAndArgmax) and
node.inputs[0].owner and len(node.outputs[0].clients) == 0):
x_max, x_argmax = node.outputs
x, axis = node.inputs
x = node.inputs[0]
axis = node.op.get_params(node)
# TODO: Make a list/set of monotonic ops...
if x.owner and x.owner.op in (softmax_op, softplus, tensor.exp,
tensor.log, tensor.tanh, sigmoid):
pre_x, = x.owner.inputs
ret = tensor._max_and_argmax(pre_x, axis)
ret = tensor.max_and_argmax(pre_x, axis)
copy_stack_trace(x_max, ret)
return ret
if x.owner and x.owner.op == softmax_with_bias:
pre_x, pre_bias = x.owner.inputs
ret = tensor._max_and_argmax(pre_x +
tensor.DimShuffle(
pre_bias.broadcastable,
('x', 0))(pre_bias), axis)
ret = tensor.max_and_argmax(pre_x +
tensor.DimShuffle(
pre_bias.broadcastable,
('x', 0))(pre_bias), axis)
# copy both stack traces
copy_stack_trace(x_max, ret)
return ret
......
......@@ -41,8 +41,6 @@ from theano.tensor.elemwise import CAReduce
from theano.tensor import basic as T
from theano.tensor import DimShuffle
from theano.tensor.basic import (get_scalar_constant_value,
NotScalarConstantError)
from theano.tensor.opt import register_uncanonicalize
from theano import scalar as scal
......@@ -50,31 +48,19 @@ _logger = logging.getLogger('theano.tensor.opt')
@register_uncanonicalize
@gof.local_optimizer([T._max_and_argmax])
@gof.local_optimizer([T.MaxAndArgmax])
def local_max_and_argmax(node):
"""
If we don't use the argmax, change it to a max only.
"""
if node.op == T._max_and_argmax:
if isinstance(node.op, T.MaxAndArgmax):
axis = node.op.get_params(node)
if len(node.outputs[1].clients) == 0:
# MaxAndArgmax support variable axis,
# but CAReduce support only constant axis.
if node.inputs[1].data is None:
axis = None
else:
try:
axis = get_scalar_constant_value(node.inputs[1])
except NotScalarConstantError:
axis = node.inputs[1]
if not isinstance(axis, T.TensorConstant):
return False
axis = axis.data
new = CAReduce(scal.maximum, axis)(node.inputs[0])
return [new, None]
if len(node.outputs[0].clients) == 0:
return [None, T._argmax(node.inputs[0], node.inputs[1])]
return [None, T._argmax(node.inputs[0], axis)]
@register_uncanonicalize
......
......@@ -7619,23 +7619,23 @@ class TestInferShape(utt.InferShapeTester):
# MaxAndArgmax,
adtens3_val = rand(4, 5, 3)
self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, None),
max_and_argmax(adtens3, None),
[adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 0),
max_and_argmax(adtens3, 0),
[adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 1),
max_and_argmax(adtens3, 1),
[adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 2),
max_and_argmax(adtens3, 2),
[adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, [0, 1, 2]),
max_and_argmax(adtens3, [0, 1, 2]),
[adtens3_val], MaxAndArgmax)
# ARange
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment