提交 3347480a authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #5168 from notoraptor/master

This is my proposal for GpuMaxAndArgmax (issue #1399).
...@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant, ...@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
GpuArraySharedVariable, gpuarray_shared_constructor, GpuArraySharedVariable, gpuarray_shared_constructor,
reg_context, get_context, ContextNotDefined, _get_props) reg_context, get_context, ContextNotDefined, _get_props)
from .basic_ops import as_gpuarray_variable from .basic_ops import as_gpuarray_variable
from . import fft, dnn, opt, nerv, extra_ops, multinomial from . import fft, dnn, opt, nerv, extra_ops, multinomial, reduction
def transfer(x, target): def transfer(x, target):
try: try:
......
...@@ -65,6 +65,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor, ...@@ -65,6 +65,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20) GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax
_logger = logging.getLogger("theano.gpuarray.opt") _logger = logging.getLogger("theano.gpuarray.opt")
...@@ -1775,6 +1776,14 @@ def _scan_type_infer(node): ...@@ -1775,6 +1776,14 @@ def _scan_type_infer(node):
context_name=context_name) context_name=context_name)
return typebuild return typebuild
# Add optimization : maxandargmax (CPU -> GPU)
@register_opt('fast_compile')
@op_lifter([tensor.MaxAndArgmax])
@register_opt2([tensor.MaxAndArgmax], 'fast_compile')
def local_gpu_maxandargmax(op, context_name, inputs, outputs):
return GpuMaxAndArgmax(op.get_params(None))
# Do not register in fast_run or fast_compile. # Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled. # It will be added to fast_run if the GPU is enabled.
optdb.register('gpua_scanOp_make_inplace', optdb.register('gpua_scanOp_make_inplace',
......
from __future__ import print_function, absolute_import, division
import os
import theano
from theano.gof import Op, Apply
from theano.gof.type import Generic
from .basic_ops import (infer_context_name, as_gpuarray_variable)
from .type import GpuArrayType
try:
import pygpu
except ImportError as e:
pass
class GpuMaxAndArgmax(Op):
    """
    GPU version of MaxAndArgmax.

    Computes both the maximum of the input over the given reduction axes
    and the index of that maximum, in a single libgpuarray call
    (``GpuArray_maxandargmax``).
    """
    params_type = Generic()
    __props__ = ('axis',)
    # Argmax indices are returned as int64, matching the CPU MaxAndArgmax op.
    argmax_dtype = "int64"

    def __init__(self, axis):
        # Axes are expected to be already normalized (non-negative, unique,
        # sorted) by the caller. Stored as a tuple so the op is hashable,
        # as required by __props__.
        assert isinstance(axis, (list, tuple))
        self.axis = tuple(axis)

    def get_params(self, node):
        # The axes tuple is forwarded to the C code as the op params
        # (Generic params type, i.e. a Python tuple at C level).
        return self.axis

    def make_node(self, X):
        context_name = infer_context_name(X)
        # We keep the original broadcastable flags for dimensions on which
        # we do not perform the max / argmax.
        all_axes = set(self.axis)
        broadcastable = [b for i, b in enumerate(X.type.broadcastable)
                         if i not in all_axes]
        inputs = [as_gpuarray_variable(X, context_name)]
        outputs = [GpuArrayType(X.type.dtype, broadcastable,
                                context_name=context_name, name='max')(),
                   GpuArrayType(self.argmax_dtype, broadcastable,
                                context_name=context_name, name='argmax')()]
        return Apply(self, inputs, outputs)

    def c_headers(self):
        return ['<numpy_compat.h>', '<gpuarray_helper.h>']

    def c_header_dirs(self):
        return [pygpu.get_include(), os.path.dirname(__file__)]

    def c_code(self, node, name, input_names, output_names, sub):
        # Recall: X = input_names[0]
        # Recall: axes = sub['params'] (a Python tuple of ints at C level)
        # Recall: max, argmax = output_names
        # Recall: fail = sub['fail']
        max_typecode = pygpu.gpuarray.dtype_to_typecode(node.inputs[0].dtype)
        argmax_typecode = pygpu.gpuarray.dtype_to_typecode(self.argmax_dtype)
        ret = """
        #if PY_MAJOR_VERSION >= 3
            #ifndef PyInt_AS_LONG
                #define PyInt_AS_LONG PyLong_AS_LONG
            #endif
        #endif
        unsigned %(name)s_redux_len = PyTuple_GET_SIZE(%(axes)s);
        unsigned* %(name)s_axes_to_reduce = (unsigned*)malloc(%(name)s_redux_len * sizeof(unsigned));
        for (unsigned i = 0; i < %(name)s_redux_len; ++i) {
            /* PyTuple_GET_ITEM returns a *borrowed* reference: it must NOT
             * be decref'd, otherwise the ints held by the params tuple
             * could be freed while the tuple still references them. */
            PyObject* axis_object = PyTuple_GET_ITEM(%(axes)s, i);
            %(name)s_axes_to_reduce[i] = (unsigned) PyInt_AS_LONG(axis_object);
        }
        size_t %(name)s_input_ndim = PyGpuArray_NDIM(%(X)s);
        size_t %(name)s_output_ndim = %(name)s_input_ndim - %(name)s_redux_len;
        size_t* %(name)s_output_dims = (size_t*)malloc(%(name)s_output_ndim * sizeof(size_t));
        /* Output dims are the input dims with the reduced axes removed. */
        if (%(name)s_redux_len == 1) {
            for (unsigned i = 0; i < %(name)s_axes_to_reduce[0]; ++i) {
                %(name)s_output_dims[i] = PyGpuArray_DIM(%(X)s, i);
            }
            for (unsigned i = %(name)s_axes_to_reduce[0] + 1; i < %(name)s_input_ndim; ++i) {
                %(name)s_output_dims[i-1] = PyGpuArray_DIM(%(X)s, i);
            }
        } else {
            /* General case: walk the input dims, skipping the (sorted)
             * axes to reduce. */
            int64_t current_input_pos = -1;
            int64_t current_output_pos = -1;
            for (unsigned i = 0; i < %(name)s_redux_len; ++i) {
                for (++current_input_pos; current_input_pos < %(name)s_axes_to_reduce[i]; ++current_input_pos) {
                    %(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos);
                }
            }
            for (++current_input_pos; current_input_pos < %(name)s_input_ndim; ++current_input_pos) {
                %(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos);
            }
        }
        if (theano_prep_output(&%(max)s, %(name)s_output_ndim, %(name)s_output_dims, %(max_typecode)s, GA_C_ORDER, %(X)s->context)) {
            PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to prepare max output.");
            %(fail)s
        }
        if (theano_prep_output(&%(argmax)s, %(name)s_output_ndim, %(name)s_output_dims, %(argmax_typecode)s, GA_C_ORDER, %(X)s->context)) {
            PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to prepare argmax output.");
            %(fail)s
        }
        if (%(name)s_input_ndim == 0) {
            /* GpuArray_maxandargmax can't handle a 0-d array
             * because it expects that 1 <= redux_len <= input_ndim.
             * As input_ndim == 0, then 1 <= redux_len <= 0 is false.
             * To handle this case we copy input to max and we set argmax to 0.
             */
            if (GA_NO_ERROR != GpuArray_setarray(&%(max)s->ga, &%(X)s->ga)) {
                PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to copy input to max when input is a scalar.");
                %(fail)s
            }
            if (GA_NO_ERROR != GpuArray_memset(&%(argmax)s->ga, 0)) {
                PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to set argmax to 0 when input is a scalar.");
                %(fail)s
            }
        } else if (GA_NO_ERROR !=
            GpuArray_maxandargmax(&%(max)s->ga, &%(argmax)s->ga, &%(X)s->ga, %(name)s_redux_len, %(name)s_axes_to_reduce)
        ) {
            PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to compute gpuarray maxandargmax.");
            %(fail)s
        }
        """
        if theano.config.gpuarray.sync:
            ret += """
            GpuArray_sync(&%(max)s->ga);
            GpuArray_sync(&%(argmax)s->ga);
            """
        return ret % {'X': input_names[0], 'axes': sub['params'],
                      'max': output_names[0], 'argmax': output_names[1],
                      'max_typecode': max_typecode,
                      'argmax_typecode': argmax_typecode,
                      'name': name, 'fail': sub['fail']}

    def c_code_cleanup(self, node, name, inputs, outputs, sub):
        # Free the buffers malloc'd in c_code.
        return """
        free(%(name)s_output_dims);
        free(%(name)s_axes_to_reduce);
        """ % {'name': name}

    def c_code_cache_version(self):
        # Version the generated C code so Theano can cache the compiled
        # module instead of recompiling it at every run.
        return (1,)
from __future__ import print_function, absolute_import, division
from unittest import TestCase
import numpy as np
import theano
import theano.tensor as T
from theano.tests import unittest_tools as utt
from theano.tests.unittest_tools import SkipTest
from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand_gpuarray
from .. import GpuArrayType
import math
# Number of values to be used in test tensors (except with 0-D tensors!).
test_size = 10000000

# NB: This order of "unsorted axes" is arbitrary and is here
# just to have the same information on profile output
# from one test to another.
unsorted_axes = (2, 4, 0, 3, 1)

# Seed from OS entropy: these tests do not require reproducible random data.
np.random.seed()
def numpy_random_array(shapes):
    """Return a random array of shape `shapes` with dtype ``theano.config.floatX``.

    Values are drawn from the standard normal distribution.
    """
    # Use np.prod instead of a hand-rolled multiplication loop to get the
    # total element count (np.prod of an empty shape is 1, which keeps the
    # 0-D case working exactly as before).
    size = int(np.prod(shapes))
    return np.random.normal(size=size).astype(theano.config.floatX).reshape(shapes)
def numpy_maxandargmax(X, axis=None):
    """NumPy reference for (max, argmax) of `X` over the given axes.

    Mirrors MaxAndArgmax.perform(): the argmax is taken over the reduced
    axes flattened together, with the kept axes moved to the front.
    """
    # Normalize `axis` to a sorted, duplicate-free tuple of ints.
    if axis is None:
        reduced = tuple(range(X.ndim))
    elif isinstance(axis, (tuple, list)):
        reduced = tuple(sorted(set(int(a) for a in axis)))
    else:
        reduced = tuple(sorted(set([int(axis)])))
    ref_max = np.max(X, axis=reduced)
    # NumPy's argmax supports a single axis only, so: move the kept axes to
    # the front, collapse every reduced axis into one trailing axis, and
    # argmax along that last axis.
    kept = np.array([d for d in range(X.ndim) if d not in reduced],
                    dtype='int64')
    moved = np.transpose(X, np.concatenate((kept, reduced)))
    kept_shape = moved.shape[:len(kept)]
    collapsed = np.prod(moved.shape[len(kept):])
    flat_shape = tuple(int(d) for d in kept_shape + (collapsed,))
    return (ref_max, np.argmax(moved.reshape(flat_shape), axis=-1))
def check_if_gpu_maxandargmax_in_graph(theano_function):
    """Assert the compiled function contains at least one GpuMaxAndArgmax node."""
    assert any(isinstance(apply_node.op,
                          theano.gpuarray.reduction.GpuMaxAndArgmax)
               for apply_node in theano_function.maker.fgraph.apply_nodes)
def check_if_gpu_maxandargmax_not_in_graph(theano_function):
    """Assert the compiled function contains no GpuMaxAndArgmax node."""
    assert not any(isinstance(apply_node.op,
                              theano.gpuarray.reduction.GpuMaxAndArgmax)
                   for apply_node in theano_function.maker.fgraph.apply_nodes)
class BaseTest:
    """Shared machinery for max/argmax tests.

    Subclasses define `tensor_size` (the ndim) and optionally `shape`;
    each test runs the same random data through the CPU graph, the GPU
    graph and the NumPy reference, and checks they all agree.
    """

    # This attribute must be set in subclasses.
    tensor_size = None
    shape = None
    dtype = theano.config.floatX

    def get_shape(self):
        # Hypercubic shape whose total element count is about `test_size`.
        if self.tensor_size == 0:
            return []
        side = int(math.ceil(math.pow(test_size, 1 / self.tensor_size)))
        return [side] * self.tensor_size

    def setUp(self):
        if not isinstance(self.tensor_size, int):
            raise SkipTest("No tensor ndim defined.")
        if not (0 <= self.tensor_size <= 5):
            raise SkipTest("We allow from 0 (included) to 5 (inclued) dimensons for these tests.")
        if self.shape is None:
            self.shape = self.get_shape()

    def get_host_tensor(self):
        # Symbolic CPU tensor with no broadcastable dimension.
        return T.tensor(self.dtype, (False,) * self.tensor_size)

    def get_gpu_tensor(self):
        # Symbolic GPU tensor with no broadcastable dimension.
        return GpuArrayType(self.dtype, (False,) * self.tensor_size)()

    def get_host_value(self):
        return numpy_random_array(self.shape)

    def get_gpu_value(self):
        return rand_gpuarray(*self.shape)

    # NB: In compute_host() and compute_gpu(),
    # the first call of the theano function should be ignored in profiling,
    # with Theano config flag profiling.ignore_first_call=True.
    def compute_host(self, test_tensor, axis):
        tensor_var = self.get_host_tensor()
        fn = theano.function(
            [tensor_var],
            [T.max(tensor_var, axis=axis), T.argmax(tensor_var, axis=axis)],
            name='shape:' + str(test_tensor.shape) + '/axis:' + str(axis) + '/HOST',
            mode=mode_without_gpu)
        check_if_gpu_maxandargmax_not_in_graph(fn)
        fn(test_tensor)  # warm-up call, ignored in profiling (see NB above)
        theano_max, theano_argmax = fn(test_tensor)
        ref_max, ref_argmax = numpy_maxandargmax(test_tensor, axis=axis)
        utt.assert_allclose(ref_max, theano_max)
        utt.assert_allclose(ref_argmax, theano_argmax)

    def compute_gpu(self, test_gpu_tensor, test_host_tensor, axis):
        tensor_var = self.get_gpu_tensor()
        fn = theano.function(
            [tensor_var],
            [T.max(tensor_var, axis=axis), T.argmax(tensor_var, axis=axis)],
            name='shape:' + str(test_gpu_tensor.shape) + '/axis:' + str(axis) + '/GPU',
            mode=mode_with_gpu)
        check_if_gpu_maxandargmax_in_graph(fn)
        fn(test_gpu_tensor)  # warm-up call, ignored in profiling (see NB above)
        theano_max, theano_argmax = fn(test_gpu_tensor)
        ref_max, ref_argmax = numpy_maxandargmax(test_host_tensor, axis=axis)
        utt.assert_allclose(ref_max, theano_max)
        utt.assert_allclose(ref_argmax, theano_argmax)

    def compute(self, axis=None):
        # We want to run CPU op and GPU op on the same randomly generated tensor.
        gpu_value = self.get_gpu_value()
        host_value = np.asarray(gpu_value)
        self.compute_host(host_value, axis)
        self.compute_gpu(gpu_value, host_value, axis)

    def compute_axis(self, pos):
        # 1-D tensors are skipped: reducing their single axis is already
        # covered by test_none().
        if self.tensor_size != 1 and 0 <= pos < self.tensor_size:
            self.compute(pos)

    def compute_some_axes(self, count):
        if 0 <= count < self.tensor_size:
            usable = [a for a in unsorted_axes if a < self.tensor_size]
            self.compute(usable[:count])

    # Equivalent to test reduction on all axes.
    def test_none(self):
        self.compute(None)

    def test_axis_1(self):
        self.compute_axis(0)

    def test_axis_2(self):
        self.compute_axis(1)

    def test_axis_3(self):
        self.compute_axis(2)

    def test_axis_4(self):
        self.compute_axis(3)

    def test_axis_5(self):
        self.compute_axis(4)

    # For the tests below, we expect CPU op to run with Python implementation.
    def test_2_axes(self):
        self.compute_some_axes(2)

    def test_3_axes(self):
        self.compute_some_axes(3)

    def test_4_axes(self):
        self.compute_some_axes(4)
class TestScalar(BaseTest, TestCase):
    # 0-D tensor: exercises the scalar special case of the reduction.
    tensor_size = 0
class TestVector(BaseTest, TestCase):
    # 1-D tensor of about `test_size` elements.
    tensor_size = 1
# Special case
class TestRow(BaseTest, TestCase):
    # 2-D tensor degenerated to a single row (first dimension of size 1).
    tensor_size = 2
    shape = [1, test_size]
# Special case
class TestColumn(BaseTest, TestCase):
    # 2-D tensor degenerated to a single column (second dimension of size 1).
    tensor_size = 2
    shape = [test_size, 1]
class TestMatrix(BaseTest, TestCase):
    # Square-ish 2-D tensor (shape computed by BaseTest.get_shape()).
    tensor_size = 2
class TestTensor5(BaseTest, TestCase):
    # 5-D tensor: the maximum ndim allowed by BaseTest.setUp().
    tensor_size = 5
...@@ -15,6 +15,7 @@ from theano.compat import izip ...@@ -15,6 +15,7 @@ from theano.compat import izip
from theano.configparser import config from theano.configparser import config
from theano import gof from theano import gof
from theano.gof import Apply, Constant, Op, Variable from theano.gof import Apply, Constant, Op, Variable
from theano.gof.type import Generic
from theano.tensor import elemwise from theano.tensor import elemwise
from theano.tensor.var import (AsTensorError, TensorVariable, from theano.tensor.var import (AsTensorError, TensorVariable,
...@@ -1181,72 +1182,32 @@ class MaxAndArgmax(Op): ...@@ -1181,72 +1182,32 @@ class MaxAndArgmax(Op):
nin = 2 # tensor, axis nin = 2 # tensor, axis
nout = 2 # max val, max idx nout = 2 # max val, max idx
E_axis = 'invalid axis' E_axis = 'invalid axis'
__props__ = () params_type = Generic()
__props__ = ('axis',)
def make_node(self, x, axis=None):
x = _as_tensor_variable(x)
if isinstance(axis, (integer_types, numpy.integer)):
axis = [int(axis)]
elif isinstance(axis, numpy.ndarray) and axis.ndim == 0:
axis = [int(axis)]
elif isinstance(axis, (tuple, list, numpy.ndarray)):
axis = [int(a) for a in axis]
if axis == list(range(x.type.ndim)):
axis = None
elif isinstance(axis, Variable):
if NoneConst.equals(axis):
axis = None
elif not isinstance(axis, TensorConstant):
raise TypeError(
"MaxAndArgmax needs a constant axis. Got %s" % axis)
else:
assert (axis.dtype.startswith("int") or
axis.dtype.startswith("uint"))
if isinstance(axis.data, (integer_types, numpy.integer)) or \
(isinstance(axis.data, numpy.ndarray) and
axis.data.ndim == 0):
axis = [int(axis.data)]
elif isinstance(axis.data, (list, numpy.ndarray)):
axis = [int(i) for i in axis.data]
# Make axis entries non-negative, and sort them def __init__(self, axis):
if isinstance(axis, list): assert isinstance(axis, list)
for idx in xrange(len(axis)): self.axis = tuple(axis)
if axis[idx] < 0:
axis[idx] += x.type.ndim
axis.sort()
# Verify that axes are valid def get_params(self, node):
all_axes = [] return self.axis
if isinstance(axis, list):
for ax in axis:
if ax < 0 or ax >= x.type.ndim:
raise ValueError(
'Invalid axis: %s (the number of dimensions of the '
'input is: %s)' % (ax, x.type.ndim))
if ax not in all_axes:
all_axes.append(ax)
else:
all_axes = list(range(x.ndim))
if axis is None or axis == list(range(x.type.ndim)): def make_node(self, x):
axis = NoneConst.clone() x = _as_tensor_variable(x)
else:
axis = _as_tensor_variable(all_axes)
assert axis.ndim == 1
inputs = [x, axis]
# We keep the original broadcastable flags for dimensions on which # We keep the original broadcastable flags for dimensions on which
# we do not perform the max / argmax. # we do not perform the max / argmax.
all_axes = set(self.axis)
broadcastable = [b for i, b in enumerate(x.type.broadcastable) broadcastable = [b for i, b in enumerate(x.type.broadcastable)
if i not in all_axes] if i not in all_axes]
inputs = [x]
outputs = [tensor(x.type.dtype, broadcastable, name='max'), outputs = [tensor(x.type.dtype, broadcastable, name='max'),
tensor('int64', broadcastable, name='argmax')] tensor('int64', broadcastable, name='argmax')]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, node, inp, outs): def perform(self, node, inp, outs, params):
x, axes = inp x = inp[0]
axes = params
max, max_idx = outs max, max_idx = outs
if axes is None: if axes is None:
axes = tuple(range(x.ndim)) axes = tuple(range(x.ndim))
...@@ -1269,35 +1230,46 @@ class MaxAndArgmax(Op): ...@@ -1269,35 +1230,46 @@ class MaxAndArgmax(Op):
dtype='int64') dtype='int64')
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
x, axis = inp if len(self.axis) != 1 and len(self.axis) != node.inputs[0].ndim:
raise NotImplementedError("NumPy C-API can compute max and argmax only for 1 axis or for all axes.")
x = inp[0]
axis = sub['params']
max, argmax = out max, argmax = out
fail = sub["fail"] fail = sub["fail"]
if NoneConst.equals(node.inputs[1]): ret = """
axis_code = "axis = NPY_MAXDIMS;" #if PY_MAJOR_VERSION >= 3
else: #ifndef PyInt_AS_LONG
assert node.inputs[1].ndim == 1 #define PyInt_AS_LONG PyLong_AS_LONG
# Fall back to perform() if there are multiple axes #endif
if len(node.inputs[1].data) > 1: #endif
raise NotImplementedError()
axis_code = """ int axis;
axis = ((dtype_%(axis)s*)PyArray_DATA(%(axis)s))[0];
if(axis > PyArray_NDIM(%(x)s)-1 || axis < -PyArray_NDIM(%(x)s)){ if (PyTuple_GET_SIZE(%(axis)s) == PyArray_NDIM(%(x)s)) {
axis = NPY_MAXDIMS;
} else if(PyTuple_GET_SIZE(%(axis)s) == 1) {
PyObject* axis_object = PyTuple_GET_ITEM(%(axis)s, 0);
axis = (int)PyInt_AS_LONG(axis_object);
Py_XDECREF(axis_object);
if (axis > PyArray_NDIM(%(x)s)-1 || axis < -PyArray_NDIM(%(x)s)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"MaxAndArgmax, bad axis argument"); "MaxAndArgmax: bad axis argument");
%(fail)s
}
} else {
PyErr_SetString(PyExc_NotImplementedError,
"MaxAndArgmax: NumPy C-API can compute max and argmax only for 1 axis or for all axes.");
%(fail)s %(fail)s
} }
""" % locals()
ret = """
int axis;
Py_CLEAR(%(max)s); Py_CLEAR(%(max)s);
Py_CLEAR(%(argmax)s);//todo pass them as out parameter. Py_CLEAR(%(argmax)s);//todo pass them as out parameter.
%(axis_code)s
%(max)s = (PyArrayObject*)PyArray_Max(%(x)s, axis, NULL); %(max)s = (PyArrayObject*)PyArray_Max(%(x)s, axis, NULL);
if(%(max)s == NULL){ if (%(max)s == NULL) {
%(fail)s; %(fail)s;
} }
if(!PyArray_CheckExact(%(max)s)){ if (!PyArray_CheckExact(%(max)s)) {
%(max)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(max)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL); %(max)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(max)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
if(%(max)s == NULL){ if(%(max)s == NULL){
%(fail)s; %(fail)s;
...@@ -1305,17 +1277,17 @@ class MaxAndArgmax(Op): ...@@ -1305,17 +1277,17 @@ class MaxAndArgmax(Op):
} }
%(argmax)s = (PyArrayObject*)PyArray_ArgMax(%(x)s, axis, NULL); %(argmax)s = (PyArrayObject*)PyArray_ArgMax(%(x)s, axis, NULL);
if(%(argmax)s == NULL){ if (%(argmax)s == NULL) {
Py_CLEAR(%(max)s); Py_CLEAR(%(max)s);
%(fail)s; %(fail)s;
} }
if(!PyArray_CheckExact(%(argmax)s)){ if (!PyArray_CheckExact(%(argmax)s)) {
%(argmax)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(argmax)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL); %(argmax)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(argmax)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
if(%(argmax)s == NULL){ if(%(argmax)s == NULL){
%(fail)s; %(fail)s;
} }
} }
if(PyArray_TYPE(%(argmax)s) != NPY_INT64){ if (PyArray_TYPE(%(argmax)s) != NPY_INT64) {
PyObject * tmp = PyArray_Cast(%(argmax)s, NPY_INT64); PyObject * tmp = PyArray_Cast(%(argmax)s, NPY_INT64);
if (NULL == tmp){ if (NULL == tmp){
%(fail)s; %(fail)s;
...@@ -1330,28 +1302,25 @@ class MaxAndArgmax(Op): ...@@ -1330,28 +1302,25 @@ class MaxAndArgmax(Op):
return (4,) return (4,)
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
ishape, axis_shape = shapes ishape = shapes[0]
axis = node.inputs[1] rval = tuple(ishape[i] for (i, b) in enumerate(
if axis.data is None: node.inputs[0].type.broadcastable) if i not in self.axis)
return [(), ()]
rval = tuple([ishape[i] for (i, b) in enumerate(
node.inputs[0].type.broadcastable) if i not in axis.data])
return [rval, rval] return [rval, rval]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
if eval_points[0] is None: if eval_points[0] is None:
return [None, None] return [None, None]
if not isinstance(inputs[1], theano.Constant): if len(self.axis) != 1:
raise ValueError(('R_op supported for arg_max only for ' raise ValueError(('R_op supported for arg_max only for '
'constant axis!')) 'one axis!'))
if inputs[1].data > 1: if self.axis[0] > 1:
raise ValueError(('R_op supported for arg_max only when ' raise ValueError(('R_op supported for arg_max only when '
' axis is 0 or 1')) ' axis is 0 or 1'))
if inputs[0].ndim != 2: if inputs[0].ndim != 2:
raise ValueError(('R_op supported for arg_max only when ' raise ValueError(('R_op supported for arg_max only when '
' input is a matrix')) ' input is a matrix'))
max_vals, max_pos = self.make_node(*inputs).outputs max_vals, max_pos = self.make_node(*inputs).outputs
if inputs[1].data == 0: if self.axis[0] == 0:
return [eval_points[0][max_pos, return [eval_points[0][max_pos,
arange(eval_points[0].shape[1])], None] arange(eval_points[0].shape[1])], None]
else: else:
...@@ -1372,7 +1341,8 @@ class MaxAndArgmax(Op): ...@@ -1372,7 +1341,8 @@ class MaxAndArgmax(Op):
# g_max has one less dimension than x, so you need to complete # g_max has one less dimension than x, so you need to complete
# g_max to x's shape when axis=0 the broadcasting mechanism # g_max to x's shape when axis=0 the broadcasting mechanism
# does it automatically # does it automatically
x, axis = inp x = inp[0]
axis = _as_tensor_variable(self.axis)
g_max, g_max_idx = grads g_max, g_max_idx = grads
g_max_disconnected = isinstance(g_max.type, DisconnectedType) g_max_disconnected = isinstance(g_max.type, DisconnectedType)
...@@ -1382,15 +1352,10 @@ class MaxAndArgmax(Op): ...@@ -1382,15 +1352,10 @@ class MaxAndArgmax(Op):
if g_max_disconnected and g_max_idx_disconnected: if g_max_disconnected and g_max_idx_disconnected:
return [DisconnectedType()(), DisconnectedType()()] return [DisconnectedType()(), DisconnectedType()()]
axis_grad = grad_undefined(
self, 1, axis,
"argmax is not defined for non-integer axes so"
" argmax(x, axis+eps) is undefined")
# if the max is disconnected but the argmax is not, # if the max is disconnected but the argmax is not,
# the gradient on its inputs is zero # the gradient on its inputs is zero
if g_max_disconnected: if g_max_disconnected:
return [x.zeros_like(), axis_grad] return [x.zeros_like()]
if NoneConst.equals(axis): if NoneConst.equals(axis):
axis_ = list(range(x.ndim)) axis_ = list(range(x.ndim))
else: else:
...@@ -1414,9 +1379,7 @@ class MaxAndArgmax(Op): ...@@ -1414,9 +1379,7 @@ class MaxAndArgmax(Op):
# Set the grad to the correct position. # Set the grad to the correct position.
g_x = eq(xmax_pad, x) * g_max_pad g_x = eq(xmax_pad, x) * g_max_pad
return g_x, axis_grad return g_x,
_max_and_argmax = MaxAndArgmax()
class Argmax(Op): class Argmax(Op):
...@@ -1637,8 +1600,39 @@ def max_and_argmax(a, axis=None, keepdims=False): ...@@ -1637,8 +1600,39 @@ def max_and_argmax(a, axis=None, keepdims=False):
will broadcast correctly against the original tensor. will broadcast correctly against the original tensor.
""" """
# Check axis and convert it to a Python list of integers.
out, argout = _max_and_argmax(a, axis) # Axis will be used as an op param of MaxAndArgmax.
if axis is None:
axis = list(range(a.type.ndim))
elif (isinstance(axis, (integer_types, numpy.integer)) or
(isinstance(axis, numpy.ndarray) and axis.ndim == 0)):
axis = [int(axis)]
elif isinstance(axis, (tuple, list, numpy.ndarray)):
axis = [int(i) for i in axis]
elif isinstance(axis, Variable):
if NoneConst.equals(axis):
axis = list(range(a.type.ndim))
elif not isinstance(axis, TensorConstant):
raise TypeError("max and argmax computation needs a constant axis. Got %s" % axis)
else:
assert (axis.dtype.startswith("int") or axis.dtype.startswith("uint"))
if (isinstance(axis.data, (integer_types, numpy.integer)) or
(isinstance(axis.data, numpy.ndarray) and axis.data.ndim == 0)):
axis = [int(axis.data)]
elif isinstance(axis.data, (list, numpy.ndarray)):
axis = [int(i) for i in axis.data]
if len(axis) == 0:
axis = list(range(a.type.ndim))
else:
for i in range(len(axis)):
if axis[i] < 0:
axis[i] += a.type.ndim
if axis[i] < 0 or axis[i] >= a.type.ndim:
raise ValueError("max and argmax computation needs a valid axis number for %d-D tensor. Got %d"
% (a.type.ndim, axis[i]))
axis = list(set(axis))
axis.sort()
out, argout = MaxAndArgmax(axis)(a)
if keepdims: if keepdims:
out = makeKeepDims(a, out, axis) out = makeKeepDims(a, out, axis)
......
...@@ -1568,9 +1568,9 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node): ...@@ -1568,9 +1568,9 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
@opt.register_specialize('fast_compile_gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([tensor._max_and_argmax]) @gof.local_optimizer([tensor.MaxAndArgmax])
def local_argmax_pushdown(node): def local_argmax_pushdown(node):
if node.op == tensor._max_and_argmax and node.inputs[0].owner and \ if isinstance(node.op, tensor.MaxAndArgmax) and node.inputs[0].owner and \
len(node.outputs[0].clients) > 0 and node.inputs[0].owner.op in \ len(node.outputs[0].clients) > 0 and node.inputs[0].owner.op in \
(softmax_op, softplus, tensor.exp, tensor.log, tensor.tanh, sigmoid, (softmax_op, softplus, tensor.exp, tensor.log, tensor.tanh, sigmoid,
softmax_with_bias): softmax_with_bias):
...@@ -1584,20 +1584,21 @@ def local_argmax_pushdown(node): ...@@ -1584,20 +1584,21 @@ def local_argmax_pushdown(node):
"warning set the Theano flags 'warn.argmax_pushdown_bug' " "warning set the Theano flags 'warn.argmax_pushdown_bug' "
"to False") "to False")
if (node.op == tensor._max_and_argmax and if (isinstance(node.op, tensor.MaxAndArgmax) and
node.inputs[0].owner and len(node.outputs[0].clients) == 0): node.inputs[0].owner and len(node.outputs[0].clients) == 0):
x_max, x_argmax = node.outputs x_max, x_argmax = node.outputs
x, axis = node.inputs x = node.inputs[0]
axis = node.op.get_params(node)
# TODO: Make a list/set of monotonic ops... # TODO: Make a list/set of monotonic ops...
if x.owner and x.owner.op in (softmax_op, softplus, tensor.exp, if x.owner and x.owner.op in (softmax_op, softplus, tensor.exp,
tensor.log, tensor.tanh, sigmoid): tensor.log, tensor.tanh, sigmoid):
pre_x, = x.owner.inputs pre_x, = x.owner.inputs
ret = tensor._max_and_argmax(pre_x, axis) ret = tensor.max_and_argmax(pre_x, axis)
copy_stack_trace(x_max, ret) copy_stack_trace(x_max, ret)
return ret return ret
if x.owner and x.owner.op == softmax_with_bias: if x.owner and x.owner.op == softmax_with_bias:
pre_x, pre_bias = x.owner.inputs pre_x, pre_bias = x.owner.inputs
ret = tensor._max_and_argmax(pre_x + ret = tensor.max_and_argmax(pre_x +
tensor.DimShuffle( tensor.DimShuffle(
pre_bias.broadcastable, pre_bias.broadcastable,
('x', 0))(pre_bias), axis) ('x', 0))(pre_bias), axis)
......
...@@ -41,8 +41,6 @@ from theano.tensor.elemwise import CAReduce ...@@ -41,8 +41,6 @@ from theano.tensor.elemwise import CAReduce
from theano.tensor import basic as T from theano.tensor import basic as T
from theano.tensor import DimShuffle from theano.tensor import DimShuffle
from theano.tensor.basic import (get_scalar_constant_value,
NotScalarConstantError)
from theano.tensor.opt import register_uncanonicalize from theano.tensor.opt import register_uncanonicalize
from theano import scalar as scal from theano import scalar as scal
...@@ -50,31 +48,19 @@ _logger = logging.getLogger('theano.tensor.opt') ...@@ -50,31 +48,19 @@ _logger = logging.getLogger('theano.tensor.opt')
@register_uncanonicalize @register_uncanonicalize
@gof.local_optimizer([T._max_and_argmax]) @gof.local_optimizer([T.MaxAndArgmax])
def local_max_and_argmax(node): def local_max_and_argmax(node):
""" """
If we don't use the argmax, change it to a max only. If we don't use the argmax, change it to a max only.
""" """
if node.op == T._max_and_argmax: if isinstance(node.op, T.MaxAndArgmax):
axis = node.op.get_params(node)
if len(node.outputs[1].clients) == 0: if len(node.outputs[1].clients) == 0:
# MaxAndArgmax support variable axis,
# but CAReduce support only constant axis.
if node.inputs[1].data is None:
axis = None
else:
try:
axis = get_scalar_constant_value(node.inputs[1])
except NotScalarConstantError:
axis = node.inputs[1]
if not isinstance(axis, T.TensorConstant):
return False
axis = axis.data
new = CAReduce(scal.maximum, axis)(node.inputs[0]) new = CAReduce(scal.maximum, axis)(node.inputs[0])
return [new, None] return [new, None]
if len(node.outputs[0].clients) == 0: if len(node.outputs[0].clients) == 0:
return [None, T._argmax(node.inputs[0], node.inputs[1])] return [None, T._argmax(node.inputs[0], axis)]
@register_uncanonicalize @register_uncanonicalize
......
...@@ -7619,23 +7619,23 @@ class TestInferShape(utt.InferShapeTester): ...@@ -7619,23 +7619,23 @@ class TestInferShape(utt.InferShapeTester):
# MaxAndArgmax, # MaxAndArgmax,
adtens3_val = rand(4, 5, 3) adtens3_val = rand(4, 5, 3)
self._compile_and_check([adtens3], self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, None), max_and_argmax(adtens3, None),
[adtens3_val], MaxAndArgmax) [adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3], self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 0), max_and_argmax(adtens3, 0),
[adtens3_val], MaxAndArgmax) [adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3], self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 1), max_and_argmax(adtens3, 1),
[adtens3_val], MaxAndArgmax) [adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3], self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 2), max_and_argmax(adtens3, 2),
[adtens3_val], MaxAndArgmax) [adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3], self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, [0, 1, 2]), max_and_argmax(adtens3, [0, 1, 2]),
[adtens3_val], MaxAndArgmax) [adtens3_val], MaxAndArgmax)
# ARange # ARange
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论