提交 b2ae1db6 authored 作者: notoraptor's avatar notoraptor

New update.

CPU op MaxAndArgmax rewritten so that it now takes the axes as an op parameter. Same update for GPU op GpuMaxAndArgmax. max_and_argmax wrapper rewritten to fully check the axis before passing it to MaxAndArgmax. Some other files have also been updated to ensure that the optimization which replaces the CPU op with the GPU op works well and that all tests involving MaxAndArgmax still pass after the updates. GPU op rewritten to handle the latest libgpuarray update. test_reduction rewritten; it now also tests 0-d arrays (scalars). I have run the following tests successfully. The new update of libgpuarray was downloaded, compiled and fully installed before running these tests. nosetests -v theano/gpuarray/tests/test_opt.py # There is 1 failure here, but it is not related to MaxAndArgmax: # ERROR: theano.gpuarray.tests.test_opt.test_local_lift_abstractconv_gpu_shape # RuntimeError: cuDNN is required for convolution and pooling nosetests -v theano/tensor/nnet/tests/test_nnet.py nosetests -v theano/tensor/tests/test_opt_uncanonicalize.py nosetests -v theano/tensor/tests/test_basic.py THEANO_FLAGS=floatX=float32,profile=True,profiling.n_ops=1000,profiling.n_apply=1000,profiling.ignore_first_call=True,profiling.destination=profiling.log nosetests --nocapture --verbose theano/gpuarray/tests/test_reduction.py Prevent Flake8!
上级 c2835d19
......@@ -65,7 +65,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import gpu_maxandargmax
from .reduction import GpuMaxAndArgmax
_logger = logging.getLogger("theano.gpuarray.opt")
......@@ -1782,7 +1782,7 @@ def _scan_type_infer(node):
@op_lifter([tensor.MaxAndArgmax])
@register_opt2([tensor.MaxAndArgmax], 'fast_compile')
def local_gpu_maxandargmax(op, context_name, inputs, outputs):
return gpu_maxandargmax
return GpuMaxAndArgmax(op.get_params(None))
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
......
from __future__ import print_function, absolute_import, division
import os
import numpy
import theano
from theano.gof import Op, Apply
from theano.tensor.var import TensorConstant
from theano.gof.type import Generic
from .basic_ops import (infer_context_name, as_gpuarray_variable)
from .type import GpuArrayType
......@@ -18,57 +18,29 @@ class GpuMaxAndArgmax(Op):
GPU version of MaxAndArgmax
"""
__props__ = ()
params_type = Generic()
__props__ = ('axis',)
argmax_dtype = "int64"
def make_node(self, X, axis=None):
def __init__(self, axis):
assert isinstance(axis, (list, tuple))
self.axis = tuple(axis)
def get_params(self, node):
return self.axis
def make_node(self, X):
context_name = infer_context_name(X)
if axis is None:
axis = range(X.type.ndim)
elif isinstance(axis, TensorConstant) and isinstance(axis.data, (list, numpy.ndarray)):
axis = [int(i) for i in axis.data]
elif not isinstance(axis, list):
raise TypeError("Axis must be a list. Got %s" % axis)
# Make axis entries non-negative, and verify that axes are valid.
for idx in xrange(len(axis)):
if axis[idx] < 0:
axis[idx] += X.type.ndim
if axis[idx] < 0 or axis[idx] >= X.type.ndim:
raise ValueError('Invalid axis: %s (the number of dimensions of the '
'input is: %s)' % (axis[idx], X.type.ndim))
# Sort axes and make them unique.
axis_set = set(axis) # used to build "broadcastable" variable below.
axis = list(axis_set)
axis.sort()
axis = theano.tensor.as_tensor_variable(axis)
inputs = [as_gpuarray_variable(X, context_name), axis]
# We keep the original broadcastable flags for dimensions on which
# we do not perform the max / argmax.
all_axes = set(self.axis)
broadcastable = [b for i, b in enumerate(X.type.broadcastable)
if i not in axis_set]
if i not in all_axes]
inputs = [as_gpuarray_variable(X, context_name)]
outputs = [GpuArrayType(X.type.dtype, broadcastable, context_name=context_name, name='max')(),
GpuArrayType(self.argmax_dtype, broadcastable, context_name=context_name, name='argmax')()]
return Apply(self, inputs, outputs)
def perform(self, node, inputs, outputs):
# NB: I must rewrite this method with pygpu functions instead of numpy functions.
x, axes = inputs
max, max_idx = outputs
X = numpy.asarray(x)
axes = tuple(axes)
max[0] = theano._asarray(numpy.max(X, axes), dtype=node.outputs[0].dtype)
# Numpy does not support multiple axes for argmax
# Work around
keep_axes = numpy.array([i for i in range(X.ndim) if i not in axes], dtype='int64')
# Not-reduced axes in front
transposed_x = numpy.transpose(X, numpy.concatenate((keep_axes, axes)))
kept_shape = transposed_x.shape[:len(keep_axes)]
reduced_shape = transposed_x.shape[len(keep_axes):]
new_shape = kept_shape + (numpy.prod(reduced_shape),)
reshaped_x = transposed_x.reshape(new_shape)
max_idx[0] = theano._asarray(numpy.argmax(reshaped_x, axis=-1), dtype='int64')
def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray_helper.h>']
......@@ -76,75 +48,71 @@ class GpuMaxAndArgmax(Op):
return [pygpu.get_include(), os.path.dirname(__file__)]
def c_code(self, node, name, input_names, output_names, sub):
# Recall: X, axes = input_names
# Recall: X = input_names[0]
# Recall: axes = sub['params']
# Recall: max, argmax = output_names
# Recall: fail = sub['fail']
max_typecode = pygpu.gpuarray.dtype_to_typecode(node.inputs[0].dtype)
argmax_typecode = pygpu.gpuarray.dtype_to_typecode(self.argmax_dtype)
axes_ctype = 'int64_t'
assert node.inputs[1].ndim == 1
ret = """
GpuArray temp;
GpuArray* %(name)s_input = &%(X)s->ga;
size_t %(name)s_input_ndim = PyGpuArray_NDIM(%(X)s);
unsigned %(name)s_redux_len = PyArray_DIM(%(axes)s, 0);
unsigned %(name)s_redux_len = PyTuple_GET_SIZE(%(axes)s);
unsigned* %(name)s_axes_to_reduce = (unsigned*)malloc(%(name)s_redux_len * sizeof(unsigned));
for (unsigned i = 0; i < %(name)s_redux_len; ++i) {
%(name)s_axes_to_reduce[i] = (unsigned) (*(%(axes_ctype)s*)PyArray_GETPTR1(%(axes)s, i));
PyObject* axis_object = PyTuple_GET_ITEM(%(axes)s, i);
%(name)s_axes_to_reduce[i] = (unsigned) PyInt_AS_LONG(axis_object);
Py_XDECREF(axis_object);
}
size_t %(name)s_input_ndim = PyGpuArray_NDIM(%(X)s);
size_t %(name)s_output_ndim = %(name)s_input_ndim - %(name)s_redux_len;
size_t* %(name)s_output_dims = NULL;
if (%(name)s_output_ndim == 0) {
/* Current backend function GpuArray_maxandargmax does not work when
* all axes need to be reduced. So to handle this case, we create a view
* of the input as a matrix with 1 row and as many columns as elements
* in the input, so that the 2nd dimenson of the matrix will be reduced. */
size_t total_size = 1;
for (size_t i = 0; i < %(name)s_input_ndim; ++i) {
total_size *= PyGpuArray_DIM(%(X)s, i);
size_t* %(name)s_output_dims = (size_t*)malloc(%(name)s_output_ndim * sizeof(size_t));
if (%(name)s_redux_len == 1) {
for (unsigned i = 0; i < %(name)s_axes_to_reduce[0]; ++i) {
%(name)s_output_dims[i] = PyGpuArray_DIM(%(X)s, i);
}
size_t newdims[2] = {1, total_size};
%(name)s_input = &temp;
if (GA_NO_ERROR !=
GpuArray_reshape(%(name)s_input, &%(X)s->ga, 2, newdims, GA_ANY_ORDER, 0)
) {
%(fail)s
for (unsigned i = %(name)s_axes_to_reduce[0] + 1; i < %(name)s_input_ndim; ++i) {
%(name)s_output_dims[i-1] = PyGpuArray_DIM(%(X)s, i);
}
%(name)s_redux_len = 1;
%(name)s_axes_to_reduce[0] = 1;
} else {
%(name)s_output_dims = (size_t*)malloc(%(name)s_output_ndim * sizeof(size_t));
if (%(name)s_redux_len == 1) {
for (unsigned i = 0; i < %(name)s_axes_to_reduce[0]; ++i) {
%(name)s_output_dims[i] = PyGpuArray_DIM(%(X)s, i);
}
for (unsigned i = %(name)s_axes_to_reduce[0] + 1; i < %(name)s_input_ndim; ++i) {
%(name)s_output_dims[i-1] = PyGpuArray_DIM(%(X)s, i);
}
} else {
int64_t current_input_pos = -1;
int64_t current_output_pos = -1;
for (unsigned i = 0; i < %(name)s_redux_len; ++i) {
for (++current_input_pos; current_input_pos < %(name)s_axes_to_reduce[i]; ++current_input_pos) {
%(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos);
}
}
for (++current_input_pos; current_input_pos < %(name)s_input_ndim; ++current_input_pos) {
int64_t current_input_pos = -1;
int64_t current_output_pos = -1;
for (unsigned i = 0; i < %(name)s_redux_len; ++i) {
for (++current_input_pos; current_input_pos < %(name)s_axes_to_reduce[i]; ++current_input_pos) {
%(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos);
}
}
for (++current_input_pos; current_input_pos < %(name)s_input_ndim; ++current_input_pos) {
%(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos);
}
}
if (theano_prep_output(&%(max)s, %(name)s_output_ndim, %(name)s_output_dims, %(max_typecode)s, GA_C_ORDER, %(X)s->context)) {
PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to prepare max output.");
%(fail)s
}
if (theano_prep_output(&%(argmax)s, %(name)s_output_ndim, %(name)s_output_dims, %(argmax_typecode)s, GA_C_ORDER, %(X)s->context)) {
PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to prepare argmax output.");
%(fail)s
}
if (GA_NO_ERROR !=
GpuArray_maxandargmax(&%(max)s->ga, &%(argmax)s->ga, %(name)s_input, %(name)s_redux_len, %(name)s_axes_to_reduce)
if (%(name)s_input_ndim == 0) {
/* GpuArray_maxandargmax can't handle a 0-d array
* because it expects that 1 <= redux_len <= input_ndim.
* As input_ndim == 0, then 1 <= redux_len <= 0 is false.
* To handle this case we copy input to max and we set argmax to 0.
*/
if (GA_NO_ERROR != GpuArray_setarray(&%(max)s->ga, &%(X)s->ga)) {
PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to copy input to max when input is a scalar.");
%(fail)s
}
if (GA_NO_ERROR != GpuArray_memset(&%(argmax)s->ga, 0)) {
PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to set argmax to 0 when input is a scalar.");
%(fail)s
}
} else if (GA_NO_ERROR !=
GpuArray_maxandargmax(&%(max)s->ga, &%(argmax)s->ga, &%(X)s->ga, %(name)s_redux_len, %(name)s_axes_to_reduce)
) {
PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to compute gpuarray maxandargmax.");
%(fail)s
}
"""
......@@ -153,8 +121,8 @@ class GpuMaxAndArgmax(Op):
GpuArray_sync(&%(max)s->ga);
GpuArray_sync(&%(argmax)s->ga);
"""
return ret % {'X': input_names[0], 'axes': input_names[1], 'max': output_names[0], 'argmax': output_names[1],
'axes_ctype': axes_ctype, 'max_typecode': max_typecode, 'argmax_typecode': argmax_typecode,
return ret % {'X': input_names[0], 'axes': sub['params'], 'max': output_names[0], 'argmax': output_names[1],
'max_typecode': max_typecode, 'argmax_typecode': argmax_typecode,
'name': name, 'fail': sub['fail']}
def c_code_cleanup(self, node, name, inputs, outputs, sub):
......@@ -162,6 +130,3 @@ class GpuMaxAndArgmax(Op):
free(%(name)s_output_dims);
free(%(name)s_axes_to_reduce);
""" % {'name': name, 'X': inputs[0]}
gpu_maxandargmax = GpuMaxAndArgmax()
from theano.gpuarray import GpuArrayType
from theano.tests import unittest_tools as utt
import numpy as np
import theano
import theano.tensor as T
from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand_gpuarray
# Shape of the 5-D test tensors (10 000 000 elements).
test_shape = (1000, 100, 10, 5, 2)
def numpy_random_array(*shapes):
    """Return a random array of the given shape.

    Values are drawn from N(0, 1) and cast to theano.config.floatX.
    """
    n_elements = 1
    for extent in shapes:
        n_elements *= extent
    samples = np.random.normal(size=n_elements)
    return samples.astype(theano.config.floatX).reshape(shapes)
def numpy_maxandargmax(X, axis=None):
    """Reference implementation of max+argmax over (possibly several) axes.

    Parameters
    ----------
    X : numpy.ndarray
        Input array.
    axis : None, int, or sequence of int
        Axes to reduce. None means all axes. Negative values are
        accepted and normalized against ``X.ndim`` (the original code
        broke on negative axes: they were fed raw into the transpose).

    Returns
    -------
    (max, argmax) : tuple
        ``max`` is ``np.max(X, axis)``; ``argmax`` is the flat index of
        the maximum within the reduced axes, matching the convention of
        ``MaxAndArgmax.perform()``.
    """
    if axis is None:
        axis = range(X.ndim)
    elif not isinstance(axis, (tuple, list)):
        axis = [axis]
    # Normalize to plain non-negative Python ints; numpy integers or
    # negative axes would break the transpose/reshape below.
    normalized = []
    for a in axis:
        a = int(a)
        if a < 0:
            a += X.ndim
        normalized.append(a)
    axis = tuple(sorted(set(normalized)))  # dedupe + sort
    ref_max = np.max(X, axis=axis)
    # Numpy does not support multiple axes for argmax. Work around:
    # move the kept axes to the front, flatten the reduced axes into
    # one trailing dimension, then argmax over that dimension.
    keep_axes = np.array([i for i in range(X.ndim) if i not in axis], dtype='int64')
    transposed_x = np.transpose(X, np.concatenate((keep_axes, axis)))
    kept_shape = transposed_x.shape[:len(keep_axes)]
    reduced_shape = transposed_x.shape[len(keep_axes):]
    # Cast to int: np.prod of an empty shape is a float, which some
    # numpy versions reject inside a reshape target.
    new_shape = kept_shape + (int(np.prod(reduced_shape)),)
    reshaped_x = transposed_x.reshape(new_shape)
    return (ref_max, np.argmax(reshaped_x, axis=-1))
# We run all tests with 5-D tensors of 10 000 000 elements.
# NB: In each test, any first call of theano function should be ignored
# with Theano config flag profiling.ignore_first_call=True.
def check_if_gpu_maxandargmax_in_graph(theano_function):
    """Assert that the compiled graph contains at least one GpuMaxAndArgmax node."""
    graph_ops = (node.op for node in theano_function.maker.fgraph.apply_nodes)
    assert any(isinstance(an_op, theano.gpuarray.reduction.GpuMaxAndArgmax)
               for an_op in graph_ops)


def check_if_gpu_maxandargmax_not_in_graph(theano_function):
    """Assert that the compiled graph contains no GpuMaxAndArgmax node."""
    graph_ops = (node.op for node in theano_function.maker.fgraph.apply_nodes)
    assert not any(isinstance(an_op, theano.gpuarray.reduction.GpuMaxAndArgmax)
                   for an_op in graph_ops)
def run_gpu_tensor5(test_matrix=None, axis=None):
    """Compile max/argmax of a 5-D GPU tensor and compare to the NumPy reference.

    Also asserts that the GpuMaxAndArgmax op was introduced in the graph.
    If ``test_matrix`` is None, a random GPU array of ``test_shape`` is used.
    """
    M = GpuArrayType(dtype=theano.config.floatX, broadcastable=(False,) * 5)()
    f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)], name='GPU-function', mode=mode_with_gpu)
    check_if_gpu_maxandargmax_in_graph(f)
    if test_matrix is None:
        test_matrix = rand_gpuarray(*test_shape)
    # Warm-up call: ignored when profiling.ignore_first_call=True (see NB above).
    f(test_matrix)
    theano_max, theano_argmax = f(test_matrix)
    ref_max, ref_argmax = numpy_maxandargmax(np.asarray(test_matrix), axis=axis)
    utt.assert_allclose(ref_max, theano_max)
    utt.assert_allclose(ref_argmax, theano_argmax)
def run_cpu_tensor5(test_matrix=None, axis=None):
    """Compile max/argmax of a 5-D CPU tensor and compare to the NumPy reference.

    Also asserts that no GpuMaxAndArgmax op is in the compiled graph.
    If ``test_matrix`` is None, a random host array of ``test_shape`` is used.
    """
    M = T.tensor5()
    f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)], name='cpu-function', mode=mode_without_gpu)
    check_if_gpu_maxandargmax_not_in_graph(f)
    if test_matrix is None:
        test_matrix = numpy_random_array(*test_shape)
    # Warm-up call: ignored when profiling.ignore_first_call=True (see NB above).
    f(test_matrix)
    theano_max, theano_argmax = f(test_matrix)
    ref_max, ref_argmax = numpy_maxandargmax(test_matrix, axis=axis)
    utt.assert_allclose(ref_max, theano_max)
    utt.assert_allclose(ref_argmax, theano_argmax)
def run_tensor5(axis=None):
    """Run both the CPU and the GPU checks on freshly generated 5-D inputs."""
    host_data = numpy_random_array(*test_shape)
    device_data = rand_gpuarray(*test_shape)
    run_cpu_tensor5(host_data, axis)
    run_gpu_tensor5(device_data, axis)
def test_none():
    # axis=None: reduce over all axes.
    run_tensor5(None)


def test_all_axes():
    # All five axes, given explicitly and sorted.
    run_tensor5((0, 1, 2, 3, 4))


def test_all_axes_unsorted():
    # All axes in arbitrary order; the op must normalize/sort them.
    run_tensor5((4, 1, 3, 0, 2))


def test_axis_1():
    # Single-axis reductions, one per dimension.
    run_tensor5(0)


def test_axis_2():
    run_tensor5(1)


def test_axis_3():
    run_tensor5(2)


def test_axis_4():
    run_tensor5(3)


def test_axis_5():
    run_tensor5(4)


def test_2_axes():
    # Partial multi-axis reductions.
    run_tensor5((0, 3))


def test_3_axes():
    run_tensor5((0, 3, 4))


def test_4_axes():
    run_tensor5((0, 1, 2, 4))
from __future__ import print_function, absolute_import, division
from unittest import TestCase
import numpy as np
import theano
import theano.tensor as T
from theano.tests import unittest_tools as utt
from theano.tests.unittest_tools import SkipTest
from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand_gpuarray
from .. import GpuArrayType
# Shape of the largest test tensors; lower-rank tests use a prefix of it.
test_shape = (1000, 100, 10, 5, 2)
# NB: This order of "unsorted axes" is arbitrary and is here
# just to have the same information on profile output
# from one test to another.
unsorted_axes = (2, 4, 0, 3, 1)
# Seed from OS entropy: the tests compare Theano against NumPy on the
# same data, so cross-run reproducibility is not required.
np.random.seed()
def numpy_random_array(shapes):
    """Return a random N(0, 1) array of shape ``shapes``, cast to floatX."""
    n_elements = 1
    for extent in shapes:
        n_elements *= extent
    flat = np.random.normal(size=n_elements).astype(theano.config.floatX)
    return flat.reshape(shapes)
def numpy_maxandargmax(X, axis=None):
    """Compute max and argmax of ``X`` over the given axes with NumPy only.

    Mirrors the semantics of MaxAndArgmax.perform(): ``argmax`` is the
    flat position of the maximum inside the reduced axes.
    """
    if axis is None:
        axis = range(X.ndim)
    elif not isinstance(axis, (tuple, list)):
        axis = [int(axis)]
    # Deduplicate and order the reduction axes.
    axis = tuple(sorted(set(axis)))
    ref_max = np.max(X, axis=axis)
    # np.argmax only accepts a single axis: bring the kept axes to the
    # front, collapse the reduced axes into one trailing dimension,
    # then take argmax over that dimension.
    kept = np.array([d for d in range(X.ndim) if d not in axis], dtype='int64')
    fronted = np.transpose(X, np.concatenate((kept, axis)))
    lead_shape = fronted.shape[:len(kept)]
    tail_shape = fronted.shape[len(kept):]
    # Plain-int shape: np.prod may yield numpy scalars / floats.
    flat_shape = tuple(int(s) for s in lead_shape) + (int(np.prod(tail_shape)),)
    return (ref_max, np.argmax(fronted.reshape(flat_shape), axis=-1))
def check_if_gpu_maxandargmax_in_graph(theano_function):
    # Assert the compiled graph contains at least one GpuMaxAndArgmax node
    # (i.e. the CPU -> GPU optimization fired).
    assert len([node for node in theano_function.maker.fgraph.apply_nodes
                if isinstance(node.op, theano.gpuarray.reduction.GpuMaxAndArgmax)]) > 0


def check_if_gpu_maxandargmax_not_in_graph(theano_function):
    # Assert the compiled graph contains no GpuMaxAndArgmax node
    # (i.e. the computation stayed on the host).
    assert len([node for node in theano_function.maker.fgraph.apply_nodes
                if isinstance(node.op, theano.gpuarray.reduction.GpuMaxAndArgmax)]) == 0
class BaseTest:
    """Shared max/argmax tests, run on both the host (CPU) and the GPU.

    Subclasses must set ``tensor_size`` — the rank of the test input,
    from 0 (scalar) to 5. Each test compiles a Theano function computing
    ``max`` and ``argmax`` over some axes, checks whether GpuMaxAndArgmax
    was (or was not) introduced in the graph, and compares both backends
    against the pure-NumPy reference ``numpy_maxandargmax``.
    """

    # Input rank; must be set in subclasses (0 to 5).
    tensor_size = None
    dtype = theano.config.floatX

    def setUp(self):
        if not isinstance(self.tensor_size, int):
            raise SkipTest("No tensor ndim defined.")
        if self.tensor_size < 0 or self.tensor_size > 5:
            # Typo fixes in the message: "inclued" -> "included",
            # "dimensons" -> "dimensions".
            raise SkipTest("We allow from 0 (included) to 5 (included) dimensions for these tests.")

    def get_host_tensor(self):
        """Symbolic CPU tensor of rank ``tensor_size``."""
        broadcastable = (False,) * self.tensor_size
        return T.tensor(self.dtype, broadcastable)

    def get_gpu_tensor(self):
        """Symbolic GPU tensor of rank ``tensor_size``."""
        broadcastable = (False,) * self.tensor_size
        return GpuArrayType(self.dtype, broadcastable)()

    def get_host_value(self):
        """Random host ndarray; shape is a prefix of ``test_shape``."""
        return numpy_random_array(test_shape[:self.tensor_size])

    def get_gpu_value(self):
        """Random GPU array; shape is a prefix of ``test_shape``."""
        return rand_gpuarray(*(test_shape[:self.tensor_size]))

    # NB: In compute_host() and compute_gpu(),
    # the first call of the theano function should be ignored in profiling,
    # with Theano config flag profiling.ignore_first_call=True.
    def compute_host(self, test_tensor, axis):
        """Compile on CPU, check no GPU op is present, compare to NumPy."""
        M = self.get_host_tensor()
        f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)],
                            name='HOST-function', mode=mode_without_gpu)
        check_if_gpu_maxandargmax_not_in_graph(f)
        # Warm-up call (ignored by profiling, see NB above).
        f(test_tensor)
        theano_max, theano_argmax = f(test_tensor)
        ref_max, ref_argmax = numpy_maxandargmax(test_tensor, axis=axis)
        utt.assert_allclose(ref_max, theano_max)
        utt.assert_allclose(ref_argmax, theano_argmax)

    def compute_gpu(self, test_gpu_tensor, test_host_tensor, axis):
        """Compile with the GPU mode, check the GPU op is present, compare to NumPy."""
        M = self.get_gpu_tensor()
        f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)],
                            name='GPU-function', mode=mode_with_gpu)
        check_if_gpu_maxandargmax_in_graph(f)
        # Warm-up call (ignored by profiling, see NB above).
        f(test_gpu_tensor)
        theano_max, theano_argmax = f(test_gpu_tensor)
        ref_max, ref_argmax = numpy_maxandargmax(test_host_tensor, axis=axis)
        utt.assert_allclose(ref_max, theano_max)
        utt.assert_allclose(ref_argmax, theano_argmax)

    def compute(self, axis=None):
        # We want to run the CPU op and the GPU op on the same randomly
        # generated tensor.
        test_gpu_tensor = self.get_gpu_value()
        test_host_tensor = np.asarray(test_gpu_tensor)
        self.compute_host(test_host_tensor, axis)
        self.compute_gpu(test_gpu_tensor, test_host_tensor, axis)

    def compute_axis(self, pos):
        """Reduce over the single axis ``pos`` if it is valid for this rank."""
        if 0 <= pos < self.tensor_size:
            self.compute(pos)

    def compute_some_axes(self, count):
        """Reduce over the first ``count`` valid entries of ``unsorted_axes``."""
        if 0 <= count <= self.tensor_size:
            self.compute([i for i in unsorted_axes if i < self.tensor_size][:count])

    def test_none(self):
        self.compute(None)

    def test_all_axes(self):
        self.compute(range(self.tensor_size))

    def test_all_axes_unsorted(self):
        self.compute([i for i in unsorted_axes if i < self.tensor_size])

    def test_axis_1(self):
        self.compute_axis(0)

    def test_axis_2(self):
        self.compute_axis(1)

    def test_axis_3(self):
        self.compute_axis(2)

    def test_axis_4(self):
        self.compute_axis(3)

    def test_axis_5(self):
        self.compute_axis(4)

    # For the tests below, we expect the CPU op to run with its Python
    # implementation (the C code only supports 1 axis or all axes).
    def test_2_axes(self):
        self.compute_some_axes(2)

    def test_3_axes(self):
        self.compute_some_axes(3)

    def test_4_axes(self):
        self.compute_some_axes(4)
class TestScalar(BaseTest, TestCase):
    # 0-d input (scalar): exercises the special 0-d code paths of the ops.
    tensor_size = 0


class TestVector(BaseTest, TestCase):
    # 1-d input.
    tensor_size = 1


class TestMatrix(BaseTest, TestCase):
    # 2-d input.
    tensor_size = 2


class TestTensor5(BaseTest, TestCase):
    # 5-d input using the full test_shape.
    tensor_size = 5
......@@ -15,6 +15,7 @@ from theano.compat import izip
from theano.configparser import config
from theano import gof
from theano.gof import Apply, Constant, Op, Variable
from theano.gof.type import Generic
from theano.tensor import elemwise
from theano.tensor.var import (AsTensorError, TensorVariable,
......@@ -1181,45 +1182,32 @@ class MaxAndArgmax(Op):
nin = 2 # tensor, axis
nout = 2 # max val, max idx
E_axis = 'invalid axis'
__props__ = ()
def make_node(self, x, axis=None):
x = _as_tensor_variable(x)
params_type = Generic()
__props__ = ('axis',)
if axis is None:
axis = range(x.type.ndim)
elif not isinstance(axis, list):
raise TypeError("Axis must be a list. Got %s" % axis)
def __init__(self, axis):
assert isinstance(axis, list)
self.axis = tuple(axis)
# Make axis entries non-negative, and sort them
for idx in xrange(len(axis)):
if axis[idx] < 0:
axis[idx] += x.type.ndim
axis.sort()
def get_params(self, node):
return self.axis
# Verify that axes are valid
all_axes = []
for ax in axis:
if ax < 0 or ax >= x.type.ndim:
raise ValueError(
'Invalid axis: %s (the number of dimensions of the '
'input is: %s)' % (ax, x.type.ndim))
if ax not in all_axes:
all_axes.append(ax)
axis = _as_tensor_variable(all_axes)
assert axis.ndim == 1
inputs = [x, axis]
def make_node(self, x):
x = _as_tensor_variable(x)
# We keep the original broadcastable flags for dimensions on which
# we do not perform the max / argmax.
all_axes = set(self.axis)
broadcastable = [b for i, b in enumerate(x.type.broadcastable)
if i not in all_axes]
inputs = [x]
outputs = [tensor(x.type.dtype, broadcastable, name='max'),
tensor('int64', broadcastable, name='argmax')]
return Apply(self, inputs, outputs)
def perform(self, node, inp, outs):
x, axes = inp
def perform(self, node, inp, outs, params):
x = inp[0]
axes = params
max, max_idx = outs
if axes is None:
axes = tuple(range(x.ndim))
......@@ -1242,35 +1230,40 @@ class MaxAndArgmax(Op):
dtype='int64')
def c_code(self, node, name, inp, out, sub):
x, axis = inp
if len(self.axis) != 1 and len(self.axis) != node.inputs[0].ndim:
raise NotImplementedError("NumPy C-API can compute max and argmax only for 1 axis or for all axes.")
x = inp[0]
axis = sub['params']
max, argmax = out
fail = sub["fail"]
if NoneConst.equals(node.inputs[1]) or len(node.inputs[1].data) == node.inputs[0].ndim:
axis_code = "axis = NPY_MAXDIMS;"
else:
assert node.inputs[1].ndim == 1
# Fall back to perform() if there are multiple axes
if len(node.inputs[1].data) > 1:
raise NotImplementedError()
axis_code = """
axis = ((dtype_%(axis)s*)PyArray_DATA(%(axis)s))[0];
if(axis > PyArray_NDIM(%(x)s)-1 || axis < -PyArray_NDIM(%(x)s)){
ret = """
int axis;
if (PyTuple_GET_SIZE(%(axis)s) == PyArray_NDIM(%(x)s)) {
axis = NPY_MAXDIMS;
} else if(PyTuple_GET_SIZE(%(axis)s) == 1) {
PyObject* axis_object = PyTuple_GET_ITEM(%(axis)s, 0);
axis = (int)PyInt_AS_LONG(axis_object);
Py_XDECREF(axis_object);
if (axis > PyArray_NDIM(%(x)s)-1 || axis < -PyArray_NDIM(%(x)s)) {
PyErr_SetString(PyExc_ValueError,
"MaxAndArgmax, bad axis argument");
"MaxAndArgmax: bad axis argument");
%(fail)s
}
""" % locals()
ret = """
int axis;
} else {
PyErr_SetString(PyExc_NotImplementedError,
"MaxAndArgmax: NumPy C-API can compute max and argmax only for 1 axis or for all axes.");
%(fail)s
}
Py_CLEAR(%(max)s);
Py_CLEAR(%(argmax)s);//todo pass them as out parameter.
%(axis_code)s
%(max)s = (PyArrayObject*)PyArray_Max(%(x)s, axis, NULL);
if(%(max)s == NULL){
if (%(max)s == NULL) {
%(fail)s;
}
if(!PyArray_CheckExact(%(max)s)){
if (!PyArray_CheckExact(%(max)s)) {
%(max)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(max)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
if(%(max)s == NULL){
%(fail)s;
......@@ -1278,17 +1271,17 @@ class MaxAndArgmax(Op):
}
%(argmax)s = (PyArrayObject*)PyArray_ArgMax(%(x)s, axis, NULL);
if(%(argmax)s == NULL){
if (%(argmax)s == NULL) {
Py_CLEAR(%(max)s);
%(fail)s;
}
if(!PyArray_CheckExact(%(argmax)s)){
if (!PyArray_CheckExact(%(argmax)s)) {
%(argmax)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(argmax)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
if(%(argmax)s == NULL){
%(fail)s;
}
}
if(PyArray_TYPE(%(argmax)s) != NPY_INT64){
if (PyArray_TYPE(%(argmax)s) != NPY_INT64) {
PyObject * tmp = PyArray_Cast(%(argmax)s, NPY_INT64);
if (NULL == tmp){
%(fail)s;
......@@ -1303,28 +1296,25 @@ class MaxAndArgmax(Op):
return (4,)
def infer_shape(self, node, shapes):
ishape, axis_shape = shapes
axis = node.inputs[1]
if axis.data is None:
return [(), ()]
rval = tuple([ishape[i] for (i, b) in enumerate(
node.inputs[0].type.broadcastable) if i not in axis.data])
ishape = shapes[0]
rval = tuple(ishape[i] for (i, b) in enumerate(
node.inputs[0].type.broadcastable) if i not in self.axis)
return [rval, rval]
def R_op(self, inputs, eval_points):
if eval_points[0] is None:
return [None, None]
if not isinstance(inputs[1], theano.Constant):
if len(self.axis) != 1:
raise ValueError(('R_op supported for arg_max only for '
'constant axis!'))
if inputs[1].data > 1:
if self.axis[0] > 1:
raise ValueError(('R_op supported for arg_max only when '
' axis is 0 or 1'))
if inputs[0].ndim != 2:
raise ValueError(('R_op supported for arg_max only when '
' input is a matrix'))
max_vals, max_pos = self.make_node(*inputs).outputs
if inputs[1].data == 0:
if self.axis[0] == 0:
return [eval_points[0][max_pos,
arange(eval_points[0].shape[1])], None]
else:
......@@ -1345,7 +1335,8 @@ class MaxAndArgmax(Op):
# g_max has one less dimension than x, so you need to complete
# g_max to x's shape when axis=0 the broadcasting mechanism
# does it automatically
x, axis = inp
x = inp[0]
axis = _as_tensor_variable(self.axis)
g_max, g_max_idx = grads
g_max_disconnected = isinstance(g_max.type, DisconnectedType)
......@@ -1363,7 +1354,7 @@ class MaxAndArgmax(Op):
# if the max is disconnected but the argmax is not,
# the gradient on its inputs is zero
if g_max_disconnected:
return [x.zeros_like(), axis_grad]
return [x.zeros_like()]
if NoneConst.equals(axis):
axis_ = list(range(x.ndim))
else:
......@@ -1387,9 +1378,7 @@ class MaxAndArgmax(Op):
# Set the grad to the correct position.
g_x = eq(xmax_pad, x) * g_max_pad
return g_x, axis_grad
_max_and_argmax = MaxAndArgmax()
return g_x,
class Argmax(Op):
......@@ -1611,6 +1600,7 @@ def max_and_argmax(a, axis=None, keepdims=False):
"""
# Check axis and convert it to a Python list of integers.
# Axis will be used as an op param of MaxAndArgmax.
if axis is None:
axis = range(a.type.ndim)
elif (isinstance(axis, (integer_types, numpy.integer)) or
......@@ -1630,8 +1620,18 @@ def max_and_argmax(a, axis=None, keepdims=False):
axis = [int(axis.data)]
elif isinstance(axis.data, (list, numpy.ndarray)):
axis = [int(i) for i in axis.data]
out, argout = _max_and_argmax(a, axis)
if len(axis) == 0:
axis = range(a.type.ndim)
else:
for i in range(len(axis)):
if axis[i] < 0:
axis[i] += a.type.ndim
if axis[i] < 0 or axis[i] >= a.type.ndim:
raise ValueError("max and argmax computation needs a valid axis number for %d-D tensor. Got %d"
% (a.type.ndim, axis[i]))
axis = list(set(axis))
axis.sort()
out, argout = MaxAndArgmax(axis)(a)
if keepdims:
out = makeKeepDims(a, out, axis)
......
......@@ -1568,9 +1568,9 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
@opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([tensor._max_and_argmax])
@gof.local_optimizer([tensor.MaxAndArgmax])
def local_argmax_pushdown(node):
if node.op == tensor._max_and_argmax and node.inputs[0].owner and \
if isinstance(node.op, tensor.MaxAndArgmax) and node.inputs[0].owner and \
len(node.outputs[0].clients) > 0 and node.inputs[0].owner.op in \
(softmax_op, softplus, tensor.exp, tensor.log, tensor.tanh, sigmoid,
softmax_with_bias):
......@@ -1584,20 +1584,21 @@ def local_argmax_pushdown(node):
"warning set the Theano flags 'warn.argmax_pushdown_bug' "
"to False")
if (node.op == tensor._max_and_argmax and
if (isinstance(node.op, tensor.MaxAndArgmax) and
node.inputs[0].owner and len(node.outputs[0].clients) == 0):
x_max, x_argmax = node.outputs
x, axis = node.inputs
x = node.inputs[0]
axis = node.op.get_params(node)
# TODO: Make a list/set of monotonic ops...
if x.owner and x.owner.op in (softmax_op, softplus, tensor.exp,
tensor.log, tensor.tanh, sigmoid):
pre_x, = x.owner.inputs
ret = tensor._max_and_argmax(pre_x, axis)
ret = tensor.max_and_argmax(pre_x, axis)
copy_stack_trace(x_max, ret)
return ret
if x.owner and x.owner.op == softmax_with_bias:
pre_x, pre_bias = x.owner.inputs
ret = tensor._max_and_argmax(pre_x +
ret = tensor.max_and_argmax(pre_x +
tensor.DimShuffle(
pre_bias.broadcastable,
('x', 0))(pre_bias), axis)
......
......@@ -41,8 +41,6 @@ from theano.tensor.elemwise import CAReduce
from theano.tensor import basic as T
from theano.tensor import DimShuffle
from theano.tensor.basic import (get_scalar_constant_value,
NotScalarConstantError)
from theano.tensor.opt import register_uncanonicalize
from theano import scalar as scal
......@@ -50,25 +48,18 @@ _logger = logging.getLogger('theano.tensor.opt')
@register_uncanonicalize
@gof.local_optimizer([T._max_and_argmax])
@gof.local_optimizer([T.MaxAndArgmax])
def local_max_and_argmax(node):
"""
If we don't use the argmax, change it to a max only.
"""
if node.op == T._max_and_argmax:
if isinstance(node.op, T.MaxAndArgmax):
if len(node.outputs[1].clients) == 0:
# MaxAndArgmax support variable axis,
# but CAReduce support only constant axis.
if node.inputs[1].data is None:
axis = None
else:
try:
axis = get_scalar_constant_value(node.inputs[1])
except NotScalarConstantError:
axis = node.inputs[1]
if not isinstance(axis, T.TensorConstant):
return False
axis = axis.data
axis = node.op.get_params(node)
if len(axis) != 1:
return False
new = CAReduce(scal.maximum, axis)(node.inputs[0])
return [new, None]
......
......@@ -7619,23 +7619,23 @@ class TestInferShape(utt.InferShapeTester):
# MaxAndArgmax,
adtens3_val = rand(4, 5, 3)
self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, None),
max_and_argmax(adtens3, None),
[adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 0),
max_and_argmax(adtens3, 0),
[adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 1),
max_and_argmax(adtens3, 1),
[adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 2),
max_and_argmax(adtens3, 2),
[adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, [0, 1, 2]),
max_and_argmax(adtens3, [0, 1, 2]),
[adtens3_val], MaxAndArgmax)
# ARange
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论