Commit b2ae1db6 authored by notoraptor

New update.

CPU op MaxAndArgmax rewritten so that it now takes the axes as an op parameter. Same update for GPU op GpuMaxAndArgmax. max_and_argmax wrapper rewritten to fully check the axis before passing it to MaxAndArgmax. Some other files have also been updated to ensure that the optimization which replaces the CPU op by the GPU op works well and that all tests involving MaxAndArgmax still work well after the updates. GPU op rewritten to handle the latest libgpuarray update. test_reduction rewritten. It now also tests 0-d arrays (scalars). I have run the following tests with success. The new update of libgpuarray has been downloaded, compiled and fully installed before running these tests. nosetests -v theano/gpuarray/tests/test_opt.py # There is 1 failure here, but it is not related to MaxAndArgmax: # ERROR: theano.gpuarray.tests.test_opt.test_local_lift_abstractconv_gpu_shape # RuntimeError: cuDNN is required for convolution and pooling nosetests -v theano/tensor/nnet/tests/test_nnet.py nosetests -v theano/tensor/tests/test_opt_uncanonicalize.py nosetests -v theano/tensor/tests/test_basic.py THEANO_FLAGS=floatX=float32,profile=True,profiling.n_ops=1000,profiling.n_apply=1000,profiling.ignore_first_call=True,profiling.destination=profiling.log nosetests --nocapture --verbose theano/gpuarray/tests/test_reduction.py Prevent Flake8!
Parent c2835d19
...@@ -65,7 +65,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor, ...@@ -65,7 +65,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20) GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import gpu_maxandargmax from .reduction import GpuMaxAndArgmax
_logger = logging.getLogger("theano.gpuarray.opt") _logger = logging.getLogger("theano.gpuarray.opt")
...@@ -1782,7 +1782,7 @@ def _scan_type_infer(node): ...@@ -1782,7 +1782,7 @@ def _scan_type_infer(node):
@op_lifter([tensor.MaxAndArgmax]) @op_lifter([tensor.MaxAndArgmax])
@register_opt2([tensor.MaxAndArgmax], 'fast_compile') @register_opt2([tensor.MaxAndArgmax], 'fast_compile')
def local_gpu_maxandargmax(op, context_name, inputs, outputs): def local_gpu_maxandargmax(op, context_name, inputs, outputs):
return gpu_maxandargmax return GpuMaxAndArgmax(op.get_params(None))
# Do not register in fast_run or fast_compile. # Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled. # It will be added to fast_run if the GPU is enabled.
......
from theano.gpuarray import GpuArrayType
from theano.tests import unittest_tools as utt
import numpy as np
import theano
import theano.tensor as T
from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand_gpuarray
# Shape used for the test tensors: 5-D, 1000*100*10*5*2 = 10,000,000 elements.
test_shape = (1000, 100, 10, 5, 2)
def numpy_random_array(*shapes):
    """Return a random array of the given shape, drawn from a standard
    normal distribution and cast to the configured Theano float type.

    Parameters
    ----------
    *shapes : int
        One dimension size per positional argument.
    """
    # np.random.normal accepts a shape tuple directly and fills in C order,
    # so this matches drawing a flat vector of the same length and reshaping.
    return np.random.normal(size=shapes).astype(theano.config.floatX)
def numpy_maxandargmax(X, axis=None):
    """Reference implementation: (max, argmax) of ``X`` over the given axes.

    Parameters
    ----------
    X : numpy.ndarray
        Input array.
    axis : None, int, or tuple/list of ints
        Axes to reduce over; ``None`` means all axes.

    Returns
    -------
    (max, argmax) pair, where argmax indexes into the flattened reduced
    dimensions, mirroring MaxAndArgmax.perform().
    """
    if axis is None:
        axis = range(X.ndim)
    elif not isinstance(axis, (tuple, list)):
        # Cast so NumPy integer scalars are accepted as a single axis too.
        axis = [int(axis)]
    # Remove duplicated values and sort, so the transposition is deterministic.
    axis = tuple(sorted(set(axis)))
    ref_max = np.max(X, axis=axis)
    # Numpy does not support multiple axes for argmax. Work around:
    # move the non-reduced axes to the front, collapse the reduced axes into a
    # single trailing axis, then argmax along it (copied from
    # MaxAndArgmax.perform()).
    keep_axes = np.array([i for i in range(X.ndim) if i not in axis], dtype='int64')
    # Not-reduced axes in front
    transposed_x = np.transpose(X, np.concatenate((keep_axes, axis)))
    kept_shape = transposed_x.shape[:len(keep_axes)]
    reduced_shape = transposed_x.shape[len(keep_axes):]
    # Cast to int: np.prod of an empty shape is the float 1.0, which
    # reshape would reject.
    new_shape = kept_shape + (int(np.prod(reduced_shape)),)
    reshaped_x = transposed_x.reshape(new_shape)
    return (ref_max, np.argmax(reshaped_x, axis=-1))
# We run all tests with 5-D tensors of 10,000,000 elements.
# NB: In each test, the first call of each Theano function should be ignored
# when profiling, via the Theano config flag profiling.ignore_first_call=True.
def check_if_gpu_maxandargmax_in_graph(theano_function):
    """Assert that the compiled graph contains at least one GpuMaxAndArgmax node."""
    graph_ops = (node.op for node in theano_function.maker.fgraph.apply_nodes)
    assert any(isinstance(an_op, theano.gpuarray.reduction.GpuMaxAndArgmax)
               for an_op in graph_ops)
def check_if_gpu_maxandargmax_not_in_graph(theano_function):
    """Assert that the compiled graph contains no GpuMaxAndArgmax node."""
    graph_ops = (node.op for node in theano_function.maker.fgraph.apply_nodes)
    assert not any(isinstance(an_op, theano.gpuarray.reduction.GpuMaxAndArgmax)
                   for an_op in graph_ops)
def run_gpu_tensor5(test_matrix=None, axis=None):
    """Compile max/argmax of a 5-D GPU tensor and compare against the NumPy reference."""
    tensor_in = GpuArrayType(dtype=theano.config.floatX, broadcastable=(False,) * 5)()
    fn = theano.function([tensor_in],
                         [T.max(tensor_in, axis=axis), T.argmax(tensor_in, axis=axis)],
                         name='GPU-function', mode=mode_with_gpu)
    # The optimizer must have lifted the reduction onto the GPU.
    check_if_gpu_maxandargmax_in_graph(fn)
    if test_matrix is None:
        test_matrix = rand_gpuarray(*test_shape)
    # First call is a warm-up, ignored when profiling.ignore_first_call=True.
    fn(test_matrix)
    gpu_max, gpu_argmax = fn(test_matrix)
    expected_max, expected_argmax = numpy_maxandargmax(np.asarray(test_matrix), axis=axis)
    utt.assert_allclose(expected_max, gpu_max)
    utt.assert_allclose(expected_argmax, gpu_argmax)
def run_cpu_tensor5(test_matrix=None, axis=None):
    """Compile max/argmax of a 5-D CPU tensor and compare against the NumPy reference."""
    tensor_in = T.tensor5()
    fn = theano.function([tensor_in],
                         [T.max(tensor_in, axis=axis), T.argmax(tensor_in, axis=axis)],
                         name='cpu-function', mode=mode_without_gpu)
    # The CPU graph must not contain the GPU op.
    check_if_gpu_maxandargmax_not_in_graph(fn)
    if test_matrix is None:
        test_matrix = numpy_random_array(*test_shape)
    # First call is a warm-up, ignored when profiling.ignore_first_call=True.
    fn(test_matrix)
    cpu_max, cpu_argmax = fn(test_matrix)
    expected_max, expected_argmax = numpy_maxandargmax(test_matrix, axis=axis)
    utt.assert_allclose(expected_max, cpu_max)
    utt.assert_allclose(expected_argmax, cpu_argmax)
def run_tensor5(axis=None):
    """Run the CPU and GPU checks, each on freshly generated random data."""
    host_data = numpy_random_array(*test_shape)
    gpu_data = rand_gpuarray(*test_shape)
    run_cpu_tensor5(host_data, axis)
    run_gpu_tensor5(gpu_data, axis)
# Entry points collected by nose: each exercises one axis configuration.
def test_none():
    run_tensor5(None)


def test_all_axes():
    run_tensor5((0, 1, 2, 3, 4))


def test_all_axes_unsorted():
    run_tensor5((4, 1, 3, 0, 2))


def test_axis_1():
    run_tensor5(0)


def test_axis_2():
    run_tensor5(1)


def test_axis_3():
    run_tensor5(2)


def test_axis_4():
    run_tensor5(3)


def test_axis_5():
    run_tensor5(4)


# Multi-axis (but not all-axis) reductions.
def test_2_axes():
    run_tensor5((0, 3))


def test_3_axes():
    run_tensor5((0, 3, 4))


def test_4_axes():
    run_tensor5((0, 1, 2, 4))
from __future__ import print_function, absolute_import, division
from unittest import TestCase
import numpy as np
import theano
import theano.tensor as T
from theano.tests import unittest_tools as utt
from theano.tests.unittest_tools import SkipTest
from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand_gpuarray
from .. import GpuArrayType
# Largest test tensor shape: 5-D, 10,000,000 elements. Lower-rank tests use a
# prefix of this tuple.
test_shape = (1000, 100, 10, 5, 2)

# NB: This order of "unsorted axes" is arbitrary and is here
# just to have the same information on profile output
# from one test to another.
unsorted_axes = (2, 4, 0, 3, 1)

# Re-seed the global NumPy RNG from OS entropy, so each run uses fresh data.
np.random.seed()
def numpy_random_array(shapes):
    """Return a random array of shape ``shapes``, drawn from a standard
    normal distribution and cast to the configured Theano float type.

    Parameters
    ----------
    shapes : tuple of int
        The desired array shape (may be empty for a 0-d array).
    """
    # np.random.normal accepts a shape tuple directly and fills in C order,
    # so this matches drawing a flat vector of the same length and reshaping.
    return np.random.normal(size=shapes).astype(theano.config.floatX)
def numpy_maxandargmax(X, axis=None):
    """Reference (max, argmax) of ``X`` over possibly multiple axes.

    ``axis`` may be None (all axes), an int/NumPy integer, or a tuple/list of
    ints. The argmax indexes into the flattened reduced dimensions, mirroring
    MaxAndArgmax.perform().
    """
    if axis is None:
        axis = range(X.ndim)
    elif not isinstance(axis, (tuple, list)):
        axis = [int(axis)]
    # Deduplicate and sort the reduction axes.
    axis = tuple(sorted(set(axis)))
    ref_max = np.max(X, axis=axis)
    # NumPy's argmax handles a single axis only (work-around copied from
    # MaxAndArgmax.perform()): bring the kept axes to the front, merge the
    # reduced axes into one trailing axis, then argmax along that axis.
    keep_axes = np.array([dim for dim in range(X.ndim) if dim not in axis],
                         dtype='int64')
    transposed = np.transpose(X, np.concatenate((keep_axes, axis)))
    n_kept = len(keep_axes)
    flat_shape = tuple(int(d) for d in transposed.shape[:n_kept])
    flat_shape += (int(np.prod(transposed.shape[n_kept:])),)
    return (ref_max, np.argmax(transposed.reshape(flat_shape), axis=-1))
def check_if_gpu_maxandargmax_in_graph(theano_function):
    """Fail unless the compiled graph holds at least one GpuMaxAndArgmax apply node."""
    matches = [node for node in theano_function.maker.fgraph.apply_nodes
               if isinstance(node.op, theano.gpuarray.reduction.GpuMaxAndArgmax)]
    assert matches
def check_if_gpu_maxandargmax_not_in_graph(theano_function):
    """Fail if the compiled graph holds any GpuMaxAndArgmax apply node."""
    matches = [node for node in theano_function.maker.fgraph.apply_nodes
               if isinstance(node.op, theano.gpuarray.reduction.GpuMaxAndArgmax)]
    assert not matches
class BaseTest:
    """Shared max/argmax test logic.

    Subclasses set ``tensor_size`` (the tensor rank, 0 to 5) and inherit the
    whole test battery: every test compiles max/argmax both without and with
    the GPU, runs it on the same random data, and compares against the NumPy
    reference ``numpy_maxandargmax``.
    """

    # This attribute must be set in subclasses (tensor rank, 0 to 5).
    tensor_size = None
    # Data type used for both host and GPU tensors.
    dtype = theano.config.floatX

    def setUp(self):
        # Skip the abstract base class itself and any unsupported rank.
        # NOTE(review): the message typos ('inclued', 'dimensons') are left
        # as-is — string literals are runtime behavior.
        if not isinstance(self.tensor_size, int):
            raise SkipTest("No tensor ndim defined.")
        if self.tensor_size < 0 or self.tensor_size > 5:
            raise SkipTest("We allow from 0 (included) to 5 (inclued) dimensons for these tests.")

    def get_host_tensor(self):
        # Symbolic CPU tensor with no broadcastable dimensions.
        broadcastable = (False,) * self.tensor_size
        return T.tensor(self.dtype, broadcastable)

    def get_gpu_tensor(self):
        # Symbolic GPU tensor with no broadcastable dimensions.
        broadcastable = (False,) * self.tensor_size
        return GpuArrayType(self.dtype, broadcastable)()

    def get_host_value(self):
        # Random host ndarray whose shape is a prefix of test_shape.
        return numpy_random_array(test_shape[:self.tensor_size])

    def get_gpu_value(self):
        # Random GPU array whose shape is a prefix of test_shape.
        return rand_gpuarray(*(test_shape[:self.tensor_size]))

    # NB: In compute_host() and compute_gpu(),
    # the first call of the theano function should be ignored in profiling,
    # with Theano config flag profiling.ignore_first_call=True.
    def compute_host(self, test_tensor, axis):
        """Compile max/argmax on CPU and compare against the NumPy reference."""
        M = self.get_host_tensor()
        f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)],
                            name='HOST-function', mode=mode_without_gpu)
        # The CPU graph must not contain the GPU op.
        check_if_gpu_maxandargmax_not_in_graph(f)
        f(test_tensor)  # warm-up call, ignored by the profiler
        theano_max, theano_argmax = f(test_tensor)
        ref_max, ref_argmax = numpy_maxandargmax(test_tensor, axis=axis)
        utt.assert_allclose(ref_max, theano_max)
        utt.assert_allclose(ref_argmax, theano_argmax)

    def compute_gpu(self, test_gpu_tensor, test_host_tensor, axis):
        """Compile max/argmax on GPU and compare against the NumPy reference."""
        M = self.get_gpu_tensor()
        f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)],
                            name='GPU-function', mode=mode_with_gpu)
        # The optimizer must have lifted MaxAndArgmax to GpuMaxAndArgmax.
        check_if_gpu_maxandargmax_in_graph(f)
        f(test_gpu_tensor)  # warm-up call, ignored by the profiler
        theano_max, theano_argmax = f(test_gpu_tensor)
        ref_max, ref_argmax = numpy_maxandargmax(test_host_tensor, axis=axis)
        utt.assert_allclose(ref_max, theano_max)
        utt.assert_allclose(ref_argmax, theano_argmax)

    def compute(self, axis=None):
        # We want to run CPU op and GPU op on the same tensor randomly generated.
        test_gpu_tensor = self.get_gpu_value()
        test_host_tensor = np.asarray(test_gpu_tensor)
        self.compute_host(test_host_tensor, axis)
        self.compute_gpu(test_gpu_tensor, test_host_tensor, axis)

    def compute_axis(self, pos):
        # Run only when ``pos`` is a valid axis for this tensor rank
        # (silently a no-op otherwise, e.g. axis 4 for a matrix).
        if 0 <= pos < self.tensor_size:
            self.compute(pos)

    def compute_some_axes(self, count):
        # Reduce over the first ``count`` valid axes taken in unsorted_axes order.
        if 0 <= count <= self.tensor_size:
            self.compute([i for i in unsorted_axes if i < self.tensor_size][:count])

    def test_none(self):
        self.compute(None)

    def test_all_axes(self):
        self.compute(range(self.tensor_size))

    def test_all_axes_unsorted(self):
        self.compute([i for i in unsorted_axes if i < self.tensor_size])

    def test_axis_1(self):
        self.compute_axis(0)

    def test_axis_2(self):
        self.compute_axis(1)

    def test_axis_3(self):
        self.compute_axis(2)

    def test_axis_4(self):
        self.compute_axis(3)

    def test_axis_5(self):
        self.compute_axis(4)

    # For the tests below, we expect CPU op to run with Python implementation.
    def test_2_axes(self):
        self.compute_some_axes(2)

    def test_3_axes(self):
        self.compute_some_axes(3)

    def test_4_axes(self):
        self.compute_some_axes(4)
# Concrete test cases: one per tensor rank (0-D scalar up to 5-D).
class TestScalar(BaseTest, TestCase):
    tensor_size = 0


class TestVector(BaseTest, TestCase):
    tensor_size = 1


class TestMatrix(BaseTest, TestCase):
    tensor_size = 2


class TestTensor5(BaseTest, TestCase):
    tensor_size = 5
...@@ -15,6 +15,7 @@ from theano.compat import izip ...@@ -15,6 +15,7 @@ from theano.compat import izip
from theano.configparser import config from theano.configparser import config
from theano import gof from theano import gof
from theano.gof import Apply, Constant, Op, Variable from theano.gof import Apply, Constant, Op, Variable
from theano.gof.type import Generic
from theano.tensor import elemwise from theano.tensor import elemwise
from theano.tensor.var import (AsTensorError, TensorVariable, from theano.tensor.var import (AsTensorError, TensorVariable,
...@@ -1181,45 +1182,32 @@ class MaxAndArgmax(Op): ...@@ -1181,45 +1182,32 @@ class MaxAndArgmax(Op):
nin = 2 # tensor, axis nin = 2 # tensor, axis
nout = 2 # max val, max idx nout = 2 # max val, max idx
E_axis = 'invalid axis' E_axis = 'invalid axis'
__props__ = () params_type = Generic()
__props__ = ('axis',)
def make_node(self, x, axis=None):
x = _as_tensor_variable(x)
if axis is None: def __init__(self, axis):
axis = range(x.type.ndim) assert isinstance(axis, list)
elif not isinstance(axis, list): self.axis = tuple(axis)
raise TypeError("Axis must be a list. Got %s" % axis)
# Make axis entries non-negative, and sort them def get_params(self, node):
for idx in xrange(len(axis)): return self.axis
if axis[idx] < 0:
axis[idx] += x.type.ndim
axis.sort()
# Verify that axes are valid def make_node(self, x):
all_axes = [] x = _as_tensor_variable(x)
for ax in axis:
if ax < 0 or ax >= x.type.ndim:
raise ValueError(
'Invalid axis: %s (the number of dimensions of the '
'input is: %s)' % (ax, x.type.ndim))
if ax not in all_axes:
all_axes.append(ax)
axis = _as_tensor_variable(all_axes)
assert axis.ndim == 1
inputs = [x, axis]
# We keep the original broadcastable flags for dimensions on which # We keep the original broadcastable flags for dimensions on which
# we do not perform the max / argmax. # we do not perform the max / argmax.
all_axes = set(self.axis)
broadcastable = [b for i, b in enumerate(x.type.broadcastable) broadcastable = [b for i, b in enumerate(x.type.broadcastable)
if i not in all_axes] if i not in all_axes]
inputs = [x]
outputs = [tensor(x.type.dtype, broadcastable, name='max'), outputs = [tensor(x.type.dtype, broadcastable, name='max'),
tensor('int64', broadcastable, name='argmax')] tensor('int64', broadcastable, name='argmax')]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, node, inp, outs): def perform(self, node, inp, outs, params):
x, axes = inp x = inp[0]
axes = params
max, max_idx = outs max, max_idx = outs
if axes is None: if axes is None:
axes = tuple(range(x.ndim)) axes = tuple(range(x.ndim))
...@@ -1242,35 +1230,40 @@ class MaxAndArgmax(Op): ...@@ -1242,35 +1230,40 @@ class MaxAndArgmax(Op):
dtype='int64') dtype='int64')
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
x, axis = inp if len(self.axis) != 1 and len(self.axis) != node.inputs[0].ndim:
raise NotImplementedError("NumPy C-API can compute max and argmax only for 1 axis or for all axes.")
x = inp[0]
axis = sub['params']
max, argmax = out max, argmax = out
fail = sub["fail"] fail = sub["fail"]
if NoneConst.equals(node.inputs[1]) or len(node.inputs[1].data) == node.inputs[0].ndim: ret = """
axis_code = "axis = NPY_MAXDIMS;" int axis;
else:
assert node.inputs[1].ndim == 1 if (PyTuple_GET_SIZE(%(axis)s) == PyArray_NDIM(%(x)s)) {
# Fall back to perform() if there are multiple axes axis = NPY_MAXDIMS;
if len(node.inputs[1].data) > 1: } else if(PyTuple_GET_SIZE(%(axis)s) == 1) {
raise NotImplementedError() PyObject* axis_object = PyTuple_GET_ITEM(%(axis)s, 0);
axis_code = """ axis = (int)PyInt_AS_LONG(axis_object);
axis = ((dtype_%(axis)s*)PyArray_DATA(%(axis)s))[0]; Py_XDECREF(axis_object);
if(axis > PyArray_NDIM(%(x)s)-1 || axis < -PyArray_NDIM(%(x)s)){ if (axis > PyArray_NDIM(%(x)s)-1 || axis < -PyArray_NDIM(%(x)s)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"MaxAndArgmax, bad axis argument"); "MaxAndArgmax: bad axis argument");
%(fail)s %(fail)s
} }
""" % locals() } else {
ret = """ PyErr_SetString(PyExc_NotImplementedError,
int axis; "MaxAndArgmax: NumPy C-API can compute max and argmax only for 1 axis or for all axes.");
%(fail)s
}
Py_CLEAR(%(max)s); Py_CLEAR(%(max)s);
Py_CLEAR(%(argmax)s);//todo pass them as out parameter. Py_CLEAR(%(argmax)s);//todo pass them as out parameter.
%(axis_code)s
%(max)s = (PyArrayObject*)PyArray_Max(%(x)s, axis, NULL); %(max)s = (PyArrayObject*)PyArray_Max(%(x)s, axis, NULL);
if(%(max)s == NULL){ if (%(max)s == NULL) {
%(fail)s; %(fail)s;
} }
if(!PyArray_CheckExact(%(max)s)){ if (!PyArray_CheckExact(%(max)s)) {
%(max)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(max)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL); %(max)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(max)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
if(%(max)s == NULL){ if(%(max)s == NULL){
%(fail)s; %(fail)s;
...@@ -1278,17 +1271,17 @@ class MaxAndArgmax(Op): ...@@ -1278,17 +1271,17 @@ class MaxAndArgmax(Op):
} }
%(argmax)s = (PyArrayObject*)PyArray_ArgMax(%(x)s, axis, NULL); %(argmax)s = (PyArrayObject*)PyArray_ArgMax(%(x)s, axis, NULL);
if(%(argmax)s == NULL){ if (%(argmax)s == NULL) {
Py_CLEAR(%(max)s); Py_CLEAR(%(max)s);
%(fail)s; %(fail)s;
} }
if(!PyArray_CheckExact(%(argmax)s)){ if (!PyArray_CheckExact(%(argmax)s)) {
%(argmax)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(argmax)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL); %(argmax)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(argmax)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
if(%(argmax)s == NULL){ if(%(argmax)s == NULL){
%(fail)s; %(fail)s;
} }
} }
if(PyArray_TYPE(%(argmax)s) != NPY_INT64){ if (PyArray_TYPE(%(argmax)s) != NPY_INT64) {
PyObject * tmp = PyArray_Cast(%(argmax)s, NPY_INT64); PyObject * tmp = PyArray_Cast(%(argmax)s, NPY_INT64);
if (NULL == tmp){ if (NULL == tmp){
%(fail)s; %(fail)s;
...@@ -1303,28 +1296,25 @@ class MaxAndArgmax(Op): ...@@ -1303,28 +1296,25 @@ class MaxAndArgmax(Op):
return (4,) return (4,)
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
ishape, axis_shape = shapes ishape = shapes[0]
axis = node.inputs[1] rval = tuple(ishape[i] for (i, b) in enumerate(
if axis.data is None: node.inputs[0].type.broadcastable) if i not in self.axis)
return [(), ()]
rval = tuple([ishape[i] for (i, b) in enumerate(
node.inputs[0].type.broadcastable) if i not in axis.data])
return [rval, rval] return [rval, rval]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
if eval_points[0] is None: if eval_points[0] is None:
return [None, None] return [None, None]
if not isinstance(inputs[1], theano.Constant): if len(self.axis) != 1:
raise ValueError(('R_op supported for arg_max only for ' raise ValueError(('R_op supported for arg_max only for '
'constant axis!')) 'constant axis!'))
if inputs[1].data > 1: if self.axis[0] > 1:
raise ValueError(('R_op supported for arg_max only when ' raise ValueError(('R_op supported for arg_max only when '
' axis is 0 or 1')) ' axis is 0 or 1'))
if inputs[0].ndim != 2: if inputs[0].ndim != 2:
raise ValueError(('R_op supported for arg_max only when ' raise ValueError(('R_op supported for arg_max only when '
' input is a matrix')) ' input is a matrix'))
max_vals, max_pos = self.make_node(*inputs).outputs max_vals, max_pos = self.make_node(*inputs).outputs
if inputs[1].data == 0: if self.axis[0] == 0:
return [eval_points[0][max_pos, return [eval_points[0][max_pos,
arange(eval_points[0].shape[1])], None] arange(eval_points[0].shape[1])], None]
else: else:
...@@ -1345,7 +1335,8 @@ class MaxAndArgmax(Op): ...@@ -1345,7 +1335,8 @@ class MaxAndArgmax(Op):
# g_max has one less dimension than x, so you need to complete # g_max has one less dimension than x, so you need to complete
# g_max to x's shape when axis=0 the broadcasting mechanism # g_max to x's shape when axis=0 the broadcasting mechanism
# does it automatically # does it automatically
x, axis = inp x = inp[0]
axis = _as_tensor_variable(self.axis)
g_max, g_max_idx = grads g_max, g_max_idx = grads
g_max_disconnected = isinstance(g_max.type, DisconnectedType) g_max_disconnected = isinstance(g_max.type, DisconnectedType)
...@@ -1363,7 +1354,7 @@ class MaxAndArgmax(Op): ...@@ -1363,7 +1354,7 @@ class MaxAndArgmax(Op):
# if the max is disconnected but the argmax is not, # if the max is disconnected but the argmax is not,
# the gradient on its inputs is zero # the gradient on its inputs is zero
if g_max_disconnected: if g_max_disconnected:
return [x.zeros_like(), axis_grad] return [x.zeros_like()]
if NoneConst.equals(axis): if NoneConst.equals(axis):
axis_ = list(range(x.ndim)) axis_ = list(range(x.ndim))
else: else:
...@@ -1387,9 +1378,7 @@ class MaxAndArgmax(Op): ...@@ -1387,9 +1378,7 @@ class MaxAndArgmax(Op):
# Set the grad to the correct position. # Set the grad to the correct position.
g_x = eq(xmax_pad, x) * g_max_pad g_x = eq(xmax_pad, x) * g_max_pad
return g_x, axis_grad return g_x,
_max_and_argmax = MaxAndArgmax()
class Argmax(Op): class Argmax(Op):
...@@ -1611,6 +1600,7 @@ def max_and_argmax(a, axis=None, keepdims=False): ...@@ -1611,6 +1600,7 @@ def max_and_argmax(a, axis=None, keepdims=False):
""" """
# Check axis and convert it to a Python list of integers. # Check axis and convert it to a Python list of integers.
# Axis will be used as an op param of MaxAndArgmax.
if axis is None: if axis is None:
axis = range(a.type.ndim) axis = range(a.type.ndim)
elif (isinstance(axis, (integer_types, numpy.integer)) or elif (isinstance(axis, (integer_types, numpy.integer)) or
...@@ -1630,8 +1620,18 @@ def max_and_argmax(a, axis=None, keepdims=False): ...@@ -1630,8 +1620,18 @@ def max_and_argmax(a, axis=None, keepdims=False):
axis = [int(axis.data)] axis = [int(axis.data)]
elif isinstance(axis.data, (list, numpy.ndarray)): elif isinstance(axis.data, (list, numpy.ndarray)):
axis = [int(i) for i in axis.data] axis = [int(i) for i in axis.data]
if len(axis) == 0:
out, argout = _max_and_argmax(a, axis) axis = range(a.type.ndim)
else:
for i in range(len(axis)):
if axis[i] < 0:
axis[i] += a.type.ndim
if axis[i] < 0 or axis[i] >= a.type.ndim:
raise ValueError("max and argmax computation needs a valid axis number for %d-D tensor. Got %d"
% (a.type.ndim, axis[i]))
axis = list(set(axis))
axis.sort()
out, argout = MaxAndArgmax(axis)(a)
if keepdims: if keepdims:
out = makeKeepDims(a, out, axis) out = makeKeepDims(a, out, axis)
......
...@@ -1568,9 +1568,9 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node): ...@@ -1568,9 +1568,9 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
@opt.register_specialize('fast_compile_gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([tensor._max_and_argmax]) @gof.local_optimizer([tensor.MaxAndArgmax])
def local_argmax_pushdown(node): def local_argmax_pushdown(node):
if node.op == tensor._max_and_argmax and node.inputs[0].owner and \ if isinstance(node.op, tensor.MaxAndArgmax) and node.inputs[0].owner and \
len(node.outputs[0].clients) > 0 and node.inputs[0].owner.op in \ len(node.outputs[0].clients) > 0 and node.inputs[0].owner.op in \
(softmax_op, softplus, tensor.exp, tensor.log, tensor.tanh, sigmoid, (softmax_op, softplus, tensor.exp, tensor.log, tensor.tanh, sigmoid,
softmax_with_bias): softmax_with_bias):
...@@ -1584,20 +1584,21 @@ def local_argmax_pushdown(node): ...@@ -1584,20 +1584,21 @@ def local_argmax_pushdown(node):
"warning set the Theano flags 'warn.argmax_pushdown_bug' " "warning set the Theano flags 'warn.argmax_pushdown_bug' "
"to False") "to False")
if (node.op == tensor._max_and_argmax and if (isinstance(node.op, tensor.MaxAndArgmax) and
node.inputs[0].owner and len(node.outputs[0].clients) == 0): node.inputs[0].owner and len(node.outputs[0].clients) == 0):
x_max, x_argmax = node.outputs x_max, x_argmax = node.outputs
x, axis = node.inputs x = node.inputs[0]
axis = node.op.get_params(node)
# TODO: Make a list/set of monotonic ops... # TODO: Make a list/set of monotonic ops...
if x.owner and x.owner.op in (softmax_op, softplus, tensor.exp, if x.owner and x.owner.op in (softmax_op, softplus, tensor.exp,
tensor.log, tensor.tanh, sigmoid): tensor.log, tensor.tanh, sigmoid):
pre_x, = x.owner.inputs pre_x, = x.owner.inputs
ret = tensor._max_and_argmax(pre_x, axis) ret = tensor.max_and_argmax(pre_x, axis)
copy_stack_trace(x_max, ret) copy_stack_trace(x_max, ret)
return ret return ret
if x.owner and x.owner.op == softmax_with_bias: if x.owner and x.owner.op == softmax_with_bias:
pre_x, pre_bias = x.owner.inputs pre_x, pre_bias = x.owner.inputs
ret = tensor._max_and_argmax(pre_x + ret = tensor.max_and_argmax(pre_x +
tensor.DimShuffle( tensor.DimShuffle(
pre_bias.broadcastable, pre_bias.broadcastable,
('x', 0))(pre_bias), axis) ('x', 0))(pre_bias), axis)
......
...@@ -41,8 +41,6 @@ from theano.tensor.elemwise import CAReduce ...@@ -41,8 +41,6 @@ from theano.tensor.elemwise import CAReduce
from theano.tensor import basic as T from theano.tensor import basic as T
from theano.tensor import DimShuffle from theano.tensor import DimShuffle
from theano.tensor.basic import (get_scalar_constant_value,
NotScalarConstantError)
from theano.tensor.opt import register_uncanonicalize from theano.tensor.opt import register_uncanonicalize
from theano import scalar as scal from theano import scalar as scal
...@@ -50,25 +48,18 @@ _logger = logging.getLogger('theano.tensor.opt') ...@@ -50,25 +48,18 @@ _logger = logging.getLogger('theano.tensor.opt')
@register_uncanonicalize @register_uncanonicalize
@gof.local_optimizer([T._max_and_argmax]) @gof.local_optimizer([T.MaxAndArgmax])
def local_max_and_argmax(node): def local_max_and_argmax(node):
""" """
If we don't use the argmax, change it to a max only. If we don't use the argmax, change it to a max only.
""" """
if node.op == T._max_and_argmax: if isinstance(node.op, T.MaxAndArgmax):
if len(node.outputs[1].clients) == 0: if len(node.outputs[1].clients) == 0:
# MaxAndArgmax support variable axis, # MaxAndArgmax support variable axis,
# but CAReduce support only constant axis. # but CAReduce support only constant axis.
if node.inputs[1].data is None: axis = node.op.get_params(node)
axis = None if len(axis) != 1:
else: return False
try:
axis = get_scalar_constant_value(node.inputs[1])
except NotScalarConstantError:
axis = node.inputs[1]
if not isinstance(axis, T.TensorConstant):
return False
axis = axis.data
new = CAReduce(scal.maximum, axis)(node.inputs[0]) new = CAReduce(scal.maximum, axis)(node.inputs[0])
return [new, None] return [new, None]
......
...@@ -7619,23 +7619,23 @@ class TestInferShape(utt.InferShapeTester): ...@@ -7619,23 +7619,23 @@ class TestInferShape(utt.InferShapeTester):
# MaxAndArgmax, # MaxAndArgmax,
adtens3_val = rand(4, 5, 3) adtens3_val = rand(4, 5, 3)
self._compile_and_check([adtens3], self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, None), max_and_argmax(adtens3, None),
[adtens3_val], MaxAndArgmax) [adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3], self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 0), max_and_argmax(adtens3, 0),
[adtens3_val], MaxAndArgmax) [adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3], self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 1), max_and_argmax(adtens3, 1),
[adtens3_val], MaxAndArgmax) [adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3], self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, 2), max_and_argmax(adtens3, 2),
[adtens3_val], MaxAndArgmax) [adtens3_val], MaxAndArgmax)
self._compile_and_check([adtens3], self._compile_and_check([adtens3],
MaxAndArgmax()(adtens3, [0, 1, 2]), max_and_argmax(adtens3, [0, 1, 2]),
[adtens3_val], MaxAndArgmax) [adtens3_val], MaxAndArgmax)
# ARange # ARange
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论