Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
3347480a
提交
3347480a
authored
11月 14, 2016
作者:
Pascal Lamblin
提交者:
GitHub
11月 14, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5168 from notoraptor/master
This is my proposal for GpuMaxAndArgmax (issue #1399).
上级
86fdfdaf
0f4436f9
显示空白字符变更
内嵌
并排
正在显示
8 个修改的文件
包含
452 行增加
和
127 行删除
+452
-127
__init__.py
theano/gpuarray/__init__.py
+1
-1
opt.py
theano/gpuarray/opt.py
+9
-0
reduction.py
theano/gpuarray/reduction.py
+138
-0
test_reduction.py
theano/gpuarray/tests/test_reduction.py
+197
-0
basic.py
theano/tensor/basic.py
+91
-97
nnet.py
theano/tensor/nnet/nnet.py
+7
-6
opt_uncanonicalize.py
theano/tensor/opt_uncanonicalize.py
+4
-18
test_basic.py
theano/tensor/tests/test_basic.py
+5
-5
没有找到文件。
theano/gpuarray/__init__.py
浏览文件 @
3347480a
...
@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
...
@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
GpuArraySharedVariable
,
gpuarray_shared_constructor
,
GpuArraySharedVariable
,
gpuarray_shared_constructor
,
reg_context
,
get_context
,
ContextNotDefined
,
_get_props
)
reg_context
,
get_context
,
ContextNotDefined
,
_get_props
)
from
.basic_ops
import
as_gpuarray_variable
from
.basic_ops
import
as_gpuarray_variable
from
.
import
fft
,
dnn
,
opt
,
nerv
,
extra_ops
,
multinomial
from
.
import
fft
,
dnn
,
opt
,
nerv
,
extra_ops
,
multinomial
,
reduction
def
transfer
(
x
,
target
):
def
transfer
(
x
,
target
):
try
:
try
:
...
...
theano/gpuarray/opt.py
浏览文件 @
3347480a
...
@@ -65,6 +65,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
...
@@ -65,6 +65,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1
,
GpuAdvancedIncSubtensor1
,
GpuAdvancedIncSubtensor1_dev20
)
GpuAdvancedIncSubtensor1_dev20
)
from
.opt_util
import
alpha_merge
,
output_merge
,
pad_dims
,
unpad_dims
from
.opt_util
import
alpha_merge
,
output_merge
,
pad_dims
,
unpad_dims
from
.reduction
import
GpuMaxAndArgmax
_logger
=
logging
.
getLogger
(
"theano.gpuarray.opt"
)
_logger
=
logging
.
getLogger
(
"theano.gpuarray.opt"
)
...
@@ -1775,6 +1776,14 @@ def _scan_type_infer(node):
...
@@ -1775,6 +1776,14 @@ def _scan_type_infer(node):
context_name
=
context_name
)
context_name
=
context_name
)
return
typebuild
return
typebuild
# Add optimization : maxandargmax (CPU -> GPU)
@register_opt('fast_compile')
@op_lifter([tensor.MaxAndArgmax])
@register_opt2([tensor.MaxAndArgmax], 'fast_compile')
def local_gpu_maxandargmax(op, context_name, inputs, outputs):
    """
    Build the GPU op that replaces a ``tensor.MaxAndArgmax`` op.

    The axes are read from the op's params (``get_params`` is called with
    ``None`` in place of a node) and handed to ``GpuMaxAndArgmax``.
    ``context_name``, ``inputs`` and ``outputs`` are accepted but not used
    by this function.

    """
    reduced_axes = op.get_params(None)
    return GpuMaxAndArgmax(reduced_axes)
# Do not register in fast_run or fast_compile.
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
# It will be added to fast_run if the GPU is enabled.
optdb
.
register
(
'gpua_scanOp_make_inplace'
,
optdb
.
register
(
'gpua_scanOp_make_inplace'
,
...
...
theano/gpuarray/reduction.py
0 → 100644
浏览文件 @
3347480a
from
__future__
import
print_function
,
absolute_import
,
division
import
os
import
theano
from
theano.gof
import
Op
,
Apply
from
theano.gof.type
import
Generic
from
.basic_ops
import
(
infer_context_name
,
as_gpuarray_variable
)
from
.type
import
GpuArrayType
try
:
import
pygpu
except
ImportError
as
e
:
pass
class GpuMaxAndArgmax(Op):
    """
    GPU version of MaxAndArgmax

    The op has a single property, ``axis``: the tuple of axes over which
    the maximum and the position of the maximum are computed.  The axes are
    also handed to the generated C code as the op's params (a Python tuple).

    """
    params_type = Generic()
    __props__ = ('axis',)
    # dtype of the second output (the argmax).
    argmax_dtype = "int64"

    def __init__(self, axis):
        """
        Parameters
        ----------
        axis : list or tuple
            Axes to reduce; stored on the op as a tuple.

        """
        assert isinstance(axis, (list, tuple))
        self.axis = tuple(axis)

    def get_params(self, node):
        """Return the tuple of axes to reduce (``node`` is not used)."""
        return self.axis

    def make_node(self, X):
        """
        Build the Apply node for input ``X``.

        ``X`` is converted to a GPU array variable in the context inferred
        from it.  Two outputs are created: ``max`` with the dtype of ``X``
        and ``argmax`` with dtype ``argmax_dtype``.

        """
        context_name = infer_context_name(X)
        # We keep the original broadcastable flags for dimensions on which
        # we do not perform the max / argmax.
        all_axes = set(self.axis)
        broadcastable = [b for i, b in enumerate(X.type.broadcastable)
                         if i not in all_axes]
        inputs = [as_gpuarray_variable(X, context_name)]
        outputs = [GpuArrayType(X.type.dtype, broadcastable,
                                context_name=context_name, name='max')(),
                   GpuArrayType(self.argmax_dtype, broadcastable,
                                context_name=context_name, name='argmax')()]
        return Apply(self, inputs, outputs)

    def c_headers(self):
        """Headers included by the generated C code."""
        return ['<numpy_compat.h>', '<gpuarray_helper.h>']

    def c_header_dirs(self):
        """Include directories: pygpu's headers and this module's directory."""
        return [pygpu.get_include(), os.path.dirname(__file__)]

    def c_code(self, node, name, input_names, output_names, sub):
        """
        Return the C code computing max and argmax on the GPU.

        The code reads the axes from the params tuple, computes the output
        dimensions (input dimensions minus the reduced axes), prepares both
        outputs and calls ``GpuArray_maxandargmax``.  A 0-d input is handled
        separately: the input is copied to ``max`` and ``argmax`` is zeroed.
        The two buffers allocated with ``malloc`` here are released in
        ``c_code_cleanup``.

        """
        # Recall: X = input_names[0]
        # Recall: axes = sub['params']
        # Recall: max, argmax = output_names
        # Recall: fail = sub['fail']
        max_typecode = pygpu.gpuarray.dtype_to_typecode(node.inputs[0].dtype)
        argmax_typecode = pygpu.gpuarray.dtype_to_typecode(self.argmax_dtype)
        ret = """
        #if PY_MAJOR_VERSION >= 3
            #ifndef PyInt_AS_LONG
                #define PyInt_AS_LONG PyLong_AS_LONG
            #endif
        #endif

        unsigned %(name)s_redux_len = PyTuple_GET_SIZE(%(axes)s);
        unsigned* %(name)s_axes_to_reduce = (unsigned*)malloc(%(name)s_redux_len * sizeof(unsigned));
        for (unsigned i = 0; i < %(name)s_redux_len; ++i) {
            /* PyTuple_GET_ITEM returns a borrowed reference:
             * it must NOT be decref'ed here. */
            PyObject* axis_object = PyTuple_GET_ITEM(%(axes)s, i);
            %(name)s_axes_to_reduce[i] = (unsigned) PyInt_AS_LONG(axis_object);
        }

        size_t %(name)s_input_ndim = PyGpuArray_NDIM(%(X)s);
        size_t %(name)s_output_ndim = %(name)s_input_ndim - %(name)s_redux_len;
        size_t* %(name)s_output_dims = (size_t*)malloc(%(name)s_output_ndim * sizeof(size_t));
        if (%(name)s_redux_len == 1) {
            for (unsigned i = 0; i < %(name)s_axes_to_reduce[0]; ++i) {
                %(name)s_output_dims[i] = PyGpuArray_DIM(%(X)s, i);
            }
            for (unsigned i = %(name)s_axes_to_reduce[0] + 1; i < %(name)s_input_ndim; ++i) {
                %(name)s_output_dims[i-1] = PyGpuArray_DIM(%(X)s, i);
            }
        } else {
            int64_t current_input_pos = -1;
            int64_t current_output_pos = -1;
            for (unsigned i = 0; i < %(name)s_redux_len; ++i) {
                for (++current_input_pos; current_input_pos < %(name)s_axes_to_reduce[i]; ++current_input_pos) {
                    %(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos);
                }
            }
            for (++current_input_pos; current_input_pos < %(name)s_input_ndim; ++current_input_pos) {
                %(name)s_output_dims[++current_output_pos] = PyGpuArray_DIM(%(X)s, current_input_pos);
            }
        }

        if (theano_prep_output(&%(max)s, %(name)s_output_ndim, %(name)s_output_dims, %(max_typecode)s, GA_C_ORDER, %(X)s->context)) {
            PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to prepare max output.");
            %(fail)s
        }
        if (theano_prep_output(&%(argmax)s, %(name)s_output_ndim, %(name)s_output_dims, %(argmax_typecode)s, GA_C_ORDER, %(X)s->context)) {
            PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to prepare argmax output.");
            %(fail)s
        }

        if (%(name)s_input_ndim == 0) {
            /* GpuArray_maxandargmax can't handle a 0-d array
             * because it expects that 1 <= redux_len <= input_ndim.
             * As input_ndim == 0, then 1 <= redux_len <= 0 is false.
             * To handle this case we copy input to max and we set argmax to 0.
             */
            if (GA_NO_ERROR != GpuArray_setarray(&%(max)s->ga, &%(X)s->ga)) {
                PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to copy input to max when input is a scalar.");
                %(fail)s
            }
            if (GA_NO_ERROR != GpuArray_memset(&%(argmax)s->ga, 0)) {
                PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to set argmax to 0 when input is a scalar.");
                %(fail)s
            }
        } else if (GA_NO_ERROR !=
            GpuArray_maxandargmax(&%(max)s->ga, &%(argmax)s->ga, &%(X)s->ga, %(name)s_redux_len, %(name)s_axes_to_reduce)
        ) {
            PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to compute gpuarray maxandargmax.");
            %(fail)s
        }
        """
        # Optional synchronisation of both outputs, driven by the
        # gpuarray.sync config flag.
        if theano.config.gpuarray.sync:
            ret += """
            GpuArray_sync(&%(max)s->ga);
            GpuArray_sync(&%(argmax)s->ga);
            """
        return ret % {'X': input_names[0], 'axes': sub['params'],
                      'max': output_names[0], 'argmax': output_names[1],
                      'max_typecode': max_typecode,
                      'argmax_typecode': argmax_typecode,
                      'name': name, 'fail': sub['fail']}

    def c_code_cleanup(self, node, name, inputs, outputs, sub):
        """Return the C code freeing the buffers allocated in ``c_code``."""
        return """
        free(%(name)s_output_dims);
        free(%(name)s_axes_to_reduce);
        """ % {'name': name}
theano/gpuarray/tests/test_reduction.py
0 → 100644
浏览文件 @
3347480a
from
__future__
import
print_function
,
absolute_import
,
division
from
unittest
import
TestCase
import
numpy
as
np
import
theano
import
theano.tensor
as
T
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests.unittest_tools
import
SkipTest
from
.config
import
mode_with_gpu
,
mode_without_gpu
from
.test_basic_ops
import
rand_gpuarray
from
..
import
GpuArrayType
import
math
# Number of values to be used in test tensors (except with 0-D tensors!).
test_size = 10000000

# NB: This order of "unsorted axes" is arbitrary and is here
# just to have the same information in the profile output
# from one test to another.
unsorted_axes = (2, 4, 0, 3, 1)

# No explicit seed value is passed here.
np.random.seed()
def numpy_random_array(shapes):
    """
    Return a floatX array of shape ``shapes`` filled with normal samples.

    The samples are drawn as one flat vector whose length is the product of
    the entries of ``shapes`` (1 when ``shapes`` is empty), cast to
    ``theano.config.floatX`` and then reshaped.

    """
    total = 1
    for extent in shapes:
        total = total * extent
    flat_values = np.random.normal(size=total)
    typed_values = flat_values.astype(theano.config.floatX)
    return typed_values.reshape(shapes)
def numpy_maxandargmax(X, axis=None):
    """
    Reference NumPy computation of max and argmax over one or more axes.

    Parameters
    ----------
    X : numpy.ndarray
        Input array.
    axis : None, int, list or tuple of int
        Axes to reduce.  ``None`` means all axes.  Negative values count
        from the last axis.  Duplicates are ignored.

    Returns
    -------
    tuple
        ``(max, argmax)``.  ``argmax`` is the position of the maximum in the
        reduced axes flattened together (in increasing axis order), one
        value per element of the kept axes.

    """
    if axis is None:
        axis = range(X.ndim)
    elif not isinstance(axis, (tuple, list)):
        axis = [int(axis)]
    # Map negative axes to their non-negative equivalent *before* removing
    # duplicates and sorting: the "kept axes" computation below compares
    # against non-negative indices, so a raw negative axis would produce an
    # invalid permutation.
    axis = tuple(sorted(set(a + X.ndim if a < 0 else a for a in axis)))
    ref_max = np.max(X, axis=axis)
    # Following code is copied from MaxAndArgmax.perform():
    # Numpy does not support multiple axes for argmax. Work around.
    keep_axes = [i for i in range(X.ndim) if i not in axis]
    # Not-reduced axes in front.  The permutation is built as a plain list
    # of ints so that it stays integral even when one side is empty.
    transposed_x = np.transpose(X, keep_axes + list(axis))
    kept_shape = transposed_x.shape[:len(keep_axes)]
    reduced_shape = transposed_x.shape[len(keep_axes):]
    # np.prod(()) is the float 1.0, hence the explicit int().
    new_shape = kept_shape + (int(np.prod(reduced_shape)),)
    reshaped_x = transposed_x.reshape(new_shape)
    return (ref_max, np.argmax(reshaped_x, axis=-1))
def check_if_gpu_maxandargmax_in_graph(theano_function):
    """
    Assert that the compiled function uses GpuMaxAndArgmax.

    The apply nodes of the function's graph are scanned; the assertion
    fails when none of them has a GpuMaxAndArgmax op.

    """
    apply_nodes = theano_function.maker.fgraph.apply_nodes
    found = any(
        isinstance(node.op, theano.gpuarray.reduction.GpuMaxAndArgmax)
        for node in apply_nodes)
    assert found
def check_if_gpu_maxandargmax_not_in_graph(theano_function):
    """
    Assert that the compiled function does not use GpuMaxAndArgmax.

    The apply nodes of the function's graph are scanned; the assertion
    fails when any of them has a GpuMaxAndArgmax op.

    """
    apply_nodes = theano_function.maker.fgraph.apply_nodes
    found = any(
        isinstance(node.op, theano.gpuarray.reduction.GpuMaxAndArgmax)
        for node in apply_nodes)
    assert not found
class BaseTest:
    """
    Mixin with the max/argmax tests comparing Theano against NumPy.

    Subclasses set ``tensor_size`` (the number of dimensions of the tested
    tensor, from 0 to 5) and may set ``shape``; when ``shape`` is left to
    ``None`` it is computed by ``get_shape()`` in ``setUp()``.  Each test
    compiles ``T.max`` / ``T.argmax`` once without the GPU and once with the
    GPU and compares both against ``numpy_maxandargmax``.

    """
    # This attribute must be set in subclasses.
    tensor_size = None
    shape = None

    dtype = theano.config.floatX

    def get_shape(self):
        """
        Return a shape with ``tensor_size`` equal dimensions.

        Each dimension is the ceiling of the ``tensor_size``-th root of
        ``test_size``; a 0-d tensor gets the empty shape.

        """
        if self.tensor_size == 0:
            return []
        # True division: the file imports ``division`` from __future__.
        dim = int(math.ceil(math.pow(test_size, 1 / self.tensor_size)))
        return [dim] * self.tensor_size

    def setUp(self):
        """Skip when ``tensor_size`` is unusable, then fill in ``shape``."""
        if not isinstance(self.tensor_size, int):
            raise SkipTest("No tensor ndim defined.")
        if self.tensor_size < 0 or self.tensor_size > 5:
            raise SkipTest("We allow from 0 (included) to 5 (included) dimensions for these tests.")
        if self.shape is None:
            self.shape = self.get_shape()

    def get_host_tensor(self):
        """Return a symbolic host tensor with no broadcastable dimension."""
        broadcastable = (False,) * self.tensor_size
        return T.tensor(self.dtype, broadcastable)

    def get_gpu_tensor(self):
        """Return a symbolic GPU tensor with no broadcastable dimension."""
        broadcastable = (False,) * self.tensor_size
        return GpuArrayType(self.dtype, broadcastable)()

    def get_host_value(self):
        """Return a random host array of shape ``self.shape``."""
        return numpy_random_array(self.shape)

    def get_gpu_value(self):
        """Return a random GPU array of shape ``self.shape``."""
        return rand_gpuarray(*self.shape)

    def _compile_run_and_check(self, symbolic_input, run_value, ref_value,
                               axis, label, mode, graph_check):
        """
        Compile max/argmax over ``axis``, run it and compare with NumPy.

        ``graph_check`` is called on the compiled function before it is
        run; ``run_value`` is the value fed to the function and
        ``ref_value`` the host array given to ``numpy_maxandargmax``.

        """
        f = theano.function(
            [symbolic_input],
            [T.max(symbolic_input, axis=axis),
             T.argmax(symbolic_input, axis=axis)],
            name='shape:' + str(run_value.shape) + '/axis:' + str(axis) + '/' + label,
            mode=mode)
        graph_check(f)
        # First call: see the NB about profiling.ignore_first_call below.
        f(run_value)
        theano_max, theano_argmax = f(run_value)
        ref_max, ref_argmax = numpy_maxandargmax(ref_value, axis=axis)
        utt.assert_allclose(ref_max, theano_max)
        utt.assert_allclose(ref_argmax, theano_argmax)

    # NB: In compute_host() and compute_gpu(),
    # the first call of the theano function should be ignored in profiling,
    # with Theano config flag profiling.ignore_first_call=True.
    def compute_host(self, test_tensor, axis):
        """Check the result compiled with ``mode_without_gpu``."""
        self._compile_run_and_check(
            self.get_host_tensor(), test_tensor, test_tensor, axis,
            'HOST', mode_without_gpu,
            check_if_gpu_maxandargmax_not_in_graph)

    def compute_gpu(self, test_gpu_tensor, test_host_tensor, axis):
        """Check the result compiled with ``mode_with_gpu``."""
        self._compile_run_and_check(
            self.get_gpu_tensor(), test_gpu_tensor, test_host_tensor, axis,
            'GPU', mode_with_gpu,
            check_if_gpu_maxandargmax_in_graph)

    def compute(self, axis=None):
        """Run the host and GPU checks on one random tensor."""
        # We want to run CPU op and GPU op on the same tensor randomly generated.
        test_gpu_tensor = self.get_gpu_value()
        test_host_tensor = np.asarray(test_gpu_tensor)
        self.compute_host(test_host_tensor, axis)
        self.compute_gpu(test_gpu_tensor, test_host_tensor, axis)

    def compute_axis(self, pos):
        """Test the single axis ``pos``; nothing is run for 1-d tensors
        or when ``pos`` is not a valid axis."""
        if self.tensor_size != 1 and 0 <= pos < self.tensor_size:
            self.compute(pos)

    def compute_some_axes(self, count):
        """Test the first ``count`` entries of ``unsorted_axes`` that are
        valid for this tensor; nothing is run unless
        ``0 <= count < tensor_size``."""
        if 0 <= count < self.tensor_size:
            self.compute([i for i in unsorted_axes if i < self.tensor_size][:count])

    # Equivalent to test reduction on all axes.
    def test_none(self):
        self.compute(None)

    def test_axis_1(self):
        self.compute_axis(0)

    def test_axis_2(self):
        self.compute_axis(1)

    def test_axis_3(self):
        self.compute_axis(2)

    def test_axis_4(self):
        self.compute_axis(3)

    def test_axis_5(self):
        self.compute_axis(4)

    # For the tests below, we expect CPU op to run with Python implementation.

    def test_2_axes(self):
        self.compute_some_axes(2)

    def test_3_axes(self):
        self.compute_some_axes(3)

    def test_4_axes(self):
        self.compute_some_axes(4)
class TestScalar(BaseTest, TestCase):
    """Run the BaseTest checks on a 0-d tensor."""
    tensor_size = 0
class TestVector(BaseTest, TestCase):
    """Run the BaseTest checks on a 1-d tensor."""
    tensor_size = 1
# Special case
class TestRow(BaseTest, TestCase):
    """Run the BaseTest checks on a 2-d tensor of shape [1, test_size]."""
    tensor_size = 2
    shape = [1, test_size]
# Special case
class TestColumn(BaseTest, TestCase):
    """Run the BaseTest checks on a 2-d tensor of shape [test_size, 1]."""
    tensor_size = 2
    shape = [test_size, 1]
class TestMatrix(BaseTest, TestCase):
    """Run the BaseTest checks on a 2-d tensor with the default shape."""
    tensor_size = 2
class TestTensor5(BaseTest, TestCase):
    """Run the BaseTest checks on a 5-d tensor with the default shape."""
    tensor_size = 5
theano/tensor/basic.py
浏览文件 @
3347480a
...
@@ -15,6 +15,7 @@ from theano.compat import izip
...
@@ -15,6 +15,7 @@ from theano.compat import izip
from
theano.configparser
import
config
from
theano.configparser
import
config
from
theano
import
gof
from
theano
import
gof
from
theano.gof
import
Apply
,
Constant
,
Op
,
Variable
from
theano.gof
import
Apply
,
Constant
,
Op
,
Variable
from
theano.gof.type
import
Generic
from
theano.tensor
import
elemwise
from
theano.tensor
import
elemwise
from
theano.tensor.var
import
(
AsTensorError
,
TensorVariable
,
from
theano.tensor.var
import
(
AsTensorError
,
TensorVariable
,
...
@@ -1181,72 +1182,32 @@ class MaxAndArgmax(Op):
...
@@ -1181,72 +1182,32 @@ class MaxAndArgmax(Op):
nin
=
2
# tensor, axis
nin
=
2
# tensor, axis
nout
=
2
# max val, max idx
nout
=
2
# max val, max idx
E_axis
=
'invalid axis'
E_axis
=
'invalid axis'
__props__
=
()
params_type
=
Generic
()
__props__
=
(
'axis'
,)
def
make_node
(
self
,
x
,
axis
=
None
):
x
=
_as_tensor_variable
(
x
)
if
isinstance
(
axis
,
(
integer_types
,
numpy
.
integer
)):
axis
=
[
int
(
axis
)]
elif
isinstance
(
axis
,
numpy
.
ndarray
)
and
axis
.
ndim
==
0
:
axis
=
[
int
(
axis
)]
elif
isinstance
(
axis
,
(
tuple
,
list
,
numpy
.
ndarray
)):
axis
=
[
int
(
a
)
for
a
in
axis
]
if
axis
==
list
(
range
(
x
.
type
.
ndim
)):
axis
=
None
elif
isinstance
(
axis
,
Variable
):
if
NoneConst
.
equals
(
axis
):
axis
=
None
elif
not
isinstance
(
axis
,
TensorConstant
):
raise
TypeError
(
"MaxAndArgmax needs a constant axis. Got
%
s"
%
axis
)
else
:
assert
(
axis
.
dtype
.
startswith
(
"int"
)
or
axis
.
dtype
.
startswith
(
"uint"
))
if
isinstance
(
axis
.
data
,
(
integer_types
,
numpy
.
integer
))
or
\
(
isinstance
(
axis
.
data
,
numpy
.
ndarray
)
and
axis
.
data
.
ndim
==
0
):
axis
=
[
int
(
axis
.
data
)]
elif
isinstance
(
axis
.
data
,
(
list
,
numpy
.
ndarray
)):
axis
=
[
int
(
i
)
for
i
in
axis
.
data
]
# Make axis entries non-negative, and sort them
def
__init__
(
self
,
axis
):
if
isinstance
(
axis
,
list
):
assert
isinstance
(
axis
,
list
)
for
idx
in
xrange
(
len
(
axis
)):
self
.
axis
=
tuple
(
axis
)
if
axis
[
idx
]
<
0
:
axis
[
idx
]
+=
x
.
type
.
ndim
axis
.
sort
()
# Verify that axes are valid
def
get_params
(
self
,
node
):
all_axes
=
[]
return
self
.
axis
if
isinstance
(
axis
,
list
):
for
ax
in
axis
:
if
ax
<
0
or
ax
>=
x
.
type
.
ndim
:
raise
ValueError
(
'Invalid axis:
%
s (the number of dimensions of the '
'input is:
%
s)'
%
(
ax
,
x
.
type
.
ndim
))
if
ax
not
in
all_axes
:
all_axes
.
append
(
ax
)
else
:
all_axes
=
list
(
range
(
x
.
ndim
))
if
axis
is
None
or
axis
==
list
(
range
(
x
.
type
.
ndim
)):
def
make_node
(
self
,
x
):
axis
=
NoneConst
.
clone
()
x
=
_as_tensor_variable
(
x
)
else
:
axis
=
_as_tensor_variable
(
all_axes
)
assert
axis
.
ndim
==
1
inputs
=
[
x
,
axis
]
# We keep the original broadcastable flags for dimensions on which
# We keep the original broadcastable flags for dimensions on which
# we do not perform the max / argmax.
# we do not perform the max / argmax.
all_axes
=
set
(
self
.
axis
)
broadcastable
=
[
b
for
i
,
b
in
enumerate
(
x
.
type
.
broadcastable
)
broadcastable
=
[
b
for
i
,
b
in
enumerate
(
x
.
type
.
broadcastable
)
if
i
not
in
all_axes
]
if
i
not
in
all_axes
]
inputs
=
[
x
]
outputs
=
[
tensor
(
x
.
type
.
dtype
,
broadcastable
,
name
=
'max'
),
outputs
=
[
tensor
(
x
.
type
.
dtype
,
broadcastable
,
name
=
'max'
),
tensor
(
'int64'
,
broadcastable
,
name
=
'argmax'
)]
tensor
(
'int64'
,
broadcastable
,
name
=
'argmax'
)]
return
Apply
(
self
,
inputs
,
outputs
)
return
Apply
(
self
,
inputs
,
outputs
)
def
perform
(
self
,
node
,
inp
,
outs
):
def
perform
(
self
,
node
,
inp
,
outs
,
params
):
x
,
axes
=
inp
x
=
inp
[
0
]
axes
=
params
max
,
max_idx
=
outs
max
,
max_idx
=
outs
if
axes
is
None
:
if
axes
is
None
:
axes
=
tuple
(
range
(
x
.
ndim
))
axes
=
tuple
(
range
(
x
.
ndim
))
...
@@ -1269,35 +1230,46 @@ class MaxAndArgmax(Op):
...
@@ -1269,35 +1230,46 @@ class MaxAndArgmax(Op):
dtype
=
'int64'
)
dtype
=
'int64'
)
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
x
,
axis
=
inp
if
len
(
self
.
axis
)
!=
1
and
len
(
self
.
axis
)
!=
node
.
inputs
[
0
]
.
ndim
:
raise
NotImplementedError
(
"NumPy C-API can compute max and argmax only for 1 axis or for all axes."
)
x
=
inp
[
0
]
axis
=
sub
[
'params'
]
max
,
argmax
=
out
max
,
argmax
=
out
fail
=
sub
[
"fail"
]
fail
=
sub
[
"fail"
]
if
NoneConst
.
equals
(
node
.
inputs
[
1
]):
ret
=
"""
axis_code
=
"axis = NPY_MAXDIMS;"
#if PY_MAJOR_VERSION >= 3
else
:
#ifndef PyInt_AS_LONG
assert
node
.
inputs
[
1
]
.
ndim
==
1
#define PyInt_AS_LONG PyLong_AS_LONG
# Fall back to perform() if there are multiple axes
#endif
if
len
(
node
.
inputs
[
1
]
.
data
)
>
1
:
#endif
raise
NotImplementedError
()
axis_code
=
"""
int axis;
axis = ((dtype_
%(axis)
s*)PyArray_DATA(
%(axis)
s))[0];
if(axis > PyArray_NDIM(
%(x)
s)-1 || axis < -PyArray_NDIM(
%(x)
s)){
if (PyTuple_GET_SIZE(
%(axis)
s) == PyArray_NDIM(
%(x)
s)) {
axis = NPY_MAXDIMS;
} else if(PyTuple_GET_SIZE(
%(axis)
s) == 1) {
PyObject* axis_object = PyTuple_GET_ITEM(
%(axis)
s, 0);
axis = (int)PyInt_AS_LONG(axis_object);
Py_XDECREF(axis_object);
if (axis > PyArray_NDIM(
%(x)
s)-1 || axis < -PyArray_NDIM(
%(x)
s)) {
PyErr_SetString(PyExc_ValueError,
PyErr_SetString(PyExc_ValueError,
"MaxAndArgmax, bad axis argument");
"MaxAndArgmax: bad axis argument");
%(fail)
s
}
} else {
PyErr_SetString(PyExc_NotImplementedError,
"MaxAndArgmax: NumPy C-API can compute max and argmax only for 1 axis or for all axes.");
%(fail)
s
%(fail)
s
}
}
"""
%
locals
()
ret
=
"""
int axis;
Py_CLEAR(
%(max)
s);
Py_CLEAR(
%(max)
s);
Py_CLEAR(
%(argmax)
s);//todo pass them as out parameter.
Py_CLEAR(
%(argmax)
s);//todo pass them as out parameter.
%(axis_code)
s
%(max)
s = (PyArrayObject*)PyArray_Max(
%(x)
s, axis, NULL);
%(max)
s = (PyArrayObject*)PyArray_Max(
%(x)
s, axis, NULL);
if
(
%(max)
s == NULL)
{
if
(
%(max)
s == NULL)
{
%(fail)
s;
%(fail)
s;
}
}
if
(!PyArray_CheckExact(
%(max)
s))
{
if
(!PyArray_CheckExact(
%(max)
s))
{
%(max)
s = (PyArrayObject*)PyArray_FromAny((PyObject*)
%(max)
s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
%(max)
s = (PyArrayObject*)PyArray_FromAny((PyObject*)
%(max)
s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
if(
%(max)
s == NULL){
if(
%(max)
s == NULL){
%(fail)
s;
%(fail)
s;
...
@@ -1305,17 +1277,17 @@ class MaxAndArgmax(Op):
...
@@ -1305,17 +1277,17 @@ class MaxAndArgmax(Op):
}
}
%(argmax)
s = (PyArrayObject*)PyArray_ArgMax(
%(x)
s, axis, NULL);
%(argmax)
s = (PyArrayObject*)PyArray_ArgMax(
%(x)
s, axis, NULL);
if
(
%(argmax)
s == NULL)
{
if
(
%(argmax)
s == NULL)
{
Py_CLEAR(
%(max)
s);
Py_CLEAR(
%(max)
s);
%(fail)
s;
%(fail)
s;
}
}
if
(!PyArray_CheckExact(
%(argmax)
s))
{
if
(!PyArray_CheckExact(
%(argmax)
s))
{
%(argmax)
s = (PyArrayObject*)PyArray_FromAny((PyObject*)
%(argmax)
s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
%(argmax)
s = (PyArrayObject*)PyArray_FromAny((PyObject*)
%(argmax)
s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
if(
%(argmax)
s == NULL){
if(
%(argmax)
s == NULL){
%(fail)
s;
%(fail)
s;
}
}
}
}
if
(PyArray_TYPE(
%(argmax)
s) != NPY_INT64)
{
if
(PyArray_TYPE(
%(argmax)
s) != NPY_INT64)
{
PyObject * tmp = PyArray_Cast(
%(argmax)
s, NPY_INT64);
PyObject * tmp = PyArray_Cast(
%(argmax)
s, NPY_INT64);
if (NULL == tmp){
if (NULL == tmp){
%(fail)
s;
%(fail)
s;
...
@@ -1330,28 +1302,25 @@ class MaxAndArgmax(Op):
...
@@ -1330,28 +1302,25 @@ class MaxAndArgmax(Op):
return
(
4
,)
return
(
4
,)
def
infer_shape
(
self
,
node
,
shapes
):
def
infer_shape
(
self
,
node
,
shapes
):
ishape
,
axis_shape
=
shapes
ishape
=
shapes
[
0
]
axis
=
node
.
inputs
[
1
]
rval
=
tuple
(
ishape
[
i
]
for
(
i
,
b
)
in
enumerate
(
if
axis
.
data
is
None
:
node
.
inputs
[
0
]
.
type
.
broadcastable
)
if
i
not
in
self
.
axis
)
return
[(),
()]
rval
=
tuple
([
ishape
[
i
]
for
(
i
,
b
)
in
enumerate
(
node
.
inputs
[
0
]
.
type
.
broadcastable
)
if
i
not
in
axis
.
data
])
return
[
rval
,
rval
]
return
[
rval
,
rval
]
def
R_op
(
self
,
inputs
,
eval_points
):
def
R_op
(
self
,
inputs
,
eval_points
):
if
eval_points
[
0
]
is
None
:
if
eval_points
[
0
]
is
None
:
return
[
None
,
None
]
return
[
None
,
None
]
if
not
isinstance
(
inputs
[
1
],
theano
.
Constant
)
:
if
len
(
self
.
axis
)
!=
1
:
raise
ValueError
((
'R_op supported for arg_max only for '
raise
ValueError
((
'R_op supported for arg_max only for '
'
constant
axis!'
))
'
one
axis!'
))
if
inputs
[
1
]
.
data
>
1
:
if
self
.
axis
[
0
]
>
1
:
raise
ValueError
((
'R_op supported for arg_max only when '
raise
ValueError
((
'R_op supported for arg_max only when '
' axis is 0 or 1'
))
' axis is 0 or 1'
))
if
inputs
[
0
]
.
ndim
!=
2
:
if
inputs
[
0
]
.
ndim
!=
2
:
raise
ValueError
((
'R_op supported for arg_max only when '
raise
ValueError
((
'R_op supported for arg_max only when '
' input is a matrix'
))
' input is a matrix'
))
max_vals
,
max_pos
=
self
.
make_node
(
*
inputs
)
.
outputs
max_vals
,
max_pos
=
self
.
make_node
(
*
inputs
)
.
outputs
if
inputs
[
1
]
.
data
==
0
:
if
self
.
axis
[
0
]
==
0
:
return
[
eval_points
[
0
][
max_pos
,
return
[
eval_points
[
0
][
max_pos
,
arange
(
eval_points
[
0
]
.
shape
[
1
])],
None
]
arange
(
eval_points
[
0
]
.
shape
[
1
])],
None
]
else
:
else
:
...
@@ -1372,7 +1341,8 @@ class MaxAndArgmax(Op):
...
@@ -1372,7 +1341,8 @@ class MaxAndArgmax(Op):
# g_max has one less dimension than x, so you need to complete
# g_max has one less dimension than x, so you need to complete
# g_max to x's shape when axis=0 the broadcasting mechanism
# g_max to x's shape when axis=0 the broadcasting mechanism
# does it automatically
# does it automatically
x
,
axis
=
inp
x
=
inp
[
0
]
axis
=
_as_tensor_variable
(
self
.
axis
)
g_max
,
g_max_idx
=
grads
g_max
,
g_max_idx
=
grads
g_max_disconnected
=
isinstance
(
g_max
.
type
,
DisconnectedType
)
g_max_disconnected
=
isinstance
(
g_max
.
type
,
DisconnectedType
)
...
@@ -1382,15 +1352,10 @@ class MaxAndArgmax(Op):
...
@@ -1382,15 +1352,10 @@ class MaxAndArgmax(Op):
if
g_max_disconnected
and
g_max_idx_disconnected
:
if
g_max_disconnected
and
g_max_idx_disconnected
:
return
[
DisconnectedType
()(),
DisconnectedType
()()]
return
[
DisconnectedType
()(),
DisconnectedType
()()]
axis_grad
=
grad_undefined
(
self
,
1
,
axis
,
"argmax is not defined for non-integer axes so"
" argmax(x, axis+eps) is undefined"
)
# if the max is disconnected but the argmax is not,
# if the max is disconnected but the argmax is not,
# the gradient on its inputs is zero
# the gradient on its inputs is zero
if
g_max_disconnected
:
if
g_max_disconnected
:
return
[
x
.
zeros_like
()
,
axis_grad
]
return
[
x
.
zeros_like
()]
if
NoneConst
.
equals
(
axis
):
if
NoneConst
.
equals
(
axis
):
axis_
=
list
(
range
(
x
.
ndim
))
axis_
=
list
(
range
(
x
.
ndim
))
else
:
else
:
...
@@ -1414,9 +1379,7 @@ class MaxAndArgmax(Op):
...
@@ -1414,9 +1379,7 @@ class MaxAndArgmax(Op):
# Set the grad to the correct position.
# Set the grad to the correct position.
g_x
=
eq
(
xmax_pad
,
x
)
*
g_max_pad
g_x
=
eq
(
xmax_pad
,
x
)
*
g_max_pad
return
g_x
,
axis_grad
return
g_x
,
_max_and_argmax
=
MaxAndArgmax
()
class
Argmax
(
Op
):
class
Argmax
(
Op
):
...
@@ -1637,8 +1600,39 @@ def max_and_argmax(a, axis=None, keepdims=False):
...
@@ -1637,8 +1600,39 @@ def max_and_argmax(a, axis=None, keepdims=False):
will broadcast correctly against the original tensor.
will broadcast correctly against the original tensor.
"""
"""
# Check axis and convert it to a Python list of integers.
out
,
argout
=
_max_and_argmax
(
a
,
axis
)
# Axis will be used as an op param of MaxAndArgmax.
if
axis
is
None
:
axis
=
list
(
range
(
a
.
type
.
ndim
))
elif
(
isinstance
(
axis
,
(
integer_types
,
numpy
.
integer
))
or
(
isinstance
(
axis
,
numpy
.
ndarray
)
and
axis
.
ndim
==
0
)):
axis
=
[
int
(
axis
)]
elif
isinstance
(
axis
,
(
tuple
,
list
,
numpy
.
ndarray
)):
axis
=
[
int
(
i
)
for
i
in
axis
]
elif
isinstance
(
axis
,
Variable
):
if
NoneConst
.
equals
(
axis
):
axis
=
list
(
range
(
a
.
type
.
ndim
))
elif
not
isinstance
(
axis
,
TensorConstant
):
raise
TypeError
(
"max and argmax computation needs a constant axis. Got
%
s"
%
axis
)
else
:
assert
(
axis
.
dtype
.
startswith
(
"int"
)
or
axis
.
dtype
.
startswith
(
"uint"
))
if
(
isinstance
(
axis
.
data
,
(
integer_types
,
numpy
.
integer
))
or
(
isinstance
(
axis
.
data
,
numpy
.
ndarray
)
and
axis
.
data
.
ndim
==
0
)):
axis
=
[
int
(
axis
.
data
)]
elif
isinstance
(
axis
.
data
,
(
list
,
numpy
.
ndarray
)):
axis
=
[
int
(
i
)
for
i
in
axis
.
data
]
if
len
(
axis
)
==
0
:
axis
=
list
(
range
(
a
.
type
.
ndim
))
else
:
for
i
in
range
(
len
(
axis
)):
if
axis
[
i
]
<
0
:
axis
[
i
]
+=
a
.
type
.
ndim
if
axis
[
i
]
<
0
or
axis
[
i
]
>=
a
.
type
.
ndim
:
raise
ValueError
(
"max and argmax computation needs a valid axis number for
%
d-D tensor. Got
%
d"
%
(
a
.
type
.
ndim
,
axis
[
i
]))
axis
=
list
(
set
(
axis
))
axis
.
sort
()
out
,
argout
=
MaxAndArgmax
(
axis
)(
a
)
if
keepdims
:
if
keepdims
:
out
=
makeKeepDims
(
a
,
out
,
axis
)
out
=
makeKeepDims
(
a
,
out
,
axis
)
...
...
theano/tensor/nnet/nnet.py
浏览文件 @
3347480a
...
@@ -1568,9 +1568,9 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
...
@@ -1568,9 +1568,9 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
@opt.register_specialize
(
'fast_compile_gpu'
)
@opt.register_specialize
(
'fast_compile_gpu'
)
@gof.local_optimizer
([
tensor
.
_max_and_a
rgmax
])
@gof.local_optimizer
([
tensor
.
MaxAndA
rgmax
])
def
local_argmax_pushdown
(
node
):
def
local_argmax_pushdown
(
node
):
if
node
.
op
==
tensor
.
_max_and_argmax
and
node
.
inputs
[
0
]
.
owner
and
\
if
isinstance
(
node
.
op
,
tensor
.
MaxAndArgmax
)
and
node
.
inputs
[
0
]
.
owner
and
\
len
(
node
.
outputs
[
0
]
.
clients
)
>
0
and
node
.
inputs
[
0
]
.
owner
.
op
in
\
len
(
node
.
outputs
[
0
]
.
clients
)
>
0
and
node
.
inputs
[
0
]
.
owner
.
op
in
\
(
softmax_op
,
softplus
,
tensor
.
exp
,
tensor
.
log
,
tensor
.
tanh
,
sigmoid
,
(
softmax_op
,
softplus
,
tensor
.
exp
,
tensor
.
log
,
tensor
.
tanh
,
sigmoid
,
softmax_with_bias
):
softmax_with_bias
):
...
@@ -1584,20 +1584,21 @@ def local_argmax_pushdown(node):
...
@@ -1584,20 +1584,21 @@ def local_argmax_pushdown(node):
"warning set the Theano flags 'warn.argmax_pushdown_bug' "
"warning set the Theano flags 'warn.argmax_pushdown_bug' "
"to False"
)
"to False"
)
if
(
node
.
op
==
tensor
.
_max_and_argmax
and
if
(
isinstance
(
node
.
op
,
tensor
.
MaxAndArgmax
)
and
node
.
inputs
[
0
]
.
owner
and
len
(
node
.
outputs
[
0
]
.
clients
)
==
0
):
node
.
inputs
[
0
]
.
owner
and
len
(
node
.
outputs
[
0
]
.
clients
)
==
0
):
x_max
,
x_argmax
=
node
.
outputs
x_max
,
x_argmax
=
node
.
outputs
x
,
axis
=
node
.
inputs
x
=
node
.
inputs
[
0
]
axis
=
node
.
op
.
get_params
(
node
)
# TODO: Make a list/set of monotonic ops...
# TODO: Make a list/set of monotonic ops...
if
x
.
owner
and
x
.
owner
.
op
in
(
softmax_op
,
softplus
,
tensor
.
exp
,
if
x
.
owner
and
x
.
owner
.
op
in
(
softmax_op
,
softplus
,
tensor
.
exp
,
tensor
.
log
,
tensor
.
tanh
,
sigmoid
):
tensor
.
log
,
tensor
.
tanh
,
sigmoid
):
pre_x
,
=
x
.
owner
.
inputs
pre_x
,
=
x
.
owner
.
inputs
ret
=
tensor
.
_
max_and_argmax
(
pre_x
,
axis
)
ret
=
tensor
.
max_and_argmax
(
pre_x
,
axis
)
copy_stack_trace
(
x_max
,
ret
)
copy_stack_trace
(
x_max
,
ret
)
return
ret
return
ret
if
x
.
owner
and
x
.
owner
.
op
==
softmax_with_bias
:
if
x
.
owner
and
x
.
owner
.
op
==
softmax_with_bias
:
pre_x
,
pre_bias
=
x
.
owner
.
inputs
pre_x
,
pre_bias
=
x
.
owner
.
inputs
ret
=
tensor
.
_
max_and_argmax
(
pre_x
+
ret
=
tensor
.
max_and_argmax
(
pre_x
+
tensor
.
DimShuffle
(
tensor
.
DimShuffle
(
pre_bias
.
broadcastable
,
pre_bias
.
broadcastable
,
(
'x'
,
0
))(
pre_bias
),
axis
)
(
'x'
,
0
))(
pre_bias
),
axis
)
...
...
theano/tensor/opt_uncanonicalize.py
浏览文件 @
3347480a
...
@@ -41,8 +41,6 @@ from theano.tensor.elemwise import CAReduce
...
@@ -41,8 +41,6 @@ from theano.tensor.elemwise import CAReduce
from
theano.tensor
import
basic
as
T
from
theano.tensor
import
basic
as
T
from
theano.tensor
import
DimShuffle
from
theano.tensor
import
DimShuffle
from
theano.tensor.basic
import
(
get_scalar_constant_value
,
NotScalarConstantError
)
from
theano.tensor.opt
import
register_uncanonicalize
from
theano.tensor.opt
import
register_uncanonicalize
from
theano
import
scalar
as
scal
from
theano
import
scalar
as
scal
...
@@ -50,31 +48,19 @@ _logger = logging.getLogger('theano.tensor.opt')
...
@@ -50,31 +48,19 @@ _logger = logging.getLogger('theano.tensor.opt')
@register_uncanonicalize
@register_uncanonicalize
@gof.local_optimizer
([
T
.
_max_and_a
rgmax
])
@gof.local_optimizer
([
T
.
MaxAndA
rgmax
])
def
local_max_and_argmax
(
node
):
def
local_max_and_argmax
(
node
):
"""
"""
If we don't use the argmax, change it to a max only.
If we don't use the argmax, change it to a max only.
"""
"""
if
node
.
op
==
T
.
_max_and_argmax
:
if
isinstance
(
node
.
op
,
T
.
MaxAndArgmax
):
axis
=
node
.
op
.
get_params
(
node
)
if
len
(
node
.
outputs
[
1
]
.
clients
)
==
0
:
if
len
(
node
.
outputs
[
1
]
.
clients
)
==
0
:
# MaxAndArgmax support variable axis,
# but CAReduce support only constant axis.
if
node
.
inputs
[
1
]
.
data
is
None
:
axis
=
None
else
:
try
:
axis
=
get_scalar_constant_value
(
node
.
inputs
[
1
])
except
NotScalarConstantError
:
axis
=
node
.
inputs
[
1
]
if
not
isinstance
(
axis
,
T
.
TensorConstant
):
return
False
axis
=
axis
.
data
new
=
CAReduce
(
scal
.
maximum
,
axis
)(
node
.
inputs
[
0
])
new
=
CAReduce
(
scal
.
maximum
,
axis
)(
node
.
inputs
[
0
])
return
[
new
,
None
]
return
[
new
,
None
]
if
len
(
node
.
outputs
[
0
]
.
clients
)
==
0
:
if
len
(
node
.
outputs
[
0
]
.
clients
)
==
0
:
return
[
None
,
T
.
_argmax
(
node
.
inputs
[
0
],
node
.
inputs
[
1
]
)]
return
[
None
,
T
.
_argmax
(
node
.
inputs
[
0
],
axis
)]
@register_uncanonicalize
@register_uncanonicalize
...
...
theano/tensor/tests/test_basic.py
浏览文件 @
3347480a
...
@@ -7619,23 +7619,23 @@ class TestInferShape(utt.InferShapeTester):
...
@@ -7619,23 +7619,23 @@ class TestInferShape(utt.InferShapeTester):
# MaxAndArgmax,
# MaxAndArgmax,
adtens3_val
=
rand
(
4
,
5
,
3
)
adtens3_val
=
rand
(
4
,
5
,
3
)
self
.
_compile_and_check
([
adtens3
],
self
.
_compile_and_check
([
adtens3
],
MaxAndArgmax
()
(
adtens3
,
None
),
max_and_argmax
(
adtens3
,
None
),
[
adtens3_val
],
MaxAndArgmax
)
[
adtens3_val
],
MaxAndArgmax
)
self
.
_compile_and_check
([
adtens3
],
self
.
_compile_and_check
([
adtens3
],
MaxAndArgmax
()
(
adtens3
,
0
),
max_and_argmax
(
adtens3
,
0
),
[
adtens3_val
],
MaxAndArgmax
)
[
adtens3_val
],
MaxAndArgmax
)
self
.
_compile_and_check
([
adtens3
],
self
.
_compile_and_check
([
adtens3
],
MaxAndArgmax
()
(
adtens3
,
1
),
max_and_argmax
(
adtens3
,
1
),
[
adtens3_val
],
MaxAndArgmax
)
[
adtens3_val
],
MaxAndArgmax
)
self
.
_compile_and_check
([
adtens3
],
self
.
_compile_and_check
([
adtens3
],
MaxAndArgmax
()
(
adtens3
,
2
),
max_and_argmax
(
adtens3
,
2
),
[
adtens3_val
],
MaxAndArgmax
)
[
adtens3_val
],
MaxAndArgmax
)
self
.
_compile_and_check
([
adtens3
],
self
.
_compile_and_check
([
adtens3
],
MaxAndArgmax
()
(
adtens3
,
[
0
,
1
,
2
]),
max_and_argmax
(
adtens3
,
[
0
,
1
,
2
]),
[
adtens3_val
],
MaxAndArgmax
)
[
adtens3_val
],
MaxAndArgmax
)
# ARange
# ARange
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论