Commit ebf8f12a
Authored Jul 14, 2017 by Pascal Lamblin; committed via GitHub on Jul 14, 2017
Merge pull request #6116 from abergeron/dnn_redux2
Use GpuDnnReduction to replace GpuMaxAndArgmax when possible.
Parents: 7ed9fb90, f1acf82a

Showing 8 changed files with 176 additions and 58 deletions (+176 -58)
  theano/gpuarray/dnn.py                    +66   -4
  theano/gpuarray/dnn_redux.c                +1   -6
  theano/gpuarray/reduction.py               +2   -2
  theano/gpuarray/tests/test_dnn.py         +55   -1
  theano/gpuarray/tests/test_reduction.py   +10   -8
  theano/tensor/basic.py                    +39  -34
  theano/tensor/nnet/tests/test_nnet.py      +2   -2
  theano/tensor/opt_uncanonicalize.py        +1   -1
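At a glance, the merge teaches the 'cudnn' optimizer set to rewrite max-and-argmax graphs into a single cuDNN reduction. Below is a minimal sketch of the kind of graph affected (variable names are hypothetical; it assumes a configured gpuarray context with cuDNN >= 6, which is what the version checks in the diff require):

import theano
import theano.tensor as T

x = T.tensor3('x')
mx, argmx = T.max_and_argmax(x, axis=(1, 2))
f = theano.function([x], [mx, argmx])
# When compiled for the GPU, the GpuMaxAndArgmax (or lifted Argmax) node in
# this graph can now be replaced by one GpuDnnReduction that returns both the
# maxima and the indices (cast to int64, since cuDNN only yields 32-bit ones).
theano.printing.debugprint(f)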
theano/gpuarray/dnn.py

@@ -12,7 +12,7 @@ import theano
 from theano import Op, Apply, tensor, config, Variable
 from theano.scalar import (as_scalar, constant, Log, get_scalar_type,
                            int32 as int_t, bool as bool_t, uint32 as uint32_t)
-from theano.tensor import as_tensor_variable
+from theano.tensor import as_tensor_variable, Argmax
 from theano.gradient import DisconnectedType, grad_not_implemented
 from theano.gof import Optimizer, local_optimizer, COp, ParamsType, EnumList
 from theano.gof.cmodule import GCC_compiler
@@ -37,6 +37,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name,
                         gpu_contiguous, GpuAllocEmpty, empty_like,
                         GpuArrayType, HostFromGpu)
 from .elemwise import GpuElemwise, GpuCAReduceCuda
+from .reduction import GpuMaxAndArgmax

 # These don't exist in gpuarray
 # GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
@@ -1592,8 +1593,9 @@ class GpuDnnReduction(DnnBase):
         self.c_axis = self._convert_axis(axis)
         # axis is a list of axes to reduce on
         self.axis = axis
-        if return_indices and (red_op != 'max' and red_op != 'min'):
-            raise ValueError("Can't request indices for something other than min or max")
+        if return_indices and (red_op != 'maximum' and red_op != 'minimum'):
+            raise ValueError("Can't request indices for something other than"
+                             " minimum or maximum")
         self.return_indices = return_indices

     def _convert_axis(self, axis):
@@ -1897,7 +1899,7 @@ class GpuDnnDropoutOp(DnnBase):
         return Apply(self, [inp, descriptor, state],
                      [inp.type(), state.type(), gpudata_type()])

-    def prepare_node(self, node, storage_map, compute_map):
+    def prepare_node(self, node, storage_map, compute_map, impl):
         assert self.inplace, "GpuDnnDropoutOp not inplace"
@@ -3123,6 +3125,66 @@ def local_dnn_reduction(node):
                            False)(node.inputs[0]),)


+@register_opt('cudnn')
+@local_optimizer([GpuMaxAndArgmax])
+def local_cudnn_maxandargmax(node):
+    if not isinstance(node.op, GpuMaxAndArgmax):
+        return
+
+    if not dnn_available(node.inputs[0].type.context_name):
+        return
+
+    if version(raises=False) < 6000:
+        return
+
+    if node.inputs[0].ndim > 8:
+        return
+
+    if node.inputs[0].dtype != node.outputs[0].dtype:
+        return
+
+    if node.inputs[0].dtype not in ['float16', 'float32', 'float64']:
+        return
+
+    # order of the axes influences the output indices
+    if (node.op.axis is not None and
+            tuple(sorted(node.op.axis)) != node.op.axis):
+        return
+
+    max, arg = GpuDnnReduction('maximum', node.op.axis,
+                               node.outputs[0].dtype,
+                               node.outputs[0].dtype, True)(node.inputs[0])
+
+    # cudnn can only return int32 indices
+    return (max, as_gpuarray_variable(arg.astype('int64'),
+                                      node.outputs[1].type.context_name))
+
+
+@register_opt('cudnn', 'fast_compile')
+@op_lifter([Argmax])
+@register_opt2([Argmax], 'fast_compile', 'cudnn')
+def local_dnn_argmax(op, ctx_name, inputs, outputs):
+    if not dnn_available(ctx_name):
+        return
+
+    if version(raises=False) < 6000:
+        return
+
+    if inputs[0].ndim > 8:
+        return
+
+    if inputs[0].dtype not in ['float16', 'float32', 'float64']:
+        return
+
+    # order of the axes influences the output indices
+    if op.axis is not None and tuple(sorted(op.axis)) != op.axis:
+        return
+
+    max, arg = GpuDnnReduction('maximum', op.axis, inputs[0].dtype,
+                               inputs[0].dtype, True)(*inputs)
+
+    return [as_gpuarray_variable(arg.astype('int64'), ctx_name)]
+
+
 class NoCuDNNRaise(Optimizer):
     def apply(self, fgraph):
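Both new optimizers bail out on unsorted axes ("order of the axes influences the output indices"). The reason: Theano's multi-axis argmax moves the reduced axes to the end in the order given and argmaxes over the flattened tail, so the returned flat indices depend on that order. A plain-numpy sketch mirroring numpy_maxandargmax from the test_reduction.py diff below (the helper name is hypothetical):

import numpy as np

def max_and_argmax_np(x, axis):
    keep = [i for i in range(x.ndim) if i not in axis]
    y = np.transpose(x, keep + list(axis))      # reduced axes last, in order
    y = y.reshape(y.shape[:len(keep)] + (-1,))  # flatten the reduced tail
    return y.max(axis=-1), y.argmax(axis=-1)

x = np.zeros((2, 3, 4))
x[:, 1, 2] = 1.0
print(max_and_argmax_np(x, (1, 2))[1])  # [6 6]: flat index 1 * 4 + 2
print(max_and_argmax_np(x, (2, 1))[1])  # [7 7]: flat index 2 * 3 + 1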
theano/gpuarray/dnn_redux.c

@@ -61,11 +61,6 @@ int APPLY_SPECIFIC(dnn_redux)(PyGpuArrayObject *input,
   static float fbeta = 0.0f;
   static double dbeta = 0.0;

-  if (!GpuArray_IS_C_CONTIGUOUS(&input->ga)) {
-    PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
-    return 1;
-  }
-
   if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) != 0)
     return 1;
@@ -83,7 +78,7 @@ int APPLY_SPECIFIC(dnn_redux)(PyGpuArrayObject *input,
   if (indices != NULL) {
     if (theano_prep_output(indices, p, dims, GA_UINT, GA_C_ORDER, c) != 0)
       return 1;
-    indsize = PyGpuArray_SIZE(*indices);
+    indsize = PyGpuArray_SIZE(*indices) * 4;
   }

   if (p == input->ga.nd || rsz == 1) {
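One inference about the second hunk (the commit message does not explain it): the indices output is allocated as GA_UINT, a 32-bit type, while the size cuDNN expects is in bytes rather than elements, hence the multiplication by 4:

import numpy as np

# GA_UINT corresponds to a 32-bit unsigned integer, so converting the element
# count from PyGpuArray_SIZE into a byte size means multiplying by 4.
assert np.dtype('uint32').itemsize == 4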
theano/gpuarray/reduction.py

@@ -37,8 +37,8 @@ class GpuMaxAndArgmax(Op):
         broadcastable = [b for i, b in enumerate(X.type.broadcastable)
                          if i not in all_axes]
         inputs = [as_gpuarray_variable(X, context_name)]
-        outputs = [GpuArrayType(X.type.dtype, broadcastable, context_name=context_name, name='max')(),
-                   GpuArrayType(self.argmax_dtype, broadcastable, context_name=context_name, name='argmax')()]
+        outputs = [GpuArrayType(X.type.dtype, broadcastable, context_name=context_name)(),
+                   GpuArrayType(self.argmax_dtype, broadcastable, context_name=context_name)()]
         return Apply(self, inputs, outputs)

     def c_headers(self):
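A brief reminder of the convention used in make_node above (standard Theano, not new in this commit): a Type instance is callable and returns a fresh symbolic Variable of that type, which is how an op builds its output variables.

import theano.tensor as T

v = T.TensorType('float32', (False, False))()  # a symbolic float32 matrix
print(type(v))  # a TensorVariable of that type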
theano/gpuarray/tests/test_dnn.py

@@ -18,7 +18,7 @@ from theano.tensor.nnet import bn
 from .. import dnn
 from ..basic_ops import GpuAllocEmpty
-from ..type import gpuarray_shared_constructor
+from ..type import gpuarray_shared_constructor, GpuArrayType
 from .config import mode_with_gpu, mode_without_gpu, test_ctx_name, ref_cast
 from . import test_nnet
@@ -26,6 +26,11 @@ from .rnn_support import Model, GRU, LSTM, WrapperLayer
 from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_FWD

+try:
+    import pygpu
+except ImportError:
+    pass
+
 mode_with_gpu = mode_with_gpu.including()
 # Globally disabled for mode_without_gpu
 mode_with_gpu.check_py_code = False
@@ -1506,6 +1511,55 @@ def test_dnn_reduction_opt():
             yield dnn_reduction, 2, idtype, adtype, odtype


+def dnn_reduction_strides(shp, shuffle, slice):
+    utt.fetch_seed()
+    inp = GpuArrayType('float32', (False,) * len(shp),
+                       context_name=test_ctx_name)()
+    tmp = inp.dimshuffle(shuffle)[slice]
+    res = tmp.sum(acc_dtype='float32', dtype='float32')
+    f = theano.function([inp], res, mode=mode_with_gpu)
+    assert any(isinstance(n.op, dnn.GpuDnnReduction)
+               for n in f.maker.fgraph.apply_nodes)
+    data = np.random.random(shp).astype('float32')
+    res = np.sum(data)
+    gdata = pygpu.array(data, context=inp.type.context)
+    gres = f(gdata)
+    utt.assert_allclose(res, np.array(gres))
+
+
+def test_dnn_reduction_strides():
+    yield dnn_reduction_strides, (2, 3, 2), (1, 0, 2), slice(None, None, None)
+    yield dnn_reduction_strides, (2, 3, 2), (0, 1, 2), slice(None, None, -1)
+
+
+def dnn_maxargmax(nd, idtype, axis):
+    inp = T.TensorType(idtype, (False,) * nd)()
+    res = T.max_and_argmax(inp, axis=axis)
+    f = theano.function([inp], res, mode=mode_with_gpu)
+    assert any(isinstance(n.op, dnn.GpuDnnReduction)
+               for n in f.maker.fgraph.apply_nodes)
+
+
+def test_dnn_maxandargmax_opt():
+    if (not dnn.dnn_available(test_ctx_name) or
+            dnn.version(raises=False) < 6000):
+        raise SkipTest(dnn.dnn_available.msg)
+    for nd in range(1, 9):
+        yield dnn_maxargmax, nd, 'float32', None
+    for idtype in ('float64', 'float16'):
+        yield dnn_maxargmax, 2, idtype, None
+    yield dnn_maxargmax, 3, 'float32', (0, 1)
+    yield dnn_maxargmax, 3, 'float32', (0, 2)
+    yield dnn_maxargmax, 3, 'float32', (1, 2)
+    yield dnn_maxargmax, 3, 'float32', (0, 1, 2)
+    yield dnn_maxargmax, 3, 'float32', (0,)
+    yield dnn_maxargmax, 3, 'float32', (1,)
+    yield dnn_maxargmax, 3, 'float32', (2,)
+    yield dnn_maxargmax, 3, 'float32', ()
+
+
 def test_dnn_batchnorm_train():
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
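A brief aside on the test style used throughout this file (nose-style generators): a test_* function that yields a callable followed by arguments makes the runner execute that callable with those arguments as a separate test case. A toy sketch with hypothetical names:

def check_positive(n):
    assert n > 0

def test_positive_cases():
    for n in (1, 2, 3):
        yield check_positive, n  # nose runs each as an independent case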
theano/gpuarray/tests/test_reduction.py

@@ -10,6 +10,8 @@ from theano.tests.unittest_tools import SkipTest
 from .config import mode_with_gpu, mode_without_gpu
 from .test_basic_ops import rand_gpuarray
 from .. import GpuArrayType
+from ..reduction import GpuMaxAndArgmax
+from ..dnn import GpuDnnReduction

 import math
@@ -53,14 +55,14 @@ def numpy_maxandargmax(X, axis=None):
     return (ref_max, np.argmax(reshaped_x, axis=-1))


-def check_if_gpu_maxandargmax_in_graph(theano_function):
-    assert len([node for node in theano_function.maker.fgraph.apply_nodes
-                if isinstance(node.op, theano.gpuarray.reduction.GpuMaxAndArgmax)]) > 0
+def check_if_gpu_reduce_in_graph(theano_function):
+    assert any(isinstance(node.op, (GpuMaxAndArgmax, GpuDnnReduction))
+               for node in theano_function.maker.fgraph.apply_nodes)


-def check_if_gpu_maxandargmax_not_in_graph(theano_function):
-    assert len([node for node in theano_function.maker.fgraph.apply_nodes
-                if isinstance(node.op, theano.gpuarray.reduction.GpuMaxAndArgmax)]) == 0
+def check_if_gpu_reduce_not_in_graph(theano_function):
+    assert all(not isinstance(node.op, (GpuMaxAndArgmax, GpuDnnReduction))
+               for node in theano_function.maker.fgraph.apply_nodes)


 class BaseTest:
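Besides widening the check to GpuDnnReduction, the refactor above swaps list-building assertions for any()/all(), which short-circuit on the first decisive node. The two forms are otherwise equivalent:

nodes = [1, 'a', 2.0]
assert len([n for n in nodes if isinstance(n, int)]) > 0
assert any(isinstance(n, int) for n in nodes)  # same check, stops early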
@@ -105,7 +107,7 @@ class BaseTest:
         M = self.get_host_tensor()
         f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)],
                             name='shape:' + str(test_tensor.shape) +
                                  '/axis:' + str(axis) + '/HOST',
                             mode=mode_without_gpu)
-        check_if_gpu_maxandargmax_not_in_graph(f)
+        check_if_gpu_reduce_not_in_graph(f)
         f(test_tensor)
         theano_max, theano_argmax = f(test_tensor)
         ref_max, ref_argmax = numpy_maxandargmax(test_tensor, axis=axis)
@@ -116,7 +118,7 @@ class BaseTest:
         M = self.get_gpu_tensor()
         f = theano.function([M], [T.max(M, axis=axis), T.argmax(M, axis=axis)],
                             name='shape:' + str(test_gpu_tensor.shape) +
                                  '/axis:' + str(axis) + '/GPU',
                             mode=mode_with_gpu)
-        check_if_gpu_maxandargmax_in_graph(f)
+        check_if_gpu_reduce_in_graph(f)
         f(test_gpu_tensor)
         theano_max, theano_argmax = f(test_gpu_tensor)
         ref_max, ref_argmax = numpy_maxandargmax(test_host_tensor, axis=axis)
theano/tensor/basic.py

@@ -14,7 +14,7 @@ import theano
 from theano.compat import izip
 from theano.configparser import config
 from theano import gof
-from theano.gof import Apply, Constant, Op, Variable
+from theano.gof import Apply, Constant, Op, Variable, ParamsType
 from theano.gof.type import Generic
 from theano.tensor import elemwise
@@ -1429,21 +1429,31 @@ class Argmax(Op):
     nin = 2  # tensor, axis
     nout = 1
     E_axis = 'invalid axis'
-    __props__ = ()
+    __props__ = ('axis',)
     _f16_ok = True
+    params_type = ParamsType(c_axis=scal.int64)
+
+    def __init__(self, axis):
+        if axis is not None:
+            axis = tuple(axis)
+        self.axis = tuple(axis)
+
+    def get_params(self, node):
+        if self.axis is not None and len(self.axis) == 1:
+            c_axis = np.int64(self.axis[0])
+        else:
+            # The value here doesn't matter, it won't be used
+            c_axis = np.int64(-1)
+        return self.params_type.get_params(c_axis=c_axis)

     def make_node(self, x, axis=None):
         x = _as_tensor_variable(x)
-        # Check axis and convert it to a Python list of integers.
-        axis = check_and_normalize_axes(x, axis)
-        if len(axis) == 0:
-            axis = NoneConst.clone()
-        if axis is None:
+        if self.axis is None:
             all_axes = list(range(x.ndim))
         else:
-            all_axes = axis
-        axis = _as_tensor_variable(axis)
-        assert axis.ndim == 1
-        inputs = [x, axis]
+            all_axes = self.axis
+        inputs = [x]
         # We keep the original broadcastable flags for dimensions on which
         # we do not perform the argmax.
@@ -1452,13 +1462,16 @@ class Argmax(Op):
         outputs = [tensor('int64', broadcastable, name='argmax')]
         return Apply(self, inputs, outputs)

-    def perform(self, node, inp, outs):
-        x, axes = inp
+    def prepare_node(self, node, storage_map, compute_map, impl):
+        if len(node.inputs) == 2:
+            raise ValueError('You are trying to compile a graph with an old Argmax node.  Either reoptimize your graph or rebuild it to get the new node format.')
+
+    def perform(self, node, inp, outs, params):
+        x, = inp
+        axes = self.axis
         max_idx, = outs
         if axes is None:
             axes = tuple(range(x.ndim))
-        else:
-            axes = tuple(int(ax) for ax in axes)

         # Numpy does not support multiple axes for argmax
         # Work around
@@ -1476,18 +1489,18 @@ class Argmax(Op):
                                                   dtype='int64')

     def c_code(self, node, name, inp, out, sub):
-        x, axis = inp
+        x, = inp
         argmax, = out
         fail = sub["fail"]
-        if NoneConst.equals(node.inputs[1]):
+        params = sub["params"]
+        if self.axis is None:
             axis_code = "axis = NPY_MAXDIMS;"
         else:
-            assert node.inputs[1].ndim == 1
-            # Fall back to perform() if there are multiple axes
-            if len(node.inputs[1].data) > 1:
+            if len(self.axis) > 1:
                 raise NotImplementedError()
+            # params is only used here for now
             axis_code = """
-        axis = ((dtype_%(axis)s*)PyArray_DATA(%(axis)s))[0];
+        axis = %(params)s->c_axis;
         if(axis > PyArray_NDIM(%(x)s)-1 || axis < -PyArray_NDIM(%(x)s)){
             PyErr_SetString(PyExc_ValueError,
             "Argmax, bad axis argument");
@@ -1522,28 +1535,20 @@ class Argmax(Op):
         return ret % locals()

     def c_code_cache_version(self):
-        return (0,)
+        return (1,)

     def infer_shape(self, node, shapes):
-        ishape, axis_shape = shapes
-        axis = node.inputs[1]
-        if axis.data is None:
+        ishape, = shapes
+        if self.axis is None:
             return [()]
         rval = tuple([ishape[i] for (i, b) in enumerate(
-            node.inputs[0].type.broadcastable) if i not in axis.data])
+            node.inputs[0].type.broadcastable) if i not in self.axis])
         return [rval]

     def grad(self, inp, grads):
-        x, axis = inp
-
-        axis_grad = grad_undefined(
-            self, 1, axis,
-            "argmax is not defined for non-integer axes so"
-            " argmax(x, axis+eps) is undefined")
-
-        return [x.zeros_like(), axis_grad]
+        x, = inp
+
+        return [x.zeros_like()]

-_argmax = Argmax()

 def makeKeepDims(x, y, axis):
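The ParamsType machinery adopted above deserves a word: declaring a params_type gives the op a wrapper object whose fields reach perform() as its params argument and appear in generated C code as %(params)s->field. A condensed sketch of the pattern (AxisOp is a hypothetical name, not part of the commit; it trims make_node/perform):

import numpy as np
import theano.scalar as scal
from theano.gof import Op, ParamsType

class AxisOp(Op):
    __props__ = ('axis',)
    params_type = ParamsType(c_axis=scal.int64)

    def __init__(self, axis):
        self.axis = tuple(axis)

    def get_params(self, node):
        # The returned wrapper reaches perform() as `params` and is visible
        # in c_code() as %(params)s->c_axis, just as in Argmax above.
        return self.params_type.get_params(c_axis=np.int64(self.axis[0]))

params = AxisOp((0,)).get_params(None)
print(params.c_axis)  # 0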
theano/tensor/nnet/tests/test_nnet.py

@@ -1333,9 +1333,9 @@ def test_argmax_pushdown():
     # for node in fgraph.toposort():
     #     print node.op
     assert len(fgraph.toposort()) == 1
-    assert fgraph.toposort()[0].op == tensor.basic._argmax
+    assert isinstance(fgraph.toposort()[0].op, tensor.basic.Argmax)
     assert check_stack_trace(
-        fgraph, ops_to_check=tensor.basic._argmax)
+        fgraph, ops_to_check=tensor.basic.Argmax)

     x = tensor.matrix()
     # test that the max_and_argmax is not pushed down if the max is used
     out = tensor.max_and_argmax(
theano/tensor/opt_uncanonicalize.py

@@ -60,7 +60,7 @@ def local_max_and_argmax(node):
             return [new, None]

         if len(node.outputs[0].clients) == 0:
-            return [None, T._argmax(node.inputs[0], axis)]
+            return [None, T.Argmax(axis)(node.inputs[0])]


 @register_uncanonicalize
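This one-line change sums up the interface shift behind the whole commit: Argmax used to be a shared instance taking axis as a symbolic input (roughly T._argmax(x, axis)), whereas the axis is now a compile-time property of the op. A hedged illustration of the new construction:

import theano.tensor as T

x = T.tensor3('x')
argmax_var = T.Argmax((0,))(x)  # axis fixed when the op is constructed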