Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
b4dc02d6
提交
b4dc02d6
authored
10月 19, 2020
作者:
Brandon T. Willard
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Replace theano.tensor alias T with tt in tests.gpuarray
上级
b4c51eb2
隐藏空白字符变更
内嵌
并排
正在显示
8 个修改的文件
包含
345 行增加
和
352 行删除
+345
-352
rnn_support.py
tests/gpuarray/rnn_support.py
+18
-18
test_basic_ops.py
tests/gpuarray/test_basic_ops.py
+42
-42
test_ctc.py
tests/gpuarray/test_ctc.py
+3
-3
test_dnn.py
tests/gpuarray/test_dnn.py
+239
-243
test_extra_ops.py
tests/gpuarray/test_extra_ops.py
+12
-13
test_fft.py
tests/gpuarray/test_fft.py
+2
-2
test_nnet.py
tests/gpuarray/test_nnet.py
+25
-27
test_reduction.py
tests/gpuarray/test_reduction.py
+4
-4
没有找到文件。
tests/gpuarray/rnn_support.py
浏览文件 @
b4dc02d6
import
numpy
as
np
import
numpy
as
np
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
tt
class
Model
(
object
):
class
Model
(
object
):
...
@@ -125,16 +125,16 @@ class GRU(Layer):
...
@@ -125,16 +125,16 @@ class GRU(Layer):
"""step through processed input to create output"""
"""step through processed input to create output"""
def
step
(
inp
,
s_prev
):
def
step
(
inp
,
s_prev
):
i_t
=
T
.
nnet
.
sigmoid
(
i_t
=
tt
.
nnet
.
sigmoid
(
T
.
dot
(
inp
,
self
.
W_i
)
+
T
.
dot
(
s_prev
,
self
.
R_i
)
+
self
.
b_wi
+
self
.
b_ru
tt
.
dot
(
inp
,
self
.
W_i
)
+
tt
.
dot
(
s_prev
,
self
.
R_i
)
+
self
.
b_wi
+
self
.
b_ru
)
)
r_t
=
T
.
nnet
.
sigmoid
(
r_t
=
tt
.
nnet
.
sigmoid
(
T
.
dot
(
inp
,
self
.
W_r
)
+
T
.
dot
(
s_prev
,
self
.
R_r
)
+
self
.
b_wr
+
self
.
b_rr
tt
.
dot
(
inp
,
self
.
W_r
)
+
tt
.
dot
(
s_prev
,
self
.
R_r
)
+
self
.
b_wr
+
self
.
b_rr
)
)
h_hat_t
=
T
.
tanh
(
h_hat_t
=
tt
.
tanh
(
T
.
dot
(
inp
,
self
.
W_h
)
tt
.
dot
(
inp
,
self
.
W_h
)
+
(
r_t
*
(
T
.
dot
(
s_prev
,
self
.
R_h
)
+
self
.
b_rh
))
+
(
r_t
*
(
tt
.
dot
(
s_prev
,
self
.
R_h
)
+
self
.
b_rh
))
+
self
.
b_wh
+
self
.
b_wh
)
)
...
@@ -229,21 +229,21 @@ class LSTM(Layer):
...
@@ -229,21 +229,21 @@ class LSTM(Layer):
"""step through processed input to create output"""
"""step through processed input to create output"""
def
step
(
x_t
,
h_tm1
,
c_tm1
):
def
step
(
x_t
,
h_tm1
,
c_tm1
):
i_t
=
T
.
nnet
.
sigmoid
(
i_t
=
tt
.
nnet
.
sigmoid
(
T
.
dot
(
x_t
,
self
.
W_i
)
+
T
.
dot
(
h_tm1
,
self
.
R_i
)
+
self
.
b_wi
+
self
.
b_ri
tt
.
dot
(
x_t
,
self
.
W_i
)
+
tt
.
dot
(
h_tm1
,
self
.
R_i
)
+
self
.
b_wi
+
self
.
b_ri
)
)
f_t
=
T
.
nnet
.
sigmoid
(
f_t
=
tt
.
nnet
.
sigmoid
(
T
.
dot
(
x_t
,
self
.
W_f
)
+
T
.
dot
(
h_tm1
,
self
.
R_f
)
+
self
.
b_wf
+
self
.
b_rf
tt
.
dot
(
x_t
,
self
.
W_f
)
+
tt
.
dot
(
h_tm1
,
self
.
R_f
)
+
self
.
b_wf
+
self
.
b_rf
)
)
o_t
=
T
.
nnet
.
sigmoid
(
o_t
=
tt
.
nnet
.
sigmoid
(
T
.
dot
(
x_t
,
self
.
W_o
)
+
T
.
dot
(
h_tm1
,
self
.
R_o
)
+
self
.
b_ro
+
self
.
b_wo
tt
.
dot
(
x_t
,
self
.
W_o
)
+
tt
.
dot
(
h_tm1
,
self
.
R_o
)
+
self
.
b_ro
+
self
.
b_wo
)
)
c_hat_t
=
T
.
tanh
(
c_hat_t
=
tt
.
tanh
(
T
.
dot
(
x_t
,
self
.
W_c
)
+
T
.
dot
(
h_tm1
,
self
.
R_c
)
+
self
.
b_wc
+
self
.
b_rc
tt
.
dot
(
x_t
,
self
.
W_c
)
+
tt
.
dot
(
h_tm1
,
self
.
R_c
)
+
self
.
b_wc
+
self
.
b_rc
)
)
c_t
=
f_t
*
c_tm1
+
i_t
*
c_hat_t
c_t
=
f_t
*
c_tm1
+
i_t
*
c_hat_t
h_t
=
o_t
*
T
.
tanh
(
c_t
)
h_t
=
o_t
*
tt
.
tanh
(
c_t
)
return
h_t
,
c_t
return
h_t
,
c_t
...
@@ -275,7 +275,7 @@ class FC(Layer):
...
@@ -275,7 +275,7 @@ class FC(Layer):
self
.
b
=
bias_weights
((
output_dim
,),
param_list
=
self
.
params
,
name
=
name
+
".b"
)
self
.
b
=
bias_weights
((
output_dim
,),
param_list
=
self
.
params
,
name
=
name
+
".b"
)
def
output
(
self
):
def
output
(
self
):
return
T
.
dot
(
self
.
X
,
self
.
W
)
+
self
.
b
return
tt
.
dot
(
self
.
X
,
self
.
W
)
+
self
.
b
class
WrapperLayer
(
Layer
):
class
WrapperLayer
(
Layer
):
...
...
tests/gpuarray/test_basic_ops.py
浏览文件 @
b4dc02d6
...
@@ -4,7 +4,7 @@ pygpu = pytest.importorskip("pygpu")
...
@@ -4,7 +4,7 @@ pygpu = pytest.importorskip("pygpu")
gpuarray
=
pygpu
.
gpuarray
gpuarray
=
pygpu
.
gpuarray
import
numpy
as
np
import
numpy
as
np
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
tt
from
theano.tensor
import
TensorType
from
theano.tensor
import
TensorType
from
theano.tensor.basic
import
alloc
from
theano.tensor.basic
import
alloc
...
@@ -217,7 +217,7 @@ def makeTester(
...
@@ -217,7 +217,7 @@ def makeTester(
def
test_transfer_cpu_gpu
():
def
test_transfer_cpu_gpu
():
a
=
T
.
fmatrix
(
"a"
)
a
=
tt
.
fmatrix
(
"a"
)
g
=
GpuArrayType
(
dtype
=
"float32"
,
broadcastable
=
(
False
,
False
))(
"g"
)
g
=
GpuArrayType
(
dtype
=
"float32"
,
broadcastable
=
(
False
,
False
))(
"g"
)
av
=
np
.
asarray
(
rng
.
rand
(
5
,
4
),
dtype
=
"float32"
)
av
=
np
.
asarray
(
rng
.
rand
(
5
,
4
),
dtype
=
"float32"
)
...
@@ -254,7 +254,7 @@ def test_transfer_strided():
...
@@ -254,7 +254,7 @@ def test_transfer_strided():
# This is just to ensure that it works in theano
# This is just to ensure that it works in theano
# libgpuarray has a much more comprehensive suit of tests to
# libgpuarray has a much more comprehensive suit of tests to
# ensure correctness
# ensure correctness
a
=
T
.
fmatrix
(
"a"
)
a
=
tt
.
fmatrix
(
"a"
)
g
=
GpuArrayType
(
dtype
=
"float32"
,
broadcastable
=
(
False
,
False
))(
"g"
)
g
=
GpuArrayType
(
dtype
=
"float32"
,
broadcastable
=
(
False
,
False
))(
"g"
)
av
=
np
.
asarray
(
rng
.
rand
(
5
,
8
),
dtype
=
"float32"
)
av
=
np
.
asarray
(
rng
.
rand
(
5
,
8
),
dtype
=
"float32"
)
...
@@ -300,7 +300,7 @@ class TestGPUAlloc(TestAlloc):
...
@@ -300,7 +300,7 @@ class TestGPUAlloc(TestAlloc):
dtype
=
"float32"
dtype
=
"float32"
mode
=
mode_with_gpu
mode
=
mode_with_gpu
shared
=
staticmethod
(
gpuarray_shared_constructor
)
shared
=
staticmethod
(
gpuarray_shared_constructor
)
allocs
=
[
GpuAlloc
(
test_ctx_name
),
GpuAlloc
(
test_ctx_name
),
T
.
Alloc
()]
allocs
=
[
GpuAlloc
(
test_ctx_name
),
GpuAlloc
(
test_ctx_name
),
tt
.
Alloc
()]
def
test_alloc_empty
():
def
test_alloc_empty
():
...
@@ -343,21 +343,21 @@ def test_shape():
...
@@ -343,21 +343,21 @@ def test_shape():
assert
np
.
all
(
f
(
v
)
==
(
3
,
4
,
5
))
assert
np
.
all
(
f
(
v
)
==
(
3
,
4
,
5
))
if
theano
.
config
.
mode
!=
"FAST_COMPILE"
:
if
theano
.
config
.
mode
!=
"FAST_COMPILE"
:
assert
len
(
topo
)
==
4
assert
len
(
topo
)
==
4
assert
isinstance
(
topo
[
0
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
0
]
.
op
,
tt
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
1
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
1
]
.
op
,
tt
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
2
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
2
]
.
op
,
tt
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
3
]
.
op
,
T
.
opt
.
MakeVector
)
assert
isinstance
(
topo
[
3
]
.
op
,
tt
.
opt
.
MakeVector
)
mode
=
mode_with_gpu
.
excluding
(
"local_shape_to_shape_i"
)
mode
=
mode_with_gpu
.
excluding
(
"local_shape_to_shape_i"
)
f
=
theano
.
function
([
x
],
x
.
shape
,
mode
=
mode
)
f
=
theano
.
function
([
x
],
x
.
shape
,
mode
=
mode
)
topo
=
f
.
maker
.
fgraph
.
toposort
()
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
np
.
all
(
f
(
v
)
==
(
3
,
4
,
5
))
assert
np
.
all
(
f
(
v
)
==
(
3
,
4
,
5
))
assert
len
(
topo
)
==
1
assert
len
(
topo
)
==
1
assert
isinstance
(
topo
[
0
]
.
op
,
T
.
Shape
)
assert
isinstance
(
topo
[
0
]
.
op
,
tt
.
Shape
)
def
test_gpu_contiguous
():
def
test_gpu_contiguous
():
a
=
T
.
fmatrix
(
"a"
)
a
=
tt
.
fmatrix
(
"a"
)
i
=
T
.
iscalar
(
"i"
)
i
=
tt
.
iscalar
(
"i"
)
a_val
=
np
.
asarray
(
np
.
random
.
rand
(
4
,
5
),
dtype
=
"float32"
)
a_val
=
np
.
asarray
(
np
.
random
.
rand
(
4
,
5
),
dtype
=
"float32"
)
# The reshape is needed otherwise we make the subtensor on the CPU
# The reshape is needed otherwise we make the subtensor on the CPU
# to transfer less data.
# to transfer less data.
...
@@ -383,8 +383,8 @@ class TestGPUReshape(TestReshape):
...
@@ -383,8 +383,8 @@ class TestGPUReshape(TestReshape):
theano
.
compile
.
DeepCopyOp
,
theano
.
compile
.
DeepCopyOp
,
GpuDimShuffle
,
GpuDimShuffle
,
GpuElemwise
,
GpuElemwise
,
t
heano
.
tensor
.
opt
.
Shape_i
,
t
t
.
opt
.
Shape_i
,
t
heano
.
tensor
.
opt
.
MakeVector
,
t
t
.
opt
.
MakeVector
,
)
)
assert
self
.
op
==
GpuReshape
assert
self
.
op
==
GpuReshape
...
@@ -418,7 +418,7 @@ class TestGPUJoinAndSplit(TestJoinAndSplit):
...
@@ -418,7 +418,7 @@ class TestGPUJoinAndSplit(TestJoinAndSplit):
# Also test float16 computation at the same time.
# Also test float16 computation at the same time.
rng
=
np
.
random
.
RandomState
(
seed
=
utt
.
fetch_seed
())
rng
=
np
.
random
.
RandomState
(
seed
=
utt
.
fetch_seed
())
m
=
self
.
shared
(
rng
.
rand
(
4
,
6
)
.
astype
(
"float16"
))
m
=
self
.
shared
(
rng
.
rand
(
4
,
6
)
.
astype
(
"float16"
))
o
=
T
.
Split
(
2
)(
m
,
0
,
[
2
,
2
])
o
=
tt
.
Split
(
2
)(
m
,
0
,
[
2
,
2
])
assert
o
[
0
]
.
dtype
==
"float16"
assert
o
[
0
]
.
dtype
==
"float16"
f
=
theano
.
function
([],
o
,
mode
=
self
.
mode
)
f
=
theano
.
function
([],
o
,
mode
=
self
.
mode
)
assert
any
(
assert
any
(
...
@@ -433,22 +433,22 @@ class TestGPUJoinAndSplit(TestJoinAndSplit):
...
@@ -433,22 +433,22 @@ class TestGPUJoinAndSplit(TestJoinAndSplit):
def
test_gpujoin_gpualloc
():
def
test_gpujoin_gpualloc
():
a
=
T
.
fmatrix
(
"a"
)
a
=
tt
.
fmatrix
(
"a"
)
a_val
=
np
.
asarray
(
np
.
random
.
rand
(
4
,
5
),
dtype
=
"float32"
)
a_val
=
np
.
asarray
(
np
.
random
.
rand
(
4
,
5
),
dtype
=
"float32"
)
b
=
T
.
fmatrix
(
"b"
)
b
=
tt
.
fmatrix
(
"b"
)
b_val
=
np
.
asarray
(
np
.
random
.
rand
(
3
,
5
),
dtype
=
"float32"
)
b_val
=
np
.
asarray
(
np
.
random
.
rand
(
3
,
5
),
dtype
=
"float32"
)
f
=
theano
.
function
(
f
=
theano
.
function
(
[
a
,
b
],
T
.
join
(
0
,
T
.
zeros_like
(
a
),
T
.
ones_like
(
b
))
+
4
,
mode
=
mode_without_gpu
[
a
,
b
],
tt
.
join
(
0
,
tt
.
zeros_like
(
a
),
tt
.
ones_like
(
b
))
+
4
,
mode
=
mode_without_gpu
)
)
f_gpu
=
theano
.
function
(
f_gpu
=
theano
.
function
(
[
a
,
b
],
T
.
join
(
0
,
T
.
zeros_like
(
a
),
T
.
ones_like
(
b
)),
mode
=
mode_with_gpu
[
a
,
b
],
tt
.
join
(
0
,
tt
.
zeros_like
(
a
),
tt
.
ones_like
(
b
)),
mode
=
mode_with_gpu
)
)
f_gpu2
=
theano
.
function
(
f_gpu2
=
theano
.
function
(
[
a
,
b
],
T
.
join
(
0
,
T
.
zeros_like
(
a
),
T
.
ones_like
(
b
))
+
4
,
mode
=
mode_with_gpu
[
a
,
b
],
tt
.
join
(
0
,
tt
.
zeros_like
(
a
),
tt
.
ones_like
(
b
))
+
4
,
mode
=
mode_with_gpu
)
)
assert
sum
([
node
.
op
==
T
.
alloc
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
==
2
assert
sum
([
node
.
op
==
tt
.
alloc
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
==
2
assert
sum
([
node
.
op
==
T
.
join_
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
==
1
assert
sum
([
node
.
op
==
tt
.
join_
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
==
1
assert
(
assert
(
sum
([
isinstance
(
node
.
op
,
GpuAlloc
)
for
node
in
f_gpu
.
maker
.
fgraph
.
toposort
()])
sum
([
isinstance
(
node
.
op
,
GpuAlloc
)
for
node
in
f_gpu
.
maker
.
fgraph
.
toposort
()])
==
2
==
2
...
@@ -471,10 +471,10 @@ def test_gpueye():
...
@@ -471,10 +471,10 @@ def test_gpueye():
# allowed.
# allowed.
if
M
is
None
:
if
M
is
None
:
M
=
N
M
=
N
N_symb
=
T
.
iscalar
()
N_symb
=
tt
.
iscalar
()
M_symb
=
T
.
iscalar
()
M_symb
=
tt
.
iscalar
()
k_symb
=
T
.
iscalar
()
k_symb
=
tt
.
iscalar
()
out
=
T
.
eye
(
N_symb
,
M_symb
,
k_symb
,
dtype
=
dtype
)
+
np
.
array
(
1
)
.
astype
(
dtype
)
out
=
tt
.
eye
(
N_symb
,
M_symb
,
k_symb
,
dtype
=
dtype
)
+
np
.
array
(
1
)
.
astype
(
dtype
)
f
=
theano
.
function
([
N_symb
,
M_symb
,
k_symb
],
out
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
N_symb
,
M_symb
,
k_symb
],
out
,
mode
=
mode_with_gpu
)
result
=
np
.
asarray
(
f
(
N
,
M
,
k
))
-
np
.
array
(
1
)
.
astype
(
dtype
)
result
=
np
.
asarray
(
f
(
N
,
M
,
k
))
-
np
.
array
(
1
)
.
astype
(
dtype
)
...
@@ -511,7 +511,7 @@ def test_hostfromgpu_shape_i():
...
@@ -511,7 +511,7 @@ def test_hostfromgpu_shape_i():
m
=
mode_with_gpu
.
including
(
m
=
mode_with_gpu
.
including
(
"local_dot_to_dot22"
,
"local_dot22_to_dot22scalar"
,
"specialize"
"local_dot_to_dot22"
,
"local_dot22_to_dot22scalar"
,
"specialize"
)
)
a
=
T
.
fmatrix
(
"a"
)
a
=
tt
.
fmatrix
(
"a"
)
ca
=
theano
.
gpuarray
.
type
.
GpuArrayType
(
"float32"
,
(
False
,
False
))()
ca
=
theano
.
gpuarray
.
type
.
GpuArrayType
(
"float32"
,
(
False
,
False
))()
av
=
np
.
asarray
(
np
.
random
.
rand
(
5
,
4
),
dtype
=
"float32"
)
av
=
np
.
asarray
(
np
.
random
.
rand
(
5
,
4
),
dtype
=
"float32"
)
cv
=
gpuarray
.
asarray
(
cv
=
gpuarray
.
asarray
(
...
@@ -522,9 +522,9 @@ def test_hostfromgpu_shape_i():
...
@@ -522,9 +522,9 @@ def test_hostfromgpu_shape_i():
assert
any
(
isinstance
(
x
.
op
,
GpuFromHost
)
for
x
in
f
.
maker
.
fgraph
.
toposort
())
assert
any
(
isinstance
(
x
.
op
,
GpuFromHost
)
for
x
in
f
.
maker
.
fgraph
.
toposort
())
f
=
theano
.
function
([
a
],
GpuFromHost
(
test_ctx_name
)(
a
)
.
shape
,
mode
=
m
)
f
=
theano
.
function
([
a
],
GpuFromHost
(
test_ctx_name
)(
a
)
.
shape
,
mode
=
m
)
topo
=
f
.
maker
.
fgraph
.
toposort
()
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
isinstance
(
topo
[
0
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
0
]
.
op
,
tt
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
1
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
1
]
.
op
,
tt
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
2
]
.
op
,
T
.
opt
.
MakeVector
)
assert
isinstance
(
topo
[
2
]
.
op
,
tt
.
opt
.
MakeVector
)
assert
tuple
(
f
(
av
))
==
(
5
,
4
)
assert
tuple
(
f
(
av
))
==
(
5
,
4
)
f
=
theano
.
function
([
ca
],
host_from_gpu
(
ca
),
mode
=
m
)
f
=
theano
.
function
([
ca
],
host_from_gpu
(
ca
),
mode
=
m
)
...
@@ -533,7 +533,7 @@ def test_hostfromgpu_shape_i():
...
@@ -533,7 +533,7 @@ def test_hostfromgpu_shape_i():
topo
=
f
.
maker
.
fgraph
.
toposort
()
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
isinstance
(
topo
[
0
]
.
op
,
theano
.
compile
.
Shape_i
)
assert
isinstance
(
topo
[
0
]
.
op
,
theano
.
compile
.
Shape_i
)
assert
isinstance
(
topo
[
1
]
.
op
,
theano
.
compile
.
Shape_i
)
assert
isinstance
(
topo
[
1
]
.
op
,
theano
.
compile
.
Shape_i
)
assert
isinstance
(
topo
[
2
]
.
op
,
t
heano
.
tensor
.
opt
.
MakeVector
)
assert
isinstance
(
topo
[
2
]
.
op
,
t
t
.
opt
.
MakeVector
)
assert
tuple
(
f
(
cv
))
==
(
5
,
4
)
assert
tuple
(
f
(
cv
))
==
(
5
,
4
)
...
@@ -544,10 +544,10 @@ def test_Gpujoin_inplace():
...
@@ -544,10 +544,10 @@ def test_Gpujoin_inplace():
# Gpujoin function but all except one of them are empty. In this case
# Gpujoin function but all except one of them are empty. In this case
# Gpujoin should work inplace and the output should be the view of the
# Gpujoin should work inplace and the output should be the view of the
# non-empty element.
# non-empty element.
s
=
T
.
lscalar
()
s
=
tt
.
lscalar
()
data
=
np
.
array
([
3
,
4
,
5
],
dtype
=
theano
.
config
.
floatX
)
data
=
np
.
array
([
3
,
4
,
5
],
dtype
=
theano
.
config
.
floatX
)
x
=
gpuarray_shared_constructor
(
data
,
borrow
=
True
)
x
=
gpuarray_shared_constructor
(
data
,
borrow
=
True
)
z
=
T
.
zeros
((
s
,))
z
=
tt
.
zeros
((
s
,))
join
=
GpuJoin
(
view
=
0
)
join
=
GpuJoin
(
view
=
0
)
c
=
join
(
0
,
x
,
z
)
c
=
join
(
0
,
x
,
z
)
...
@@ -560,11 +560,11 @@ def test_Gpujoin_inplace():
...
@@ -560,11 +560,11 @@ def test_Gpujoin_inplace():
def
test_gpu_tril_triu
():
def
test_gpu_tril_triu
():
def
check_l
(
m
,
k
=
0
):
def
check_l
(
m
,
k
=
0
):
m_symb
=
T
.
matrix
(
dtype
=
m
.
dtype
)
m_symb
=
tt
.
matrix
(
dtype
=
m
.
dtype
)
k_symb
=
T
.
iscalar
()
k_symb
=
tt
.
iscalar
()
f
=
theano
.
function
(
f
=
theano
.
function
(
[
m_symb
,
k_symb
],
T
.
tril
(
m_symb
,
k_symb
),
mode
=
mode_with_gpu
[
m_symb
,
k_symb
],
tt
.
tril
(
m_symb
,
k_symb
),
mode
=
mode_with_gpu
)
)
result
=
f
(
m
,
k
)
result
=
f
(
m
,
k
)
assert
np
.
allclose
(
result
,
np
.
tril
(
m
,
k
))
assert
np
.
allclose
(
result
,
np
.
tril
(
m
,
k
))
...
@@ -572,10 +572,10 @@ def test_gpu_tril_triu():
...
@@ -572,10 +572,10 @@ def test_gpu_tril_triu():
assert
any
([
isinstance
(
node
.
op
,
GpuTri
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
assert
any
([
isinstance
(
node
.
op
,
GpuTri
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
def
check_u
(
m
,
k
=
0
):
def
check_u
(
m
,
k
=
0
):
m_symb
=
T
.
matrix
(
dtype
=
m
.
dtype
)
m_symb
=
tt
.
matrix
(
dtype
=
m
.
dtype
)
k_symb
=
T
.
iscalar
()
k_symb
=
tt
.
iscalar
()
f
=
theano
.
function
(
f
=
theano
.
function
(
[
m_symb
,
k_symb
],
T
.
triu
(
m_symb
,
k_symb
),
mode
=
mode_with_gpu
[
m_symb
,
k_symb
],
tt
.
triu
(
m_symb
,
k_symb
),
mode
=
mode_with_gpu
)
)
result
=
f
(
m
,
k
)
result
=
f
(
m
,
k
)
assert
np
.
allclose
(
result
,
np
.
triu
(
m
,
k
))
assert
np
.
allclose
(
result
,
np
.
triu
(
m
,
k
))
...
@@ -624,10 +624,10 @@ def test_gputri():
...
@@ -624,10 +624,10 @@ def test_gputri():
# allowed.
# allowed.
if
M
is
None
:
if
M
is
None
:
M
=
N
M
=
N
N_symb
=
T
.
iscalar
()
N_symb
=
tt
.
iscalar
()
M_symb
=
T
.
iscalar
()
M_symb
=
tt
.
iscalar
()
k_symb
=
T
.
iscalar
()
k_symb
=
tt
.
iscalar
()
out
=
T
.
tri
(
N_symb
,
M_symb
,
k_symb
,
dtype
=
dtype
)
+
np
.
array
(
1
)
.
astype
(
dtype
)
out
=
tt
.
tri
(
N_symb
,
M_symb
,
k_symb
,
dtype
=
dtype
)
+
np
.
array
(
1
)
.
astype
(
dtype
)
f
=
theano
.
function
([
N_symb
,
M_symb
,
k_symb
],
out
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
N_symb
,
M_symb
,
k_symb
],
out
,
mode
=
mode_with_gpu
)
result
=
np
.
asarray
(
f
(
N
,
M
,
k
))
-
np
.
array
(
1
)
.
astype
(
dtype
)
result
=
np
.
asarray
(
f
(
N
,
M
,
k
))
-
np
.
array
(
1
)
.
astype
(
dtype
)
assert
np
.
allclose
(
result
,
np
.
tri
(
N
,
M_
,
k
,
dtype
=
dtype
))
assert
np
.
allclose
(
result
,
np
.
tri
(
N
,
M_
,
k
,
dtype
=
dtype
))
...
...
tests/gpuarray/test_ctc.py
浏览文件 @
b4dc02d6
...
@@ -2,7 +2,7 @@ import pytest
...
@@ -2,7 +2,7 @@ import pytest
import
numpy
as
np
import
numpy
as
np
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
tt
import
theano.gpuarray
import
theano.gpuarray
from
theano.gpuarray.ctc
import
gpu_ctc
,
GpuConnectionistTemporalClassification
from
theano.gpuarray.ctc
import
gpu_ctc
,
GpuConnectionistTemporalClassification
...
@@ -56,7 +56,7 @@ class TestCTC:
...
@@ -56,7 +56,7 @@ class TestCTC:
outputs
=
[
cpu_ctc_cost
]
outputs
=
[
cpu_ctc_cost
]
if
compute_grad
:
if
compute_grad
:
# Symbolic gradient of CTC cost
# Symbolic gradient of CTC cost
cpu_ctc_grad
=
T
.
grad
(
T
.
mean
(
cpu_ctc_cost
),
activations
)
cpu_ctc_grad
=
tt
.
grad
(
tt
.
mean
(
cpu_ctc_cost
),
activations
)
outputs
+=
[
cpu_ctc_grad
]
outputs
+=
[
cpu_ctc_grad
]
return
theano
.
function
([],
outputs
,
mode
=
mode
)
return
theano
.
function
([],
outputs
,
mode
=
mode
)
...
@@ -65,7 +65,7 @@ class TestCTC:
...
@@ -65,7 +65,7 @@ class TestCTC:
outputs
=
[
gpu_ctc_cost
]
outputs
=
[
gpu_ctc_cost
]
if
compute_grad
:
if
compute_grad
:
# Symbolic gradient of CTC cost
# Symbolic gradient of CTC cost
gpu_ctc_grad
=
T
.
grad
(
T
.
mean
(
gpu_ctc_cost
),
activations
)
gpu_ctc_grad
=
tt
.
grad
(
tt
.
mean
(
gpu_ctc_cost
),
activations
)
outputs
+=
[
gpu_ctc_grad
]
outputs
+=
[
gpu_ctc_grad
]
return
theano
.
function
([],
outputs
,
mode
=
mode_with_gpu
)
return
theano
.
function
([],
outputs
,
mode
=
mode_with_gpu
)
...
...
tests/gpuarray/test_dnn.py
浏览文件 @
b4dc02d6
...
@@ -2,9 +2,10 @@ import logging
...
@@ -2,9 +2,10 @@ import logging
import
pytest
import
pytest
import
numpy
as
np
import
numpy
as
np
pygpu
=
pytest
.
importorskip
(
"pygpu"
)
pygpu
=
pytest
.
importorskip
(
"pygpu"
)
# noqa
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
tt
import
tests.unittest_tools
as
utt
import
tests.unittest_tools
as
utt
from
itertools
import
product
,
chain
from
itertools
import
product
,
chain
...
@@ -12,17 +13,27 @@ from collections import OrderedDict
...
@@ -12,17 +13,27 @@ from collections import OrderedDict
from
six
import
StringIO
from
six
import
StringIO
from
theano.tensor.signal.pool
import
pool_2d
,
pool_3d
from
theano.tensor.nnet
import
(
from
theano.tensor.signal.pool
import
Pool
,
MaxPoolGrad
,
AveragePoolGrad
bn
,
softmax_op
,
SoftmaxGrad
,
softmax
,
LogSoftmax
,
Softmax
,
conv2d
,
)
from
theano.tensor.nnet.corr3d
import
Corr3dMM
from
theano.tensor.nnet.corr
import
CorrMM
from
theano.tensor.nnet.abstract_conv
import
(
from
theano.tensor.nnet.abstract_conv
import
(
get_conv_output_shape
,
get_conv_output_shape
,
get_conv_gradinputs_shape
,
get_conv_gradinputs_shape
,
)
)
from
theano.tensor.nnet
import
bn
from
theano.tensor.signal.pool
import
pool_2d
,
pool_3d
from
theano.tensor.signal.pool
import
Pool
,
MaxPoolGrad
,
AveragePoolGrad
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_FWD
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_FWD
from
theano.gpuarray
import
dnn
from
theano.gpuarray
import
dnn
from
theano.gpuarray.basic_ops
import
GpuAllocEmpty
from
theano.gpuarray.basic_ops
import
GpuAllocEmpty
from
theano.gpuarray.type
import
gpuarray_shared_constructor
,
GpuArrayType
from
theano.gpuarray.type
import
gpuarray_shared_constructor
,
GpuArrayType
...
@@ -76,7 +87,7 @@ def set_precision(floatX):
...
@@ -76,7 +87,7 @@ def set_precision(floatX):
def
test_dnn_conv_desc_merge
():
def
test_dnn_conv_desc_merge
():
kern_shp
=
T
.
as_tensor_variable
(
np
.
asarray
([
3
,
1
,
2
,
2
])
.
astype
(
"int64"
))
kern_shp
=
tt
.
as_tensor_variable
(
np
.
asarray
([
3
,
1
,
2
,
2
])
.
astype
(
"int64"
))
desc1
=
dnn
.
GpuDnnConvDesc
(
desc1
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
"valid"
,
subsample
=
(
2
,
2
),
dilation
=
(
1
,
1
),
conv_mode
=
"conv"
border_mode
=
"valid"
,
subsample
=
(
2
,
2
),
dilation
=
(
1
,
1
),
conv_mode
=
"conv"
)(
kern_shp
)
)(
kern_shp
)
...
@@ -101,9 +112,9 @@ def test_dnn_conv_merge():
...
@@ -101,9 +112,9 @@ def test_dnn_conv_merge():
# This test that we merge correctly multiple dnn_conv.
# This test that we merge correctly multiple dnn_conv.
img_shp
=
[
2
,
5
,
6
,
8
]
img_shp
=
[
2
,
5
,
6
,
8
]
kern_shp
=
[
3
,
5
,
5
,
6
]
kern_shp
=
[
3
,
5
,
5
,
6
]
img
=
T
.
tensor4
(
"img"
)
img
=
tt
.
tensor4
(
"img"
)
kern
=
T
.
tensor4
(
"kern"
)
kern
=
tt
.
tensor4
(
"kern"
)
out
=
T
.
tensor4
(
"out"
)
out
=
tt
.
tensor4
(
"out"
)
desc
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
"valid"
)(
kern
.
shape
)
desc
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
"valid"
)(
kern
.
shape
)
# Test forward op
# Test forward op
...
@@ -139,9 +150,9 @@ def test_dnn_conv_inplace():
...
@@ -139,9 +150,9 @@ def test_dnn_conv_inplace():
img_shp
=
[
2
,
5
,
6
,
8
]
img_shp
=
[
2
,
5
,
6
,
8
]
kern_shp
=
[
3
,
5
,
5
,
6
]
kern_shp
=
[
3
,
5
,
5
,
6
]
img
=
T
.
tensor4
(
"img"
)
img
=
tt
.
tensor4
(
"img"
)
kern
=
T
.
tensor4
(
"kern"
)
kern
=
tt
.
tensor4
(
"kern"
)
out
=
T
.
tensor4
(
"out"
)
out
=
tt
.
tensor4
(
"out"
)
desc1
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
"valid"
,
conv_mode
=
"conv"
)(
kern
.
shape
)
desc1
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
"valid"
,
conv_mode
=
"conv"
)(
kern
.
shape
)
desc2
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
"valid"
,
conv_mode
=
"cross"
)(
kern
.
shape
)
desc2
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
"valid"
,
conv_mode
=
"cross"
)(
kern
.
shape
)
...
@@ -184,9 +195,9 @@ def test_dnn_conv_inplace():
...
@@ -184,9 +195,9 @@ def test_dnn_conv_inplace():
def
run_dnn_conv_invalid_precision
(
ndim
):
def
run_dnn_conv_invalid_precision
(
ndim
):
bc
=
(
False
,)
*
(
ndim
+
2
)
bc
=
(
False
,)
*
(
ndim
+
2
)
img
=
T
.
tensor
(
theano
.
config
.
floatX
,
broadcastable
=
bc
)
img
=
tt
.
tensor
(
theano
.
config
.
floatX
,
broadcastable
=
bc
)
kerns
=
T
.
tensor
(
theano
.
config
.
floatX
,
broadcastable
=
bc
)
kerns
=
tt
.
tensor
(
theano
.
config
.
floatX
,
broadcastable
=
bc
)
topgrad
=
T
.
tensor
(
theano
.
config
.
floatX
,
broadcastable
=
bc
)
topgrad
=
tt
.
tensor
(
theano
.
config
.
floatX
,
broadcastable
=
bc
)
shape
=
np
.
arange
(
ndim
+
2
)
shape
=
np
.
arange
(
ndim
+
2
)
if
ndim
==
2
:
if
ndim
==
2
:
dnn_conv_func
=
dnn
.
dnn_conv
dnn_conv_func
=
dnn
.
dnn_conv
...
@@ -242,8 +253,8 @@ def test_dnn_conv_invalid_precision():
...
@@ -242,8 +253,8 @@ def test_dnn_conv_invalid_precision():
def
test_dnn_conv_mixed_dtype
():
def
test_dnn_conv_mixed_dtype
():
mf
=
T
.
ftensor4
()
mf
=
tt
.
ftensor4
()
md
=
T
.
dtensor4
()
md
=
tt
.
dtensor4
()
def
assert_types
(
conv
):
def
assert_types
(
conv
):
dt
=
conv
.
owner
.
inputs
[
0
]
.
dtype
dt
=
conv
.
owner
.
inputs
[
0
]
.
dtype
...
@@ -259,8 +270,8 @@ def test_dnn_conv_mixed_dtype():
...
@@ -259,8 +270,8 @@ def test_dnn_conv_mixed_dtype():
def
test_dnn_conv3d_mixed_dtype
():
def
test_dnn_conv3d_mixed_dtype
():
mf
=
T
.
ftensor5
()
mf
=
tt
.
ftensor5
()
md
=
T
.
dtensor5
()
md
=
tt
.
dtensor5
()
def
assert_types
(
conv
):
def
assert_types
(
conv
):
dt
=
conv
.
owner
.
inputs
[
0
]
.
dtype
dt
=
conv
.
owner
.
inputs
[
0
]
.
dtype
...
@@ -280,7 +291,7 @@ def test_pooling():
...
@@ -280,7 +291,7 @@ def test_pooling():
modes
=
get_dnn_pool_modes
()
modes
=
get_dnn_pool_modes
()
x
=
T
.
tensor4
()
x
=
tt
.
tensor4
()
for
mode
,
pad
in
product
(
modes
,
((
0
,
0
),
(
1
,
0
),
(
0
,
1
),
(
2
,
3
),
(
3
,
2
))):
for
mode
,
pad
in
product
(
modes
,
((
0
,
0
),
(
1
,
0
),
(
0
,
1
),
(
2
,
3
),
(
3
,
2
))):
if
pad
!=
(
0
,
0
)
and
mode
==
"average_exc_pad"
:
if
pad
!=
(
0
,
0
)
and
mode
==
"average_exc_pad"
:
# Not implemented
# Not implemented
...
@@ -386,7 +397,7 @@ def test_pooling():
...
@@ -386,7 +397,7 @@ def test_pooling():
def
run_pooling_with_tensor_vars
(
mode
):
def
run_pooling_with_tensor_vars
(
mode
):
utt
.
seed_rng
()
utt
.
seed_rng
()
x
=
T
.
tensor4
()
x
=
tt
.
tensor4
()
ws
=
theano
.
shared
(
np
.
array
([
2
,
2
],
dtype
=
"int32"
))
ws
=
theano
.
shared
(
np
.
array
([
2
,
2
],
dtype
=
"int32"
))
stride
=
theano
.
shared
(
np
.
array
([
1
,
1
],
dtype
=
"int32"
))
stride
=
theano
.
shared
(
np
.
array
([
1
,
1
],
dtype
=
"int32"
))
pad
=
theano
.
shared
(
np
.
array
([
0
,
0
],
dtype
=
"int32"
))
pad
=
theano
.
shared
(
np
.
array
([
0
,
0
],
dtype
=
"int32"
))
...
@@ -446,7 +457,7 @@ def test_pooling3d():
...
@@ -446,7 +457,7 @@ def test_pooling3d():
modes
=
get_dnn_pool_modes
()
modes
=
get_dnn_pool_modes
()
x
=
T
.
tensor5
()
x
=
tt
.
tensor5
()
for
mode
,
pad
in
product
(
for
mode
,
pad
in
product
(
modes
,
modes
,
((
0
,
0
,
0
),
(
1
,
0
,
0
),
(
0
,
1
,
0
),
(
0
,
0
,
1
),
(
2
,
3
,
2
),
(
3
,
2
,
2
),
(
2
,
2
,
3
)),
((
0
,
0
,
0
),
(
1
,
0
,
0
),
(
0
,
1
,
0
),
(
0
,
0
,
1
),
(
2
,
3
,
2
),
(
3
,
2
,
2
),
(
2
,
2
,
3
)),
...
@@ -549,7 +560,7 @@ def test_pooling_opt():
...
@@ -549,7 +560,7 @@ def test_pooling_opt():
utt
.
seed_rng
()
utt
.
seed_rng
()
# 2D pooling
# 2D pooling
x
=
T
.
matrix
()
x
=
tt
.
matrix
()
f
=
theano
.
function
(
f
=
theano
.
function
(
[
x
],
[
x
],
...
@@ -564,7 +575,7 @@ def test_pooling_opt():
...
@@ -564,7 +575,7 @@ def test_pooling_opt():
# gradient of 2D pooling
# gradient of 2D pooling
f
=
theano
.
function
(
f
=
theano
.
function
(
[
x
],
[
x
],
T
.
grad
(
tt
.
grad
(
pool_2d
(
x
,
ws
=
(
2
,
2
),
mode
=
"average_inc_pad"
,
ignore_border
=
True
)
.
sum
(),
x
pool_2d
(
x
,
ws
=
(
2
,
2
),
mode
=
"average_inc_pad"
,
ignore_border
=
True
)
.
sum
(),
x
),
),
mode
=
mode_with_gpu
.
including
(
"cudnn"
),
mode
=
mode_with_gpu
.
including
(
"cudnn"
),
...
@@ -586,7 +597,7 @@ def test_pooling_opt():
...
@@ -586,7 +597,7 @@ def test_pooling_opt():
f
(
data
)
f
(
data
)
# 3D pooling
# 3D pooling
x
=
T
.
tensor3
()
x
=
tt
.
tensor3
()
f
=
theano
.
function
(
f
=
theano
.
function
(
[
x
],
[
x
],
...
@@ -601,7 +612,7 @@ def test_pooling_opt():
...
@@ -601,7 +612,7 @@ def test_pooling_opt():
# gradient of 3D pooling
# gradient of 3D pooling
f
=
theano
.
function
(
f
=
theano
.
function
(
[
x
],
[
x
],
T
.
grad
(
tt
.
grad
(
pool_3d
(
x
,
ws
=
(
2
,
2
,
2
),
mode
=
"average_inc_pad"
,
ignore_border
=
True
)
.
sum
(),
pool_3d
(
x
,
ws
=
(
2
,
2
,
2
),
mode
=
"average_inc_pad"
,
ignore_border
=
True
)
.
sum
(),
x
,
x
,
),
),
...
@@ -632,7 +643,7 @@ def test_pooling_opt_arbitrary_dimensions():
...
@@ -632,7 +643,7 @@ def test_pooling_opt_arbitrary_dimensions():
for
mode
in
modes
:
for
mode
in
modes
:
out_pool
=
Pool
(
ndim
=
len
(
ws
),
mode
=
mode
,
ignore_border
=
True
)(
input
,
ws
)
out_pool
=
Pool
(
ndim
=
len
(
ws
),
mode
=
mode
,
ignore_border
=
True
)(
input
,
ws
)
out_pool_grad
=
T
.
grad
(
T
.
sum
(
out_pool
),
wrt
=
input
)
out_pool_grad
=
tt
.
grad
(
tt
.
sum
(
out_pool
),
wrt
=
input
)
out
=
[
out_pool
,
out_pool_grad
]
out
=
[
out_pool
,
out_pool_grad
]
# run on GPU
# run on GPU
...
@@ -679,14 +690,14 @@ def test_pooling_opt_arbitrary_dimensions():
...
@@ -679,14 +690,14 @@ def test_pooling_opt_arbitrary_dimensions():
def
test_pooling_empty_batch
():
def
test_pooling_empty_batch
():
img_shp
=
(
0
,
5
,
6
,
8
)
img_shp
=
(
0
,
5
,
6
,
8
)
img
=
T
.
ftensor4
(
"img"
)
img
=
tt
.
ftensor4
(
"img"
)
o
=
dnn
.
dnn_pool
(
img
,
(
2
,
2
),
(
2
,
2
))
o
=
dnn
.
dnn_pool
(
img
,
(
2
,
2
),
(
2
,
2
))
f
=
theano
.
function
([
img
],
o
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
img
],
o
,
mode
=
mode_with_gpu
)
d
=
f
(
np
.
random
.
rand
(
*
img_shp
)
.
astype
(
"float32"
))
d
=
f
(
np
.
random
.
rand
(
*
img_shp
)
.
astype
(
"float32"
))
assert
d
.
shape
==
(
0
,
5
,
3
,
4
)
assert
d
.
shape
==
(
0
,
5
,
3
,
4
)
g
=
T
.
grad
(
T
.
sum
(
o
),
wrt
=
img
)
g
=
tt
.
grad
(
tt
.
sum
(
o
),
wrt
=
img
)
f
=
theano
.
function
([
img
],
g
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
img
],
g
,
mode
=
mode_with_gpu
)
d
=
f
(
np
.
random
.
rand
(
*
img_shp
)
.
astype
(
"float32"
))
d
=
f
(
np
.
random
.
rand
(
*
img_shp
)
.
astype
(
"float32"
))
# Not sure what to assert, it should just pass, that's all.
# Not sure what to assert, it should just pass, that's all.
...
@@ -696,7 +707,7 @@ def test_pooling_empty_batch():
...
@@ -696,7 +707,7 @@ def test_pooling_empty_batch():
def
test_dnn_tag
():
def
test_dnn_tag
():
# Test that if cudnn isn't avail we crash and that if it is avail, we use it.
# Test that if cudnn isn't avail we crash and that if it is avail, we use it.
x
=
T
.
tensor4
()
x
=
tt
.
tensor4
()
old
=
theano
.
config
.
on_opt_error
old
=
theano
.
config
.
on_opt_error
theano
.
config
.
on_opt_error
=
"raise"
theano
.
config
.
on_opt_error
=
"raise"
...
@@ -737,7 +748,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -737,7 +748,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
super
()
.
setup_method
()
super
()
.
setup_method
()
def
test_softmax
(
self
):
def
test_softmax
(
self
):
t
=
T
.
tensor4
(
"t"
)
t
=
tt
.
tensor4
(
"t"
)
rand_tensor
=
np
.
asarray
(
np
.
random
.
rand
(
5
,
4
,
3
,
2
),
dtype
=
theano
.
config
.
floatX
)
rand_tensor
=
np
.
asarray
(
np
.
random
.
rand
(
5
,
4
,
3
,
2
),
dtype
=
theano
.
config
.
floatX
)
self
.
_compile_and_check
(
self
.
_compile_and_check
(
[
t
],
[
t
],
...
@@ -748,7 +759,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -748,7 +759,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
self
.
_compile_and_check
(
self
.
_compile_and_check
(
[
t
],
[
t
],
[
T
.
grad
(
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)(
t
)
.
mean
(),
t
)],
[
tt
.
grad
(
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)(
t
)
.
mean
(),
t
)],
[
rand_tensor
],
[
rand_tensor
],
dnn
.
GpuDnnSoftmaxGrad
,
dnn
.
GpuDnnSoftmaxGrad
,
)
)
...
@@ -815,9 +826,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -815,9 +826,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
dilations
+=
[(
2
,
2
)]
dilations
+=
[(
2
,
2
)]
self
.
_test_conv
(
self
.
_test_conv
(
T
.
tensor4
(
"img"
),
tt
.
tensor4
(
"img"
),
T
.
tensor4
(
"kerns"
),
tt
.
tensor4
(
"kerns"
),
T
.
tensor4
(
"out"
),
tt
.
tensor4
(
"out"
),
np
.
random
.
rand
(
7
,
2
,
12
,
16
),
np
.
random
.
rand
(
7
,
2
,
12
,
16
),
np
.
random
.
rand
(
8
,
2
,
4
,
3
),
np
.
random
.
rand
(
8
,
2
,
4
,
3
),
border_mode
,
border_mode
,
...
@@ -834,9 +845,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -834,9 +845,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
dilations
=
[(
1
,
1
,
1
),
(
2
,
2
,
2
)]
if
dnn
.
version
()
>=
6000
else
[(
1
,
1
,
1
)]
dilations
=
[(
1
,
1
,
1
),
(
2
,
2
,
2
)]
if
dnn
.
version
()
>=
6000
else
[(
1
,
1
,
1
)]
self
.
_test_conv
(
self
.
_test_conv
(
T
.
tensor5
(
"img"
),
tt
.
tensor5
(
"img"
),
T
.
tensor5
(
"kerns"
),
tt
.
tensor5
(
"kerns"
),
T
.
tensor5
(
"out"
),
tt
.
tensor5
(
"out"
),
np
.
random
.
rand
(
10
,
2
,
15
,
16
,
17
),
np
.
random
.
rand
(
10
,
2
,
15
,
16
,
17
),
np
.
random
.
rand
(
8
,
2
,
4
,
3
,
1
),
np
.
random
.
rand
(
8
,
2
,
4
,
3
,
1
),
border_mode
,
border_mode
,
...
@@ -901,9 +912,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -901,9 +912,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
dilations
=
[(
1
,
1
),
(
2
,
2
)]
if
dnn
.
version
()
>=
6000
else
[(
1
,
1
)]
dilations
=
[(
1
,
1
),
(
2
,
2
)]
if
dnn
.
version
()
>=
6000
else
[(
1
,
1
)]
self
.
_test_conv_gradw
(
self
.
_test_conv_gradw
(
T
.
tensor4
(
"img"
),
tt
.
tensor4
(
"img"
),
T
.
tensor4
(
"topgrad"
),
tt
.
tensor4
(
"topgrad"
),
T
.
tensor4
(
"kerns"
),
tt
.
tensor4
(
"kerns"
),
(
5
,
2
,
6
,
13
),
(
5
,
2
,
6
,
13
),
(
1
,
2
,
3
,
7
),
(
1
,
2
,
3
,
7
),
border_mode
,
border_mode
,
...
@@ -913,9 +924,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -913,9 +924,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
)
)
def
test_conv_gradi
(
self
):
def
test_conv_gradi
(
self
):
img
=
T
.
tensor4
(
"img"
)
img
=
tt
.
tensor4
(
"img"
)
kerns
=
T
.
tensor4
(
"kerns"
)
kerns
=
tt
.
tensor4
(
"kerns"
)
out
=
T
.
tensor4
(
"out"
)
out
=
tt
.
tensor4
(
"out"
)
kern_vals
=
np
.
asarray
(
np
.
random
.
rand
(
13
,
4
,
5
,
6
),
dtype
=
theano
.
config
.
floatX
)
kern_vals
=
np
.
asarray
(
np
.
random
.
rand
(
13
,
4
,
5
,
6
),
dtype
=
theano
.
config
.
floatX
)
out_vals
=
np
.
asarray
(
np
.
random
.
rand
(
3
,
13
,
9
,
11
),
dtype
=
theano
.
config
.
floatX
)
out_vals
=
np
.
asarray
(
np
.
random
.
rand
(
3
,
13
,
9
,
11
),
dtype
=
theano
.
config
.
floatX
)
...
@@ -948,7 +959,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -948,7 +959,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
)
)
def
test_pool
(
self
):
def
test_pool
(
self
):
img
=
T
.
tensor4
(
"img"
)
img
=
tt
.
tensor4
(
"img"
)
img_val
=
np
.
asarray
(
np
.
random
.
rand
(
2
,
3
,
4
,
5
),
dtype
=
theano
.
config
.
floatX
)
img_val
=
np
.
asarray
(
np
.
random
.
rand
(
2
,
3
,
4
,
5
),
dtype
=
theano
.
config
.
floatX
)
modes
=
get_dnn_pool_modes
()
modes
=
get_dnn_pool_modes
()
...
@@ -964,7 +975,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -964,7 +975,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
)
)
def
test_pool_3d
(
self
):
def
test_pool_3d
(
self
):
img
=
T
.
tensor5
(
"img"
)
img
=
tt
.
tensor5
(
"img"
)
img_val
=
np
.
asarray
(
np
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
theano
.
config
.
floatX
)
img_val
=
np
.
asarray
(
np
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
theano
.
config
.
floatX
)
modes
=
get_dnn_pool_modes
()
modes
=
get_dnn_pool_modes
()
...
@@ -980,9 +991,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -980,9 +991,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
)
)
def
test_pool_grad
(
self
):
def
test_pool_grad
(
self
):
img
=
T
.
tensor4
(
"img"
)
img
=
tt
.
tensor4
(
"img"
)
img_grad
=
T
.
tensor4
(
"img_grad"
)
img_grad
=
tt
.
tensor4
(
"img_grad"
)
out
=
T
.
tensor4
(
"out"
)
out
=
tt
.
tensor4
(
"out"
)
img_val
=
np
.
asarray
(
np
.
random
.
rand
(
2
,
3
,
4
,
5
),
dtype
=
theano
.
config
.
floatX
)
img_val
=
np
.
asarray
(
np
.
random
.
rand
(
2
,
3
,
4
,
5
),
dtype
=
theano
.
config
.
floatX
)
img_grad_val
=
np
.
asarray
(
img_grad_val
=
np
.
asarray
(
np
.
random
.
rand
(
2
,
3
,
4
,
5
),
dtype
=
theano
.
config
.
floatX
np
.
random
.
rand
(
2
,
3
,
4
,
5
),
dtype
=
theano
.
config
.
floatX
...
@@ -1006,9 +1017,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -1006,9 +1017,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
)
)
def
test_pool_3d_grad
(
self
):
def
test_pool_3d_grad
(
self
):
img
=
T
.
tensor5
(
"img"
)
img
=
tt
.
tensor5
(
"img"
)
img_grad
=
T
.
tensor5
(
"img_grad"
)
img_grad
=
tt
.
tensor5
(
"img_grad"
)
out
=
T
.
tensor5
(
"out"
)
out
=
tt
.
tensor5
(
"out"
)
img_val
=
np
.
asarray
(
np
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
theano
.
config
.
floatX
)
img_val
=
np
.
asarray
(
np
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
theano
.
config
.
floatX
)
img_grad_val
=
np
.
asarray
(
img_grad_val
=
np
.
asarray
(
np
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
theano
.
config
.
floatX
np
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
theano
.
config
.
floatX
...
@@ -1034,8 +1045,8 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -1034,8 +1045,8 @@ class TestDnnInferShapes(utt.InferShapeTester):
# this has been a problem in the past
# this has been a problem in the past
def
test_dnn_conv_border_mode
():
def
test_dnn_conv_border_mode
():
img
=
T
.
tensor4
()
img
=
tt
.
tensor4
()
kern
=
T
.
tensor4
()
kern
=
tt
.
tensor4
()
dnn
.
dnn_conv
(
img
,
kern
,
border_mode
=
1
)
dnn
.
dnn_conv
(
img
,
kern
,
border_mode
=
1
)
dnn
.
dnn_conv
(
img
,
kern
,
border_mode
=
(
2
,
3
))
dnn
.
dnn_conv
(
img
,
kern
,
border_mode
=
(
2
,
3
))
...
@@ -1047,9 +1058,9 @@ def test_dnn_conv_border_mode():
...
@@ -1047,9 +1058,9 @@ def test_dnn_conv_border_mode():
def
test_dnn_conv_alpha_output_merge
():
def
test_dnn_conv_alpha_output_merge
():
utt
.
seed_rng
()
utt
.
seed_rng
()
img
=
T
.
tensor4
()
img
=
tt
.
tensor4
()
kern
=
T
.
tensor4
()
kern
=
tt
.
tensor4
()
out
=
T
.
tensor4
()
out
=
tt
.
tensor4
()
b
=
1
b
=
1
c
=
4
c
=
4
...
@@ -1313,7 +1324,7 @@ def test_conv3d_fwd():
...
@@ -1313,7 +1324,7 @@ def test_conv3d_fwd():
flipped_filters
=
filters
flipped_filters
=
filters
# Compile a theano function for the reference implementation
# Compile a theano function for the reference implementation
conv_ref
=
theano
.
tensor
.
nnet
.
corr3d
.
Corr3dMM
(
conv_ref
=
Corr3dMM
(
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_dilation
=
dilation
,
filter_dilation
=
dilation
,
...
@@ -1365,7 +1376,7 @@ def test_conv3d_bwd():
...
@@ -1365,7 +1376,7 @@ def test_conv3d_bwd():
conv_mode
=
conv_mode
,
conv_mode
=
conv_mode
,
)
)
grad_i
,
grad_w
=
t
heano
.
tensor
.
grad
(
conv
.
sum
(),
[
inputs
,
filters
])
grad_i
,
grad_w
=
t
t
.
grad
(
conv
.
sum
(),
[
inputs
,
filters
])
f
=
theano
.
function
([],
[
grad_i
,
grad_w
],
mode
=
mode_with_gpu
)
f
=
theano
.
function
([],
[
grad_i
,
grad_w
],
mode
=
mode_with_gpu
)
...
@@ -1377,12 +1388,12 @@ def test_conv3d_bwd():
...
@@ -1377,12 +1388,12 @@ def test_conv3d_bwd():
flipped_filters
=
filters
flipped_filters
=
filters
# Compile a theano function for the reference implementation
# Compile a theano function for the reference implementation
conv_ref
=
theano
.
tensor
.
nnet
.
corr3d
.
Corr3dMM
(
conv_ref
=
Corr3dMM
(
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_dilation
=
dilation
,
filter_dilation
=
dilation
,
)(
ref_cast
(
inputs
),
flipped_filters
)
)(
ref_cast
(
inputs
),
flipped_filters
)
(
grad_i_ref
,
grad_w_ref
)
=
t
heano
.
tensor
.
grad
(
conv_ref
.
sum
(),
[
inputs
,
filters
])
(
grad_i_ref
,
grad_w_ref
)
=
t
t
.
grad
(
conv_ref
.
sum
(),
[
inputs
,
filters
])
f_ref
=
theano
.
function
([],
[
grad_i_ref
,
grad_w_ref
],
mode
=
"FAST_RUN"
)
f_ref
=
theano
.
function
([],
[
grad_i_ref
,
grad_w_ref
],
mode
=
"FAST_RUN"
)
# Compare the results of the two implementations
# Compare the results of the two implementations
...
@@ -1418,22 +1429,22 @@ class TestSoftMax(test_nnet.TestSoftMax):
...
@@ -1418,22 +1429,22 @@ class TestSoftMax(test_nnet.TestSoftMax):
data
=
np
.
arange
(
np
.
product
(
dims
),
dtype
=
theano
.
config
.
floatX
)
.
reshape
(
dims
)
data
=
np
.
arange
(
np
.
product
(
dims
),
dtype
=
theano
.
config
.
floatX
)
.
reshape
(
dims
)
# Verify the forward op
# Verify the forward op
x_gpu
=
T
.
tensor4
(
"x_gpu"
)
x_gpu
=
tt
.
tensor4
(
"x_gpu"
)
f_gpu
=
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)(
x_gpu
)
f_gpu
=
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)(
x_gpu
)
f_gpu
=
theano
.
function
([
x_gpu
],
f_gpu
,
mode
=
self
.
mode
)
f_gpu
=
theano
.
function
([
x_gpu
],
f_gpu
,
mode
=
self
.
mode
)
assert
f_gpu
(
data
)
.
shape
==
dims
assert
f_gpu
(
data
)
.
shape
==
dims
# Verify the gradient op
# Verify the gradient op
dy_gpu
=
T
.
tensor4
(
"dy_gpu"
)
dy_gpu
=
tt
.
tensor4
(
"dy_gpu"
)
sm_gpu
=
T
.
tensor4
(
"sm_gpu"
)
sm_gpu
=
tt
.
tensor4
(
"sm_gpu"
)
f_grad_gpu
=
dnn
.
GpuDnnSoftmaxGrad
(
"accurate"
,
"channel"
)(
dy_gpu
,
sm_gpu
)
f_grad_gpu
=
dnn
.
GpuDnnSoftmaxGrad
(
"accurate"
,
"channel"
)(
dy_gpu
,
sm_gpu
)
f_grad_gpu
=
theano
.
function
([
dy_gpu
,
sm_gpu
],
f_grad_gpu
,
mode
=
self
.
mode
)
f_grad_gpu
=
theano
.
function
([
dy_gpu
,
sm_gpu
],
f_grad_gpu
,
mode
=
self
.
mode
)
assert
f_grad_gpu
(
data
,
data
)
.
shape
==
dims
assert
f_grad_gpu
(
data
,
data
)
.
shape
==
dims
def
test_softmax_f16
(
self
):
def
test_softmax_f16
(
self
):
x
=
T
.
matrix
(
"x"
,
"float16"
)
x
=
tt
.
matrix
(
"x"
,
"float16"
)
x_gpu
=
T
.
tensor4
(
"x_gpu"
,
"float16"
)
x_gpu
=
tt
.
tensor4
(
"x_gpu"
,
"float16"
)
f_z
=
T
.
nnet
.
softmax_op
f_z
=
softmax_op
f_gpu
=
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)
f_gpu
=
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)
def
cmp
(
n
,
m
,
f
,
f_gpu
):
def
cmp
(
n
,
m
,
f
,
f_gpu
):
...
@@ -1455,15 +1466,15 @@ class TestSoftMax(test_nnet.TestSoftMax):
...
@@ -1455,15 +1466,15 @@ class TestSoftMax(test_nnet.TestSoftMax):
gout
=
np
.
asarray
(
f_gpu
(
gdata
))[:,
:,
0
,
0
]
gout
=
np
.
asarray
(
f_gpu
(
gdata
))[:,
:,
0
,
0
]
utt
.
assert_allclose
(
out
,
gout
)
utt
.
assert_allclose
(
out
,
gout
)
x
=
T
.
matrix
(
"x"
)
x
=
tt
.
matrix
(
"x"
)
x_gpu
=
T
.
tensor4
(
"x_gpu"
)
x_gpu
=
tt
.
tensor4
(
"x_gpu"
)
f_z
=
T
.
nnet
.
softmax_op
f_z
=
softmax_op
f_gpu
=
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)
f_gpu
=
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)
# Verify the grad operation
# Verify the grad operation
dims
=
(
2
,
3
,
4
,
5
)
dims
=
(
2
,
3
,
4
,
5
)
gdata
=
np
.
arange
(
np
.
product
(
dims
),
dtype
=
theano
.
config
.
floatX
)
.
reshape
(
dims
)
gdata
=
np
.
arange
(
np
.
product
(
dims
),
dtype
=
theano
.
config
.
floatX
)
.
reshape
(
dims
)
T
.
verify_grad
(
f_gpu
,
[
gdata
],
rng
=
np
.
random
,
mode
=
mode_with_gpu
)
tt
.
verify_grad
(
f_gpu
,
[
gdata
],
rng
=
np
.
random
,
mode
=
mode_with_gpu
)
# Verify that the CPU and GPU implementations return the same results
# Verify that the CPU and GPU implementations return the same results
# up to a tolerance.
# up to a tolerance.
...
@@ -1474,65 +1485,34 @@ class TestSoftMax(test_nnet.TestSoftMax):
...
@@ -1474,65 +1485,34 @@ class TestSoftMax(test_nnet.TestSoftMax):
# Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
# Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
# optimization is applied when cudnn is required
# optimization is applied when cudnn is required
y
=
T
.
vector
(
"y"
)
y
=
tt
.
vector
(
"y"
)
f
=
theano
.
function
(
f
=
theano
.
function
([
y
],
tt
.
grad
(
softmax
(
y
)
.
mean
(),
y
),
mode
=
mode_with_gpu
)
[
y
],
T
.
grad
(
T
.
nnet
.
softmax
(
y
)
.
mean
(),
y
),
mode
=
mode_with_gpu
)
sorted_f
=
f
.
maker
.
fgraph
.
toposort
()
sorted_f
=
f
.
maker
.
fgraph
.
toposort
()
val
=
np
.
random
.
rand
(
5
)
.
astype
(
theano
.
config
.
floatX
)
val
=
np
.
random
.
rand
(
5
)
.
astype
(
theano
.
config
.
floatX
)
out_dnn
=
f
(
val
)
out_dnn
=
f
(
val
)
assert
len
([
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
self
.
gpu_grad_op
)])
==
1
assert
len
([
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
self
.
gpu_grad_op
)])
==
1
assert
(
assert
len
([
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
SoftmaxGrad
)])
==
0
len
(
[
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
theano
.
tensor
.
nnet
.
SoftmaxGrad
)
]
)
==
0
)
# Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
# Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
# optimization is not applied when cudnn is excluded or not
# optimization is not applied when cudnn is excluded or not
# available
# available
mode_wo_cudnn
=
mode_with_gpu
.
excluding
(
"cudnn"
)
mode_wo_cudnn
=
mode_with_gpu
.
excluding
(
"cudnn"
)
y
=
T
.
vector
(
"y"
)
y
=
tt
.
vector
(
"y"
)
f
=
theano
.
function
(
f
=
theano
.
function
([
y
],
tt
.
grad
(
softmax
(
y
)
.
mean
(),
y
),
mode
=
mode_wo_cudnn
)
[
y
],
T
.
grad
(
T
.
nnet
.
softmax
(
y
)
.
mean
(),
y
),
mode
=
mode_wo_cudnn
)
sorted_f
=
f
.
maker
.
fgraph
.
toposort
()
sorted_f
=
f
.
maker
.
fgraph
.
toposort
()
out_cpu
=
f
(
val
)
out_cpu
=
f
(
val
)
utt
.
assert_allclose
(
out_dnn
,
out_cpu
)
utt
.
assert_allclose
(
out_dnn
,
out_cpu
)
assert
len
([
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
self
.
gpu_grad_op
)])
==
0
assert
len
([
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
self
.
gpu_grad_op
)])
==
0
assert
(
assert
len
([
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
SoftmaxGrad
)])
==
1
len
(
[
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
theano
.
tensor
.
nnet
.
SoftmaxGrad
)
]
)
==
1
)
# Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad do not
# Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad do not
# crash with manual graph
# crash with manual graph
y
=
T
.
vector
(
"y"
)
y
=
tt
.
vector
(
"y"
)
o
=
theano
.
tensor
.
nnet
.
SoftmaxGrad
()(
y
,
y
*
2
)
o
=
SoftmaxGrad
()(
y
,
y
*
2
)
f
=
theano
.
function
([
y
],
o
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
y
],
o
,
mode
=
mode_with_gpu
)
sorted_f
=
f
.
maker
.
fgraph
.
toposort
()
sorted_f
=
f
.
maker
.
fgraph
.
toposort
()
assert
len
([
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
self
.
gpu_grad_op
)])
==
1
assert
len
([
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
self
.
gpu_grad_op
)])
==
1
assert
(
assert
len
([
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
SoftmaxGrad
)])
==
0
len
(
[
i
for
i
in
sorted_f
if
isinstance
(
i
.
op
,
theano
.
tensor
.
nnet
.
SoftmaxGrad
)
]
)
==
0
)
@pytest.mark.skipif
(
@pytest.mark.skipif
(
dnn
.
version
(
raises
=
False
)
<
3000
,
reason
=
"Log-softmax is only in cudnn v3+"
dnn
.
version
(
raises
=
False
)
<
3000
,
reason
=
"Log-softmax is only in cudnn v3+"
...
@@ -1540,9 +1520,9 @@ class TestSoftMax(test_nnet.TestSoftMax):
...
@@ -1540,9 +1520,9 @@ class TestSoftMax(test_nnet.TestSoftMax):
def
test_log_softmax
(
self
):
def
test_log_softmax
(
self
):
# This is a test for an optimization that depends on cuDNN v3 or
# This is a test for an optimization that depends on cuDNN v3 or
# more recent. Don't test if the cuDNN version is too old.
# more recent. Don't test if the cuDNN version is too old.
x
=
T
.
tensor4
()
x
=
tt
.
tensor4
()
softmax_out
=
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)(
x
)
softmax_out
=
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)(
x
)
log_out
=
T
.
log
(
T
.
as_tensor_variable
(
softmax_out
))
log_out
=
tt
.
log
(
tt
.
as_tensor_variable
(
softmax_out
))
f
=
theano
.
function
([
x
],
log_out
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
x
],
log_out
,
mode
=
mode_with_gpu
)
...
@@ -1585,11 +1565,11 @@ class TestSoftMax(test_nnet.TestSoftMax):
...
@@ -1585,11 +1565,11 @@ class TestSoftMax(test_nnet.TestSoftMax):
# Compile a reference function, on the CPU, to be used to validate the
# Compile a reference function, on the CPU, to be used to validate the
# results of the other function.
# results of the other function.
x
=
T
.
matrix
()
x
=
tt
.
matrix
()
f_ref
=
theano
.
function
([
x
],
T
.
nnet
.
LogSoftmax
()(
x
))
f_ref
=
theano
.
function
([
x
],
LogSoftmax
()(
x
))
# Build the first graph and ensure that the optimization is applied
# Build the first graph and ensure that the optimization is applied
log_softmax_out
=
T
.
nnet
.
LogSoftmax
()(
x
)
log_softmax_out
=
LogSoftmax
()(
x
)
f
=
theano
.
function
([
x
],
log_softmax_out
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
x
],
log_softmax_out
,
mode
=
mode_with_gpu
)
dnn_softmax_nodes
=
[
dnn_softmax_nodes
=
[
...
@@ -1603,7 +1583,7 @@ class TestSoftMax(test_nnet.TestSoftMax):
...
@@ -1603,7 +1583,7 @@ class TestSoftMax(test_nnet.TestSoftMax):
utt
.
assert_allclose
(
f
(
inp
),
f_ref
(
inp
))
utt
.
assert_allclose
(
f
(
inp
),
f_ref
(
inp
))
# Build the first graph and ensure that the optimization is applied
# Build the first graph and ensure that the optimization is applied
log_softmax_out
=
T
.
log
(
T
.
nnet
.
Softmax
()(
x
))
log_softmax_out
=
tt
.
log
(
Softmax
()(
x
))
f
=
theano
.
function
([
x
],
log_softmax_out
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
x
],
log_softmax_out
,
mode
=
mode_with_gpu
)
dnn_softmax_nodes
=
[
dnn_softmax_nodes
=
[
...
@@ -1618,7 +1598,7 @@ class TestSoftMax(test_nnet.TestSoftMax):
...
@@ -1618,7 +1598,7 @@ class TestSoftMax(test_nnet.TestSoftMax):
def
dnn_reduction
(
nd
,
idtype
,
acc_dtype
,
odtype
):
def
dnn_reduction
(
nd
,
idtype
,
acc_dtype
,
odtype
):
inp
=
T
.
TensorType
(
idtype
,
(
False
,)
*
nd
)()
inp
=
tt
.
TensorType
(
idtype
,
(
False
,)
*
nd
)()
res
=
inp
.
sum
(
acc_dtype
=
acc_dtype
,
dtype
=
odtype
)
res
=
inp
.
sum
(
acc_dtype
=
acc_dtype
,
dtype
=
odtype
)
f
=
theano
.
function
([
inp
],
res
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
inp
],
res
,
mode
=
mode_with_gpu
)
assert
any
(
assert
any
(
...
@@ -1641,7 +1621,7 @@ def test_dnn_reduction_opt():
...
@@ -1641,7 +1621,7 @@ def test_dnn_reduction_opt():
@pytest.mark.skipif
(
dnn
.
version
(
raises
=
False
)
<
6000
,
reason
=
dnn
.
dnn_available
.
msg
)
@pytest.mark.skipif
(
dnn
.
version
(
raises
=
False
)
<
6000
,
reason
=
dnn
.
dnn_available
.
msg
)
def
test_dnn_reduction_sum_squares
():
def
test_dnn_reduction_sum_squares
():
M
=
T
.
matrix
()
M
=
tt
.
matrix
()
for
axis
in
(
None
,
0
,
1
):
for
axis
in
(
None
,
0
,
1
):
out
=
(
M
**
2
)
.
sum
(
axis
=
axis
)
out
=
(
M
**
2
)
.
sum
(
axis
=
axis
)
f
=
theano
.
function
([
M
],
out
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
M
],
out
,
mode
=
mode_with_gpu
)
...
@@ -1655,7 +1635,7 @@ def test_dnn_reduction_sum_squares():
...
@@ -1655,7 +1635,7 @@ def test_dnn_reduction_sum_squares():
@pytest.mark.skipif
(
dnn
.
version
(
raises
=
False
)
<
6000
,
reason
=
dnn
.
dnn_available
.
msg
)
@pytest.mark.skipif
(
dnn
.
version
(
raises
=
False
)
<
6000
,
reason
=
dnn
.
dnn_available
.
msg
)
def
test_dnn_reduction_sum_abs
():
def
test_dnn_reduction_sum_abs
():
M
=
T
.
matrix
()
M
=
tt
.
matrix
()
for
axis
in
(
None
,
0
,
1
):
for
axis
in
(
None
,
0
,
1
):
out
=
abs
(
M
)
.
sum
(
axis
=
axis
)
out
=
abs
(
M
)
.
sum
(
axis
=
axis
)
f
=
theano
.
function
([
M
],
out
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
M
],
out
,
mode
=
mode_with_gpu
)
...
@@ -1669,7 +1649,7 @@ def test_dnn_reduction_sum_abs():
...
@@ -1669,7 +1649,7 @@ def test_dnn_reduction_sum_abs():
@pytest.mark.skipif
(
dnn
.
version
(
raises
=
False
)
<
6000
,
reason
=
dnn
.
dnn_available
.
msg
)
@pytest.mark.skipif
(
dnn
.
version
(
raises
=
False
)
<
6000
,
reason
=
dnn
.
dnn_available
.
msg
)
def
test_dnn_reduction_absmax
():
def
test_dnn_reduction_absmax
():
M
=
T
.
matrix
()
M
=
tt
.
matrix
()
for
axis
in
(
None
,
0
,
1
):
for
axis
in
(
None
,
0
,
1
):
out
=
abs
(
M
)
.
max
(
axis
=
axis
)
out
=
abs
(
M
)
.
max
(
axis
=
axis
)
f
=
theano
.
function
([
M
],
out
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
M
],
out
,
mode
=
mode_with_gpu
)
...
@@ -1692,9 +1672,7 @@ def test_dnn_reduction_axis_size_one():
...
@@ -1692,9 +1672,7 @@ def test_dnn_reduction_axis_size_one():
[(
4
,
1
,
6
,
1
),
(
1
,
3
)],
[(
4
,
1
,
6
,
1
),
(
1
,
3
)],
]:
]:
x
=
theano
.
tensor
.
TensorType
(
x
=
tt
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
[
False
]
*
len
(
shape
))()
dtype
=
dtype
,
broadcastable
=
[
False
]
*
len
(
shape
)
)()
sum
=
x
.
sum
(
axis
=
axis
)
sum
=
x
.
sum
(
axis
=
axis
)
sum_squares
=
(
x
**
2
)
.
sum
(
axis
=
axis
)
sum_squares
=
(
x
**
2
)
.
sum
(
axis
=
axis
)
sum_abs
=
abs
(
x
)
.
sum
(
axis
=
axis
)
sum_abs
=
abs
(
x
)
.
sum
(
axis
=
axis
)
...
@@ -1779,9 +1757,9 @@ def test_dnn_reduction_error():
...
@@ -1779,9 +1757,9 @@ def test_dnn_reduction_error():
slow_output
=
np
.
sum
(
slow_output
.
transpose
(),
axis
=
1
)
slow_output
=
np
.
sum
(
slow_output
.
transpose
(),
axis
=
1
)
vecT
=
T
.
vector
(
dtype
=
theano
.
config
.
floatX
)
vecT
=
tt
.
vector
(
dtype
=
theano
.
config
.
floatX
)
outputT
=
T
.
alloc
(
2.0
*
vecT
,
5
,
vecT
.
shape
[
0
])
outputT
=
tt
.
alloc
(
2.0
*
vecT
,
5
,
vecT
.
shape
[
0
])
outputSummedT
=
T
.
sum
(
T
.
transpose
(
outputT
),
axis
=
1
)
outputSummedT
=
tt
.
sum
(
tt
.
transpose
(
outputT
),
axis
=
1
)
f3
=
theano
.
function
(
inputs
=
[
vecT
],
outputs
=
outputSummedT
)
f3
=
theano
.
function
(
inputs
=
[
vecT
],
outputs
=
outputSummedT
)
output
=
f3
(
vec
)
output
=
f3
(
vec
)
...
@@ -1789,8 +1767,8 @@ def test_dnn_reduction_error():
...
@@ -1789,8 +1767,8 @@ def test_dnn_reduction_error():
def
dnn_maxargmax
(
nd
,
idtype
,
axis
):
def
dnn_maxargmax
(
nd
,
idtype
,
axis
):
inp
=
T
.
TensorType
(
idtype
,
(
False
,)
*
nd
)()
inp
=
tt
.
TensorType
(
idtype
,
(
False
,)
*
nd
)()
res
=
T
.
max_and_argmax
(
inp
,
axis
=
axis
)
res
=
tt
.
max_and_argmax
(
inp
,
axis
=
axis
)
f
=
theano
.
function
([
inp
],
res
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
inp
],
res
,
mode
=
mode_with_gpu
)
assert
any
(
assert
any
(
isinstance
(
n
.
op
,
dnn
.
GpuDnnReduction
)
for
n
in
f
.
maker
.
fgraph
.
apply_nodes
isinstance
(
n
.
op
,
dnn
.
GpuDnnReduction
)
for
n
in
f
.
maker
.
fgraph
.
apply_nodes
...
@@ -1819,7 +1797,14 @@ def test_dnn_batchnorm_train():
...
@@ -1819,7 +1797,14 @@ def test_dnn_batchnorm_train():
utt
.
seed_rng
()
utt
.
seed_rng
()
for
mode
in
(
"per-activation"
,
"spatial"
):
for
mode
in
(
"per-activation"
,
"spatial"
):
for
vartype
in
(
T
.
tensor6
,
T
.
tensor5
,
T
.
tensor4
,
T
.
tensor3
,
T
.
matrix
,
T
.
vector
):
for
vartype
in
(
tt
.
tensor6
,
tt
.
tensor5
,
tt
.
tensor4
,
tt
.
tensor3
,
tt
.
matrix
,
tt
.
vector
,
):
x
,
scale
,
bias
,
running_mean
,
running_var
=
(
x
,
scale
,
bias
,
running_mean
,
running_var
=
(
vartype
(
n
)
vartype
(
n
)
for
n
in
(
"x"
,
"scale"
,
"bias"
,
"running_mean"
,
"running_var"
)
for
n
in
(
"x"
,
"scale"
,
"bias"
,
"running_mean"
,
"running_var"
)
...
@@ -1869,10 +1854,10 @@ def test_dnn_batchnorm_train():
...
@@ -1869,10 +1854,10 @@ def test_dnn_batchnorm_train():
axes
=
(
0
,)
+
tuple
(
range
(
2
,
ndim
))
axes
=
(
0
,)
+
tuple
(
range
(
2
,
ndim
))
x_mean_ref
=
x
.
mean
(
axis
=
axes
,
keepdims
=
True
)
x_mean_ref
=
x
.
mean
(
axis
=
axes
,
keepdims
=
True
)
x_var_ref
=
x
.
var
(
axis
=
axes
,
keepdims
=
True
)
x_var_ref
=
x
.
var
(
axis
=
axes
,
keepdims
=
True
)
x_invstd_ref
=
T
.
inv
(
T
.
sqrt
(
x_var_ref
+
eps
))
x_invstd_ref
=
tt
.
inv
(
tt
.
sqrt
(
x_var_ref
+
eps
))
scale_ref
=
T
.
addbroadcast
(
scale
,
*
axes
)
scale_ref
=
tt
.
addbroadcast
(
scale
,
*
axes
)
bias_ref
=
T
.
addbroadcast
(
bias
,
*
axes
)
bias_ref
=
tt
.
addbroadcast
(
bias
,
*
axes
)
m
=
T
.
cast
(
T
.
prod
(
x
.
shape
)
/
T
.
prod
(
scale
.
shape
),
theano
.
config
.
floatX
)
m
=
tt
.
cast
(
tt
.
prod
(
x
.
shape
)
/
tt
.
prod
(
scale
.
shape
),
theano
.
config
.
floatX
)
out_ref
=
(
x
-
x_mean_ref
)
*
(
scale_ref
*
x_invstd_ref
)
+
bias_ref
out_ref
=
(
x
-
x_mean_ref
)
*
(
scale_ref
*
x_invstd_ref
)
+
bias_ref
out_running_mean_ref
=
(
out_running_mean_ref
=
(
running_mean
*
(
1
-
running_average_factor
)
running_mean
*
(
1
-
running_average_factor
)
...
@@ -1884,12 +1869,12 @@ def test_dnn_batchnorm_train():
...
@@ -1884,12 +1869,12 @@ def test_dnn_batchnorm_train():
)
)
# backward pass
# backward pass
dy
=
vartype
(
"dy"
)
dy
=
vartype
(
"dy"
)
grads_gpu
=
T
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_gpu
:
dy
})
grads_gpu
=
tt
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_gpu
:
dy
})
grads_abstract
=
T
.
grad
(
grads_abstract
=
tt
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_abstract
:
dy
}
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_abstract
:
dy
}
)
)
# reference backward pass
# reference backward pass
grads_ref
=
T
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_ref
:
dy
})
grads_ref
=
tt
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_ref
:
dy
})
# compile
# compile
f_gpu
=
theano
.
function
(
f_gpu
=
theano
.
function
(
[
x
,
scale
,
bias
,
running_mean
,
running_var
,
dy
],
[
x
,
scale
,
bias
,
running_mean
,
running_var
,
dy
],
...
@@ -2011,10 +1996,10 @@ def test_dnn_batchnorm_train_without_running_averages():
...
@@ -2011,10 +1996,10 @@ def test_dnn_batchnorm_train_without_running_averages():
utt
.
seed_rng
()
utt
.
seed_rng
()
x
,
scale
,
bias
,
dy
=
(
x
,
scale
,
bias
,
dy
=
(
T
.
tensor4
(
"x"
),
tt
.
tensor4
(
"x"
),
T
.
tensor4
(
"scale"
),
tt
.
tensor4
(
"scale"
),
T
.
tensor4
(
"bias"
),
tt
.
tensor4
(
"bias"
),
T
.
tensor4
(
"dy"
),
tt
.
tensor4
(
"dy"
),
)
)
data_shape
=
(
5
,
10
,
30
,
25
)
data_shape
=
(
5
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
...
@@ -2027,8 +2012,8 @@ def test_dnn_batchnorm_train_without_running_averages():
...
@@ -2027,8 +2012,8 @@ def test_dnn_batchnorm_train_without_running_averages():
x
,
scale
,
bias
,
"per-activation"
x
,
scale
,
bias
,
"per-activation"
)
)
# backward pass
# backward pass
grads_gpu
=
T
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_gpu
:
dy
})
grads_gpu
=
tt
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_gpu
:
dy
})
grads_abstract
=
T
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_abstract
:
dy
})
grads_abstract
=
tt
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_abstract
:
dy
})
# compile
# compile
f_gpu
=
theano
.
function
(
f_gpu
=
theano
.
function
(
[
x
,
scale
,
bias
,
dy
],
[
x
,
scale
,
bias
,
dy
],
...
@@ -2081,10 +2066,10 @@ def test_without_dnn_batchnorm_train_without_running_averages():
...
@@ -2081,10 +2066,10 @@ def test_without_dnn_batchnorm_train_without_running_averages():
utt
.
seed_rng
()
utt
.
seed_rng
()
x
,
scale
,
bias
,
dy
=
(
x
,
scale
,
bias
,
dy
=
(
T
.
tensor4
(
"x"
),
tt
.
tensor4
(
"x"
),
T
.
tensor4
(
"scale"
),
tt
.
tensor4
(
"scale"
),
T
.
tensor4
(
"bias"
),
tt
.
tensor4
(
"bias"
),
T
.
tensor4
(
"dy"
),
tt
.
tensor4
(
"dy"
),
)
)
data_shape
=
(
5
,
10
,
30
,
25
)
data_shape
=
(
5
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
...
@@ -2094,7 +2079,7 @@ def test_without_dnn_batchnorm_train_without_running_averages():
...
@@ -2094,7 +2079,7 @@ def test_without_dnn_batchnorm_train_without_running_averages():
x
,
scale
,
bias
,
"per-activation"
x
,
scale
,
bias
,
"per-activation"
)
)
# backward pass
# backward pass
grads_abstract
=
T
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_abstract
:
dy
})
grads_abstract
=
tt
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_abstract
:
dy
})
# compile
# compile
f_abstract
=
theano
.
function
(
f_abstract
=
theano
.
function
(
[
x
,
scale
,
bias
,
dy
],
[
x
,
scale
,
bias
,
dy
],
...
@@ -2143,7 +2128,7 @@ def test_dnn_batchnorm_train_inplace():
...
@@ -2143,7 +2128,7 @@ def test_dnn_batchnorm_train_inplace():
# test inplace_running_mean and inplace_running_var
# test inplace_running_mean and inplace_running_var
utt
.
seed_rng
()
utt
.
seed_rng
()
x
,
scale
,
bias
=
T
.
tensor4
(
"x"
),
T
.
tensor4
(
"scale"
),
T
.
tensor4
(
"bias"
)
x
,
scale
,
bias
=
tt
.
tensor4
(
"x"
),
tt
.
tensor4
(
"scale"
),
tt
.
tensor4
(
"bias"
)
data_shape
=
(
5
,
10
,
30
,
25
)
data_shape
=
(
5
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
running_mean
=
gpuarray_shared_constructor
(
running_mean
=
gpuarray_shared_constructor
(
...
@@ -2199,7 +2184,14 @@ def test_batchnorm_inference():
...
@@ -2199,7 +2184,14 @@ def test_batchnorm_inference():
utt
.
seed_rng
()
utt
.
seed_rng
()
for
mode
in
(
"per-activation"
,
"spatial"
):
for
mode
in
(
"per-activation"
,
"spatial"
):
for
vartype
in
(
T
.
tensor6
,
T
.
tensor5
,
T
.
tensor4
,
T
.
tensor3
,
T
.
matrix
,
T
.
vector
):
for
vartype
in
(
tt
.
tensor6
,
tt
.
tensor5
,
tt
.
tensor4
,
tt
.
tensor3
,
tt
.
matrix
,
tt
.
vector
,
):
x
,
scale
,
bias
,
mean
,
var
=
(
x
,
scale
,
bias
,
mean
,
var
=
(
vartype
(
n
)
for
n
in
(
"x"
,
"scale"
,
"bias"
,
"mean"
,
"var"
)
vartype
(
n
)
for
n
in
(
"x"
,
"scale"
,
"bias"
,
"mean"
,
"var"
)
)
)
...
@@ -2220,19 +2212,19 @@ def test_batchnorm_inference():
...
@@ -2220,19 +2212,19 @@ def test_batchnorm_inference():
elif
mode
==
"spatial"
:
elif
mode
==
"spatial"
:
axes
=
(
0
,)
+
tuple
(
range
(
2
,
ndim
))
axes
=
(
0
,)
+
tuple
(
range
(
2
,
ndim
))
scale_ref
,
bias_ref
,
mean_ref
,
var_ref
=
(
scale_ref
,
bias_ref
,
mean_ref
,
var_ref
=
(
T
.
addbroadcast
(
t
,
*
axes
)
for
t
in
(
scale
,
bias
,
mean
,
var
)
tt
.
addbroadcast
(
t
,
*
axes
)
for
t
in
(
scale
,
bias
,
mean
,
var
)
)
)
out_ref
=
(
x
-
mean_ref
)
*
(
scale_ref
/
T
.
sqrt
(
var_ref
+
eps
))
+
bias_ref
out_ref
=
(
x
-
mean_ref
)
*
(
scale_ref
/
tt
.
sqrt
(
var_ref
+
eps
))
+
bias_ref
# backward pass
# backward pass
dy
=
vartype
(
"dy"
)
dy
=
vartype
(
"dy"
)
grads_gpu
=
T
.
grad
(
grads_gpu
=
tt
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
,
mean
,
var
],
known_grads
=
{
out_gpu
:
dy
}
None
,
wrt
=
[
x
,
scale
,
bias
,
mean
,
var
],
known_grads
=
{
out_gpu
:
dy
}
)
)
grads_abstract
=
T
.
grad
(
grads_abstract
=
tt
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
,
mean
,
var
],
known_grads
=
{
out_abstract
:
dy
}
None
,
wrt
=
[
x
,
scale
,
bias
,
mean
,
var
],
known_grads
=
{
out_abstract
:
dy
}
)
)
# reference backward pass
# reference backward pass
grads_ref
=
T
.
grad
(
grads_ref
=
tt
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
,
mean
,
var
],
known_grads
=
{
out_ref
:
dy
}
None
,
wrt
=
[
x
,
scale
,
bias
,
mean
,
var
],
known_grads
=
{
out_ref
:
dy
}
)
)
# compile
# compile
...
@@ -2318,7 +2310,7 @@ def test_batchnorm_inference_inplace():
...
@@ -2318,7 +2310,7 @@ def test_batchnorm_inference_inplace():
utt
.
seed_rng
()
utt
.
seed_rng
()
x
,
scale
,
bias
,
mean
,
var
=
(
x
,
scale
,
bias
,
mean
,
var
=
(
T
.
tensor4
(
n
)
for
n
in
(
"x"
,
"scale"
,
"bias"
,
"mean"
,
"var"
)
tt
.
tensor4
(
n
)
for
n
in
(
"x"
,
"scale"
,
"bias"
,
"mean"
,
"var"
)
)
)
data_shape
=
(
5
,
10
,
30
,
25
)
data_shape
=
(
5
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
...
@@ -2345,7 +2337,7 @@ def test_batchnorm_inference_inplace():
...
@@ -2345,7 +2337,7 @@ def test_batchnorm_inference_inplace():
def
test_dnn_batchnorm_valid_and_invalid_axes
():
def
test_dnn_batchnorm_valid_and_invalid_axes
():
for
vartype
in
(
T
.
tensor5
,
T
.
tensor4
,
T
.
tensor3
,
T
.
matrix
):
for
vartype
in
(
tt
.
tensor5
,
tt
.
tensor4
,
tt
.
tensor3
,
tt
.
matrix
):
x
,
scale
,
bias
,
mean
,
var
,
dy
=
(
x
,
scale
,
bias
,
mean
,
var
,
dy
=
(
vartype
(
n
)
for
n
in
(
"x"
,
"scale"
,
"bias"
,
"mean"
,
"var"
,
"dy"
)
vartype
(
n
)
for
n
in
(
"x"
,
"scale"
,
"bias"
,
"mean"
,
"var"
,
"dy"
)
)
)
...
@@ -2363,10 +2355,10 @@ def test_dnn_batchnorm_valid_and_invalid_axes():
...
@@ -2363,10 +2355,10 @@ def test_dnn_batchnorm_valid_and_invalid_axes():
out_test
=
bn
.
batch_normalization_test
(
x
,
scale
,
bias
,
mean
,
var
,
axes
)
out_test
=
bn
.
batch_normalization_test
(
x
,
scale
,
bias
,
mean
,
var
,
axes
)
# backward pass
# backward pass
dy
=
vartype
(
"dy"
)
dy
=
vartype
(
"dy"
)
grads_train
=
T
.
grad
(
grads_train
=
tt
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_train
:
dy
}
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_train
:
dy
}
)
)
grads_test
=
T
.
grad
(
grads_test
=
tt
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
,
mean
,
var
],
known_grads
=
{
out_test
:
dy
}
None
,
wrt
=
[
x
,
scale
,
bias
,
mean
,
var
],
known_grads
=
{
out_test
:
dy
}
)
)
# compile
# compile
...
@@ -2439,9 +2431,9 @@ def test_dnn_rnn_gru():
...
@@ -2439,9 +2431,9 @@ def test_dnn_rnn_gru():
timesteps
=
5
timesteps
=
5
# test code
# test code
X
=
T
.
tensor3
(
"X"
)
X
=
tt
.
tensor3
(
"X"
)
Y
=
T
.
tensor3
(
"Y"
)
Y
=
tt
.
tensor3
(
"Y"
)
h0
=
T
.
tensor3
(
"h0"
)
h0
=
tt
.
tensor3
(
"h0"
)
rnnb
=
dnn
.
RNNBlock
(
theano
.
config
.
floatX
,
hidden_dim
,
depth
,
"gru"
)
rnnb
=
dnn
.
RNNBlock
(
theano
.
config
.
floatX
,
hidden_dim
,
depth
,
"gru"
)
psize
=
rnnb
.
get_param_size
([
batch_size
,
input_dim
])
psize
=
rnnb
.
get_param_size
([
batch_size
,
input_dim
])
...
@@ -2465,10 +2457,10 @@ def test_dnn_rnn_gru():
...
@@ -2465,10 +2457,10 @@ def test_dnn_rnn_gru():
def
funcs
(
out
,
params
,
hy
=
None
):
def
funcs
(
out
,
params
,
hy
=
None
):
cost
=
0
cost
=
0
if
out
:
if
out
:
cost
+=
T
.
mean
((
Y
-
out
)
**
2
)
cost
+=
tt
.
mean
((
Y
-
out
)
**
2
)
if
hy
:
if
hy
:
cost
+=
T
.
mean
(
hy
**
2
)
cost
+=
tt
.
mean
(
hy
**
2
)
grad
=
T
.
grad
(
cost
,
[
X
,
h0
]
+
params
)
grad
=
tt
.
grad
(
cost
,
[
X
,
h0
]
+
params
)
grad_fn
=
theano
.
function
(
grad_fn
=
theano
.
function
(
[
X
,
Y
,
h0
],
grad
,
mode
=
mode_with_gpu
,
on_unused_input
=
"ignore"
[
X
,
Y
,
h0
],
grad
,
mode
=
mode_with_gpu
,
on_unused_input
=
"ignore"
)
)
...
@@ -2477,7 +2469,7 @@ def test_dnn_rnn_gru():
...
@@ -2477,7 +2469,7 @@ def test_dnn_rnn_gru():
ref_y
=
last_layer
.
output
()
ref_y
=
last_layer
.
output
()
# This will grab the hy from the scan implementation
# This will grab the hy from the scan implementation
ref_hy
=
T
.
stack
(
ref_hy
=
tt
.
stack
(
[
model
.
layers
[
0
]
.
Y
[
-
1
],
model
.
layers
[
1
]
.
Y
[
-
1
],
model
.
layers
[
2
]
.
Y
[
-
1
]]
[
model
.
layers
[
0
]
.
Y
[
-
1
],
model
.
layers
[
1
]
.
Y
[
-
1
],
model
.
layers
[
2
]
.
Y
[
-
1
]]
)
)
...
@@ -2548,9 +2540,9 @@ def test_dnn_rnn_gru_bidi():
...
@@ -2548,9 +2540,9 @@ def test_dnn_rnn_gru_bidi():
timesteps
=
5
timesteps
=
5
# test code
# test code
X
=
T
.
tensor3
(
"X"
)
X
=
tt
.
tensor3
(
"X"
)
Y
=
T
.
tensor3
(
"Y"
)
Y
=
tt
.
tensor3
(
"Y"
)
h0
=
T
.
tensor3
(
"h0"
)
h0
=
tt
.
tensor3
(
"h0"
)
rnnb
=
dnn
.
RNNBlock
(
rnnb
=
dnn
.
RNNBlock
(
theano
.
config
.
floatX
,
hidden_dim
,
depth
,
"gru"
,
direction_mode
=
"bidirectional"
theano
.
config
.
floatX
,
hidden_dim
,
depth
,
"gru"
,
direction_mode
=
"bidirectional"
...
@@ -2563,10 +2555,10 @@ def test_dnn_rnn_gru_bidi():
...
@@ -2563,10 +2555,10 @@ def test_dnn_rnn_gru_bidi():
def
funcs
(
out
,
params
,
hy
=
None
):
def
funcs
(
out
,
params
,
hy
=
None
):
cost
=
0
cost
=
0
if
out
:
if
out
:
cost
+=
T
.
mean
((
Y
-
out
)
**
2
)
cost
+=
tt
.
mean
((
Y
-
out
)
**
2
)
if
hy
:
if
hy
:
cost
+=
T
.
mean
(
hy
**
2
)
cost
+=
tt
.
mean
(
hy
**
2
)
grad
=
T
.
grad
(
cost
,
[
X
,
h0
]
+
params
)
grad
=
tt
.
grad
(
cost
,
[
X
,
h0
]
+
params
)
grad_fn
=
theano
.
function
(
grad_fn
=
theano
.
function
(
[
X
,
Y
,
h0
],
grad
,
mode
=
mode_with_gpu
,
on_unused_input
=
"ignore"
[
X
,
Y
,
h0
],
grad
,
mode
=
mode_with_gpu
,
on_unused_input
=
"ignore"
)
)
...
@@ -2609,10 +2601,10 @@ def test_dnn_rnn_lstm():
...
@@ -2609,10 +2601,10 @@ def test_dnn_rnn_lstm():
timesteps
=
5
timesteps
=
5
# test code
# test code
X
=
T
.
tensor3
(
"X"
)
X
=
tt
.
tensor3
(
"X"
)
Y
=
T
.
tensor3
(
"Y"
)
Y
=
tt
.
tensor3
(
"Y"
)
h0
=
T
.
tensor3
(
"h0"
)
h0
=
tt
.
tensor3
(
"h0"
)
c0
=
T
.
tensor3
(
"c0"
)
c0
=
tt
.
tensor3
(
"c0"
)
rnnb
=
dnn
.
RNNBlock
(
theano
.
config
.
floatX
,
hidden_dim
,
depth
,
"lstm"
)
rnnb
=
dnn
.
RNNBlock
(
theano
.
config
.
floatX
,
hidden_dim
,
depth
,
"lstm"
)
psize
=
rnnb
.
get_param_size
([
batch_size
,
input_dim
])
psize
=
rnnb
.
get_param_size
([
batch_size
,
input_dim
])
...
@@ -2635,8 +2627,8 @@ def test_dnn_rnn_lstm():
...
@@ -2635,8 +2627,8 @@ def test_dnn_rnn_lstm():
def
funcs
(
out
,
params
):
def
funcs
(
out
,
params
):
fn
=
theano
.
function
([
X
,
h0
,
c0
],
out
,
mode
=
mode_with_gpu
)
fn
=
theano
.
function
([
X
,
h0
,
c0
],
out
,
mode
=
mode_with_gpu
)
cost
=
T
.
mean
((
Y
-
out
)
**
2
)
cost
=
tt
.
mean
((
Y
-
out
)
**
2
)
grad
=
T
.
grad
(
cost
,
[
X
,
h0
,
c0
]
+
params
)
grad
=
tt
.
grad
(
cost
,
[
X
,
h0
,
c0
]
+
params
)
grad_fn
=
theano
.
function
([
X
,
Y
,
h0
,
c0
],
grad
,
mode
=
mode_with_gpu
)
grad_fn
=
theano
.
function
([
X
,
Y
,
h0
,
c0
],
grad
,
mode
=
mode_with_gpu
)
return
fn
,
grad_fn
return
fn
,
grad_fn
...
@@ -2695,10 +2687,10 @@ def test_dnn_rnn_lstm_grad_c():
...
@@ -2695,10 +2687,10 @@ def test_dnn_rnn_lstm_grad_c():
timesteps
=
5
timesteps
=
5
# test code
# test code
X
=
T
.
tensor3
(
"X"
)
X
=
tt
.
tensor3
(
"X"
)
CY
=
T
.
tensor3
(
"CY"
)
CY
=
tt
.
tensor3
(
"CY"
)
h0
=
T
.
tensor3
(
"h0"
)
h0
=
tt
.
tensor3
(
"h0"
)
c0
=
T
.
tensor3
(
"c0"
)
c0
=
tt
.
tensor3
(
"c0"
)
rnnb
=
dnn
.
RNNBlock
(
theano
.
config
.
floatX
,
hidden_dim
,
depth
,
"lstm"
)
rnnb
=
dnn
.
RNNBlock
(
theano
.
config
.
floatX
,
hidden_dim
,
depth
,
"lstm"
)
psize
=
rnnb
.
get_param_size
([
batch_size
,
input_dim
])
psize
=
rnnb
.
get_param_size
([
batch_size
,
input_dim
])
...
@@ -2720,13 +2712,13 @@ def test_dnn_rnn_lstm_grad_c():
...
@@ -2720,13 +2712,13 @@ def test_dnn_rnn_lstm_grad_c():
p
[:]
=
layer_params
[
j
]
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)
p
[:]
=
layer_params
[
j
]
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)
def
funcs
(
out
,
params
):
def
funcs
(
out
,
params
):
cost
=
T
.
mean
((
CY
-
out
)
**
2
)
cost
=
tt
.
mean
((
CY
-
out
)
**
2
)
grad
=
T
.
grad
(
cost
,
[
X
,
h0
,
c0
]
+
params
)
grad
=
tt
.
grad
(
cost
,
[
X
,
h0
,
c0
]
+
params
)
grad_fn
=
theano
.
function
([
X
,
CY
,
h0
,
c0
],
grad
,
mode
=
mode_with_gpu
)
grad_fn
=
theano
.
function
([
X
,
CY
,
h0
,
c0
],
grad
,
mode
=
mode_with_gpu
)
return
grad_fn
return
grad_fn
_
,
_
,
cy
=
rnnb
.
apply
(
params_cudnn
,
X
,
h0
,
c0
)
_
,
_
,
cy
=
rnnb
.
apply
(
params_cudnn
,
X
,
h0
,
c0
)
ref_cy
=
T
.
stack
(
ref_cy
=
tt
.
stack
(
[
model
.
layers
[
0
]
.
C
[
-
1
],
model
.
layers
[
1
]
.
C
[
-
1
],
model
.
layers
[
2
]
.
C
[
-
1
]]
[
model
.
layers
[
0
]
.
C
[
-
1
],
model
.
layers
[
1
]
.
C
[
-
1
],
model
.
layers
[
2
]
.
C
[
-
1
]]
)
)
...
@@ -2797,14 +2789,14 @@ def test_dnn_spatialtf():
...
@@ -2797,14 +2789,14 @@ def test_dnn_spatialtf():
def
spatialtf_cpu
(
inp
,
theta
,
scale_height
,
scale_width
,
border_mode
=
"nearest"
):
def
spatialtf_cpu
(
inp
,
theta
,
scale_height
,
scale_width
,
border_mode
=
"nearest"
):
num_batch
,
num_channels
,
height
,
width
=
inp
.
shape
num_batch
,
num_channels
,
height
,
width
=
inp
.
shape
theta
=
T
.
reshape
(
theta
,
(
-
1
,
2
,
3
))
theta
=
tt
.
reshape
(
theta
,
(
-
1
,
2
,
3
))
# grid of (x_t, y_t, 1), eq (1) in ref [1]
# grid of (x_t, y_t, 1), eq (1) in ref [1]
out_height
=
T
.
cast
(
T
.
ceil
(
height
*
scale_height
),
"int64"
)
out_height
=
tt
.
cast
(
tt
.
ceil
(
height
*
scale_height
),
"int64"
)
out_width
=
T
.
cast
(
T
.
ceil
(
width
*
scale_width
),
"int64"
)
out_width
=
tt
.
cast
(
tt
.
ceil
(
width
*
scale_width
),
"int64"
)
grid
=
_meshgrid
(
out_height
,
out_width
)
grid
=
_meshgrid
(
out_height
,
out_width
)
# transform a x (x_t, y_t, 1)^t -> (x_s, y_s)
# transform a x (x_t, y_t, 1)^t -> (x_s, y_s)
t_g
=
T
.
dot
(
theta
,
grid
)
t_g
=
tt
.
dot
(
theta
,
grid
)
x_s
=
t_g
[:,
0
]
x_s
=
t_g
[:,
0
]
y_s
=
t_g
[:,
1
]
y_s
=
t_g
[:,
1
]
x_s_flat
=
x_s
.
flatten
()
x_s_flat
=
x_s
.
flatten
()
...
@@ -2816,7 +2808,7 @@ def test_dnn_spatialtf():
...
@@ -2816,7 +2808,7 @@ def test_dnn_spatialtf():
input_dim
,
x_s_flat
,
y_s_flat
,
out_height
,
out_width
,
border_mode
input_dim
,
x_s_flat
,
y_s_flat
,
out_height
,
out_width
,
border_mode
)
)
output
=
T
.
reshape
(
output
=
tt
.
reshape
(
input_transformed
,
(
num_batch
,
out_height
,
out_width
,
num_channels
)
input_transformed
,
(
num_batch
,
out_height
,
out_width
,
num_channels
)
)
)
output
=
output
.
dimshuffle
(
0
,
3
,
1
,
2
)
# dimshuffle to conv format
output
=
output
.
dimshuffle
(
0
,
3
,
1
,
2
)
# dimshuffle to conv format
...
@@ -2825,8 +2817,8 @@ def test_dnn_spatialtf():
...
@@ -2825,8 +2817,8 @@ def test_dnn_spatialtf():
def
_interpolate
(
im
,
x
,
y
,
out_height
,
out_width
,
border_mode
):
def
_interpolate
(
im
,
x
,
y
,
out_height
,
out_width
,
border_mode
):
# *_f are floats
# *_f are floats
num_batch
,
height
,
width
,
channels
=
im
.
shape
num_batch
,
height
,
width
,
channels
=
im
.
shape
height_f
=
T
.
cast
(
height
,
theano
.
config
.
floatX
)
height_f
=
tt
.
cast
(
height
,
theano
.
config
.
floatX
)
width_f
=
T
.
cast
(
width
,
theano
.
config
.
floatX
)
width_f
=
tt
.
cast
(
width
,
theano
.
config
.
floatX
)
# scale coordinates from [-1, 1] to [0, dimension - 1], where dimension
# scale coordinates from [-1, 1] to [0, dimension - 1], where dimension
# can be the width or height
# can be the width or height
...
@@ -2835,42 +2827,42 @@ def test_dnn_spatialtf():
...
@@ -2835,42 +2827,42 @@ def test_dnn_spatialtf():
# obtain indices of the 2x2 pixel neighborhood surrounding the coordinates;
# obtain indices of the 2x2 pixel neighborhood surrounding the coordinates;
# we need those in floatX for interpolation and in int64 for indexing.
# we need those in floatX for interpolation and in int64 for indexing.
x0_f
=
T
.
floor
(
x
)
x0_f
=
tt
.
floor
(
x
)
y0_f
=
T
.
floor
(
y
)
y0_f
=
tt
.
floor
(
y
)
x1_f
=
x0_f
+
1
x1_f
=
x0_f
+
1
y1_f
=
y0_f
+
1
y1_f
=
y0_f
+
1
# for indexing, we need to take care of the border mode for outside pixels.
# for indexing, we need to take care of the border mode for outside pixels.
if
border_mode
==
"nearest"
:
if
border_mode
==
"nearest"
:
x0
=
T
.
clip
(
x0_f
,
0
,
width_f
-
1
)
x0
=
tt
.
clip
(
x0_f
,
0
,
width_f
-
1
)
x1
=
T
.
clip
(
x1_f
,
0
,
width_f
-
1
)
x1
=
tt
.
clip
(
x1_f
,
0
,
width_f
-
1
)
y0
=
T
.
clip
(
y0_f
,
0
,
height_f
-
1
)
y0
=
tt
.
clip
(
y0_f
,
0
,
height_f
-
1
)
y1
=
T
.
clip
(
y1_f
,
0
,
height_f
-
1
)
y1
=
tt
.
clip
(
y1_f
,
0
,
height_f
-
1
)
elif
border_mode
==
"mirror"
:
elif
border_mode
==
"mirror"
:
w
=
2
*
(
width_f
-
1
)
w
=
2
*
(
width_f
-
1
)
x0
=
T
.
minimum
(
x0_f
%
w
,
-
x0_f
%
w
)
x0
=
tt
.
minimum
(
x0_f
%
w
,
-
x0_f
%
w
)
x1
=
T
.
minimum
(
x1_f
%
w
,
-
x1_f
%
w
)
x1
=
tt
.
minimum
(
x1_f
%
w
,
-
x1_f
%
w
)
h
=
2
*
(
height_f
-
1
)
h
=
2
*
(
height_f
-
1
)
y0
=
T
.
minimum
(
y0_f
%
h
,
-
y0_f
%
h
)
y0
=
tt
.
minimum
(
y0_f
%
h
,
-
y0_f
%
h
)
y1
=
T
.
minimum
(
y1_f
%
h
,
-
y1_f
%
h
)
y1
=
tt
.
minimum
(
y1_f
%
h
,
-
y1_f
%
h
)
elif
border_mode
==
"wrap"
:
elif
border_mode
==
"wrap"
:
x0
=
T
.
mod
(
x0_f
,
width_f
)
x0
=
tt
.
mod
(
x0_f
,
width_f
)
x1
=
T
.
mod
(
x1_f
,
width_f
)
x1
=
tt
.
mod
(
x1_f
,
width_f
)
y0
=
T
.
mod
(
y0_f
,
height_f
)
y0
=
tt
.
mod
(
y0_f
,
height_f
)
y1
=
T
.
mod
(
y1_f
,
height_f
)
y1
=
tt
.
mod
(
y1_f
,
height_f
)
else
:
else
:
raise
ValueError
(
raise
ValueError
(
"border_mode must be one of "
"'nearest', 'mirror', 'wrap'"
"border_mode must be one of "
"'nearest', 'mirror', 'wrap'"
)
)
x0
,
x1
,
y0
,
y1
=
(
T
.
cast
(
v
,
"int64"
)
for
v
in
(
x0
,
x1
,
y0
,
y1
))
x0
,
x1
,
y0
,
y1
=
(
tt
.
cast
(
v
,
"int64"
)
for
v
in
(
x0
,
x1
,
y0
,
y1
))
# The input is [num_batch, height, width, channels]. We do the lookup in
# The input is [num_batch, height, width, channels]. We do the lookup in
# the flattened input, i.e [num_batch*height*width, channels]. We need
# the flattened input, i.e [num_batch*height*width, channels]. We need
# to offset all indices to match the flat version
# to offset all indices to match the flat version
dim2
=
width
dim2
=
width
dim1
=
width
*
height
dim1
=
width
*
height
base
=
T
.
repeat
(
base
=
tt
.
repeat
(
T
.
arange
(
num_batch
,
dtype
=
"int64"
)
*
dim1
,
out_height
*
out_width
tt
.
arange
(
num_batch
,
dtype
=
"int64"
)
*
dim1
,
out_height
*
out_width
)
)
base_y0
=
base
+
y0
*
dim2
base_y0
=
base
+
y0
*
dim2
base_y1
=
base
+
y1
*
dim2
base_y1
=
base
+
y1
*
dim2
...
@@ -2891,16 +2883,16 @@ def test_dnn_spatialtf():
...
@@ -2891,16 +2883,16 @@ def test_dnn_spatialtf():
wb
=
((
x1_f
-
x
)
*
(
y
-
y0_f
))
.
dimshuffle
(
0
,
"x"
)
wb
=
((
x1_f
-
x
)
*
(
y
-
y0_f
))
.
dimshuffle
(
0
,
"x"
)
wc
=
((
x
-
x0_f
)
*
(
y1_f
-
y
))
.
dimshuffle
(
0
,
"x"
)
wc
=
((
x
-
x0_f
)
*
(
y1_f
-
y
))
.
dimshuffle
(
0
,
"x"
)
wd
=
((
x
-
x0_f
)
*
(
y
-
y0_f
))
.
dimshuffle
(
0
,
"x"
)
wd
=
((
x
-
x0_f
)
*
(
y
-
y0_f
))
.
dimshuffle
(
0
,
"x"
)
output
=
T
.
sum
([
wa
*
Ia
,
wb
*
Ib
,
wc
*
Ic
,
wd
*
Id
],
axis
=
0
)
output
=
tt
.
sum
([
wa
*
Ia
,
wb
*
Ib
,
wc
*
Ic
,
wd
*
Id
],
axis
=
0
)
return
output
return
output
def
_linspace
(
start
,
stop
,
num
):
def
_linspace
(
start
,
stop
,
num
):
# Theano linspace. Behaves similar to np.linspace
# Theano linspace. Behaves similar to np.linspace
start
=
T
.
cast
(
start
,
theano
.
config
.
floatX
)
start
=
tt
.
cast
(
start
,
theano
.
config
.
floatX
)
stop
=
T
.
cast
(
stop
,
theano
.
config
.
floatX
)
stop
=
tt
.
cast
(
stop
,
theano
.
config
.
floatX
)
num
=
T
.
cast
(
num
,
theano
.
config
.
floatX
)
num
=
tt
.
cast
(
num
,
theano
.
config
.
floatX
)
step
=
(
stop
-
start
)
/
(
num
-
1
)
step
=
(
stop
-
start
)
/
(
num
-
1
)
return
T
.
arange
(
num
,
dtype
=
theano
.
config
.
floatX
)
*
step
+
start
return
tt
.
arange
(
num
,
dtype
=
theano
.
config
.
floatX
)
*
step
+
start
def
_meshgrid
(
height
,
width
):
def
_meshgrid
(
height
,
width
):
# This function is the grid generator from eq. (1) in reference [1].
# This function is the grid generator from eq. (1) in reference [1].
...
@@ -2913,13 +2905,17 @@ def test_dnn_spatialtf():
...
@@ -2913,13 +2905,17 @@ def test_dnn_spatialtf():
# Note: If the image size is known at layer construction time, we could
# Note: If the image size is known at layer construction time, we could
# compute the meshgrid offline in numpy instead of doing it dynamically
# compute the meshgrid offline in numpy instead of doing it dynamically
# in Theano. However, it hardly affected performance when we tried.
# in Theano. However, it hardly affected performance when we tried.
x_t
=
T
.
dot
(
T
.
ones
((
height
,
1
)),
_linspace
(
-
1.0
,
1.0
,
width
)
.
dimshuffle
(
"x"
,
0
))
x_t
=
tt
.
dot
(
y_t
=
T
.
dot
(
_linspace
(
-
1.0
,
1.0
,
height
)
.
dimshuffle
(
0
,
"x"
),
T
.
ones
((
1
,
width
)))
tt
.
ones
((
height
,
1
)),
_linspace
(
-
1.0
,
1.0
,
width
)
.
dimshuffle
(
"x"
,
0
)
)
y_t
=
tt
.
dot
(
_linspace
(
-
1.0
,
1.0
,
height
)
.
dimshuffle
(
0
,
"x"
),
tt
.
ones
((
1
,
width
))
)
x_t_flat
=
x_t
.
reshape
((
1
,
-
1
))
x_t_flat
=
x_t
.
reshape
((
1
,
-
1
))
y_t_flat
=
y_t
.
reshape
((
1
,
-
1
))
y_t_flat
=
y_t
.
reshape
((
1
,
-
1
))
ones
=
T
.
ones_like
(
x_t_flat
)
ones
=
tt
.
ones_like
(
x_t_flat
)
grid
=
T
.
concatenate
([
x_t_flat
,
y_t_flat
,
ones
],
axis
=
0
)
grid
=
tt
.
concatenate
([
x_t_flat
,
y_t_flat
,
ones
],
axis
=
0
)
return
grid
return
grid
img_dims
=
(
5
,
3
,
16
,
16
)
img_dims
=
(
5
,
3
,
16
,
16
)
...
@@ -2933,8 +2929,8 @@ def test_dnn_spatialtf():
...
@@ -2933,8 +2929,8 @@ def test_dnn_spatialtf():
theta
=
np
.
asarray
(
img_dims
[
0
]
*
[
transform
],
dtype
=
theano
.
config
.
floatX
)
theta
=
np
.
asarray
(
img_dims
[
0
]
*
[
transform
],
dtype
=
theano
.
config
.
floatX
)
# Create symbolic variables for inputs and transformations
# Create symbolic variables for inputs and transformations
t_img
=
T
.
tensor4
(
"img"
)
t_img
=
tt
.
tensor4
(
"img"
)
t_theta
=
T
.
tensor3
(
"theta"
)
t_theta
=
tt
.
tensor3
(
"theta"
)
st_dnn
=
dnn
.
dnn_spatialtf
(
st_dnn
=
dnn
.
dnn_spatialtf
(
t_img
,
t_theta
,
scale_height
=
scale_height
,
scale_width
=
scale_width
t_img
,
t_theta
,
scale_height
=
scale_height
,
scale_width
=
scale_width
...
@@ -2963,8 +2959,8 @@ def test_dnn_spatialtf():
...
@@ -2963,8 +2959,8 @@ def test_dnn_spatialtf():
def
test_dnn_spatialtf_invalid_shapes
():
def
test_dnn_spatialtf_invalid_shapes
():
inputs
=
T
.
tensor4
(
"inputs"
)
inputs
=
tt
.
tensor4
(
"inputs"
)
theta
=
T
.
tensor3
(
"theta"
)
theta
=
tt
.
tensor3
(
"theta"
)
st_dnn
=
dnn
.
dnn_spatialtf
(
inputs
,
theta
)
st_dnn
=
dnn
.
dnn_spatialtf
(
inputs
,
theta
)
st_dnn_func
=
theano
.
function
([
inputs
,
theta
],
st_dnn
,
mode
=
mode_with_gpu
)
st_dnn_func
=
theano
.
function
([
inputs
,
theta
],
st_dnn
,
mode
=
mode_with_gpu
)
...
@@ -2994,13 +2990,13 @@ def test_dnn_spatialtf_invalid_shapes():
...
@@ -2994,13 +2990,13 @@ def test_dnn_spatialtf_invalid_shapes():
def
test_dnn_spatialtf_grad
():
def
test_dnn_spatialtf_grad
():
utt
.
seed_rng
()
utt
.
seed_rng
()
inputs
=
T
.
tensor4
(
"inputs"
)
inputs
=
tt
.
tensor4
(
"inputs"
)
theta
=
T
.
tensor3
(
"theta"
)
theta
=
tt
.
tensor3
(
"theta"
)
out
=
dnn
.
dnn_spatialtf
(
inputs
,
theta
,
scale_height
=
0.25
,
scale_width
=
0.75
)
out
=
dnn
.
dnn_spatialtf
(
inputs
,
theta
,
scale_height
=
0.25
,
scale_width
=
0.75
)
out_mean
=
T
.
mean
(
out
)
out_mean
=
tt
.
mean
(
out
)
mean_gi
=
T
.
grad
(
out_mean
,
[
inputs
])
mean_gi
=
tt
.
grad
(
out_mean
,
[
inputs
])
mean_gt
=
T
.
grad
(
out_mean
,
[
theta
])
mean_gt
=
tt
.
grad
(
out_mean
,
[
theta
])
f_gi
=
theano
.
function
([
inputs
,
theta
],
mean_gi
,
mode
=
mode_with_gpu
)
f_gi
=
theano
.
function
([
inputs
,
theta
],
mean_gi
,
mode
=
mode_with_gpu
)
assert
any
(
assert
any
(
...
@@ -3053,7 +3049,7 @@ def test_dnn_spatialtf_grad():
...
@@ -3053,7 +3049,7 @@ def test_dnn_spatialtf_grad():
class
TestDnnConv2DRuntimeAlgorithms
(
object
):
class
TestDnnConv2DRuntimeAlgorithms
(
object
):
ndim
=
2
ndim
=
2
cpu_conv_class
=
theano
.
tensor
.
nnet
.
corr
.
CorrMM
cpu_conv_class
=
CorrMM
runtime_shapes
=
[
runtime_shapes
=
[
(
3
,
[(
2
,
3
,
10
,
9
),
(
5
,
3
,
7
,
7
)]),
(
3
,
[(
2
,
3
,
10
,
9
),
(
5
,
3
,
7
,
7
)]),
(
1
,
[(
1
,
1
,
100
,
200
),
(
1
,
1
,
50
,
200
)]),
(
1
,
[(
1
,
1
,
100
,
200
),
(
1
,
1
,
50
,
200
)]),
...
@@ -3080,8 +3076,8 @@ class TestDnnConv2DRuntimeAlgorithms(object):
...
@@ -3080,8 +3076,8 @@ class TestDnnConv2DRuntimeAlgorithms(object):
_broadcastable
=
[
False
]
*
(
2
+
self
.
ndim
)
_broadcastable
=
[
False
]
*
(
2
+
self
.
ndim
)
def
run_fwd_runtime_algorithm
(
algo
):
def
run_fwd_runtime_algorithm
(
algo
):
inputs
=
t
heano
.
tensor
.
TensorType
(
dtype
,
_broadcastable
)()
inputs
=
t
t
.
TensorType
(
dtype
,
_broadcastable
)()
filters
=
t
heano
.
tensor
.
TensorType
(
dtype
,
_broadcastable
)()
filters
=
t
t
.
TensorType
(
dtype
,
_broadcastable
)()
# Scale down the input values to prevent very large absolute errors
# Scale down the input values to prevent very large absolute errors
# due to float rounding
# due to float rounding
lower_inputs
=
inputs
/
10
lower_inputs
=
inputs
/
10
...
@@ -3127,8 +3123,8 @@ class TestDnnConv2DRuntimeAlgorithms(object):
...
@@ -3127,8 +3123,8 @@ class TestDnnConv2DRuntimeAlgorithms(object):
def
run_gradinput_runtime_algorithm
(
algo
):
def
run_gradinput_runtime_algorithm
(
algo
):
theano
.
config
.
dnn
.
conv
.
algo_bwd_data
=
algo
theano
.
config
.
dnn
.
conv
.
algo_bwd_data
=
algo
inputs
=
t
heano
.
tensor
.
TensorType
(
dtype
,
_broadcastable
)()
inputs
=
t
t
.
TensorType
(
dtype
,
_broadcastable
)()
filters
=
t
heano
.
tensor
.
TensorType
(
dtype
,
_broadcastable
)()
filters
=
t
t
.
TensorType
(
dtype
,
_broadcastable
)()
conv
=
dnn
.
dnn_conv
(
conv
=
dnn
.
dnn_conv
(
img
=
inputs
,
img
=
inputs
,
kerns
=
filters
,
kerns
=
filters
,
...
@@ -3137,7 +3133,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
...
@@ -3137,7 +3133,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
subsample
=
unit_shape
,
subsample
=
unit_shape
,
dilation
=
unit_shape
,
dilation
=
unit_shape
,
)
)
(
grad_i
,)
=
t
heano
.
tensor
.
grad
(
conv
.
sum
(),
[
inputs
])
(
grad_i
,)
=
t
t
.
grad
(
conv
.
sum
(),
[
inputs
])
f
=
theano
.
function
([
inputs
,
filters
],
grad_i
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
inputs
,
filters
],
grad_i
,
mode
=
mode_with_gpu
)
assert
1
==
len
(
assert
1
==
len
(
[
[
...
@@ -3161,7 +3157,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
...
@@ -3161,7 +3157,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
conv_ref
=
self
.
cpu_conv_class
(
subsample
=
unit_shape
)(
conv_ref
=
self
.
cpu_conv_class
(
subsample
=
unit_shape
)(
ref_cast
(
inputs
),
flipped_filters
ref_cast
(
inputs
),
flipped_filters
)
)
(
grad_i_ref
,)
=
t
heano
.
tensor
.
grad
(
conv_ref
.
sum
(),
[
inputs
])
(
grad_i_ref
,)
=
t
t
.
grad
(
conv_ref
.
sum
(),
[
inputs
])
f_ref
=
theano
.
function
([
inputs
,
filters
],
grad_i_ref
,
mode
=
"FAST_RUN"
)
f_ref
=
theano
.
function
([
inputs
,
filters
],
grad_i_ref
,
mode
=
"FAST_RUN"
)
runtime_shapes
=
self
.
runtime_shapes
runtime_shapes
=
self
.
runtime_shapes
if
algo
in
(
"time_once"
,
"guess_once"
):
if
algo
in
(
"time_once"
,
"guess_once"
):
...
@@ -3185,8 +3181,8 @@ class TestDnnConv2DRuntimeAlgorithms(object):
...
@@ -3185,8 +3181,8 @@ class TestDnnConv2DRuntimeAlgorithms(object):
def
run_gradweight_runtime_algorithm
(
algo
):
def
run_gradweight_runtime_algorithm
(
algo
):
theano
.
config
.
dnn
.
conv
.
algo_bwd_filter
=
algo
theano
.
config
.
dnn
.
conv
.
algo_bwd_filter
=
algo
inputs
=
t
heano
.
tensor
.
TensorType
(
dtype
,
_broadcastable
)()
inputs
=
t
t
.
TensorType
(
dtype
,
_broadcastable
)()
filters
=
t
heano
.
tensor
.
TensorType
(
dtype
,
_broadcastable
)()
filters
=
t
t
.
TensorType
(
dtype
,
_broadcastable
)()
conv
=
dnn
.
dnn_conv
(
conv
=
dnn
.
dnn_conv
(
img
=
inputs
,
img
=
inputs
,
kerns
=
filters
,
kerns
=
filters
,
...
@@ -3195,7 +3191,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
...
@@ -3195,7 +3191,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
subsample
=
unit_shape
,
subsample
=
unit_shape
,
dilation
=
unit_shape
,
dilation
=
unit_shape
,
)
)
(
grad_w
,)
=
t
heano
.
tensor
.
grad
(
conv
.
sum
(),
[
filters
])
(
grad_w
,)
=
t
t
.
grad
(
conv
.
sum
(),
[
filters
])
f
=
theano
.
function
([
inputs
,
filters
],
grad_w
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
inputs
,
filters
],
grad_w
,
mode
=
mode_with_gpu
)
assert
1
==
len
(
assert
1
==
len
(
[
[
...
@@ -3219,7 +3215,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
...
@@ -3219,7 +3215,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
conv_ref
=
self
.
cpu_conv_class
(
subsample
=
unit_shape
)(
conv_ref
=
self
.
cpu_conv_class
(
subsample
=
unit_shape
)(
ref_cast
(
inputs
),
flipped_filters
ref_cast
(
inputs
),
flipped_filters
)
)
(
grad_w_ref
,)
=
t
heano
.
tensor
.
grad
(
conv_ref
.
sum
(),
[
filters
])
(
grad_w_ref
,)
=
t
t
.
grad
(
conv_ref
.
sum
(),
[
filters
])
f_ref
=
theano
.
function
([
inputs
,
filters
],
grad_w_ref
,
mode
=
"FAST_RUN"
)
f_ref
=
theano
.
function
([
inputs
,
filters
],
grad_w_ref
,
mode
=
"FAST_RUN"
)
runtime_shapes
=
self
.
runtime_shapes
runtime_shapes
=
self
.
runtime_shapes
if
algo
in
(
"time_once"
,
"guess_once"
):
if
algo
in
(
"time_once"
,
"guess_once"
):
...
@@ -3239,7 +3235,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
...
@@ -3239,7 +3235,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
class
TestDnnConv3DRuntimeAlgorithms
(
TestDnnConv2DRuntimeAlgorithms
):
class
TestDnnConv3DRuntimeAlgorithms
(
TestDnnConv2DRuntimeAlgorithms
):
ndim
=
3
ndim
=
3
cpu_conv_class
=
theano
.
tensor
.
nnet
.
corr3d
.
Corr3dMM
cpu_conv_class
=
Corr3dMM
runtime_shapes
=
[
runtime_shapes
=
[
(
3
,
[(
2
,
3
,
5
,
10
,
9
),
(
5
,
3
,
4
,
7
,
7
)]),
(
3
,
[(
2
,
3
,
5
,
10
,
9
),
(
5
,
3
,
4
,
7
,
7
)]),
(
1
,
[(
1
,
1
,
5
,
100
,
200
),
(
1
,
1
,
4
,
50
,
200
)]),
(
1
,
[(
1
,
1
,
5
,
100
,
200
),
(
1
,
1
,
4
,
50
,
200
)]),
...
@@ -3293,9 +3289,9 @@ def test_conv_guess_once_with_dtypes():
...
@@ -3293,9 +3289,9 @@ def test_conv_guess_once_with_dtypes():
def
test_opt_f16_prec32
():
def
test_opt_f16_prec32
():
inputs
=
T
.
TensorType
(
"float16"
,
(
False
,)
*
4
)()
inputs
=
tt
.
TensorType
(
"float16"
,
(
False
,)
*
4
)()
filters
=
T
.
TensorType
(
"float16"
,
(
False
,)
*
4
)()
filters
=
tt
.
TensorType
(
"float16"
,
(
False
,)
*
4
)()
conv
=
T
.
nnet
.
conv2d
(
inputs
,
filters
)
conv
=
conv2d
(
inputs
,
filters
)
gfilt
=
theano
.
grad
(
conv
.
sum
(),
filters
)
gfilt
=
theano
.
grad
(
conv
.
sum
(),
filters
)
...
...
tests/gpuarray/test_extra_ops.py
浏览文件 @
b4dc02d6
...
@@ -3,12 +3,11 @@ import pytest
...
@@ -3,12 +3,11 @@ import pytest
import
numpy
as
np
import
numpy
as
np
import
theano
import
theano
import
theano.tensor
as
tt
from
functools
import
partial
from
functools
import
partial
from
itertools
import
product
from
itertools
import
product
from
theano
import
tensor
as
T
from
theano.tensor.extra_ops
import
CumOp
from
theano.tensor.extra_ops
import
CumOp
from
theano.gpuarray.extra_ops
import
GpuCumOp
from
theano.gpuarray.extra_ops
import
GpuCumOp
from
theano.gpuarray.type
import
get_context
from
theano.gpuarray.type
import
get_context
...
@@ -33,13 +32,13 @@ class TestGpuCumOp(TestCumOp):
...
@@ -33,13 +32,13 @@ class TestGpuCumOp(TestCumOp):
# The CPU implementation is not so accurate, which throws out DebugMode.
# The CPU implementation is not so accurate, which throws out DebugMode.
# Since propagating .tag.values_eq_approx to the output of every
# Since propagating .tag.values_eq_approx to the output of every
# GpuFromHost seems overkill, we just relax the rtol for these tests
# GpuFromHost seems overkill, we just relax the rtol for these tests
self
.
old_rtol
=
t
heano
.
tensor
.
float32_rtol
self
.
old_rtol
=
t
t
.
float32_rtol
t
heano
.
tensor
.
basic
.
float32_rtol
*=
2
t
t
.
float32_rtol
*=
2
def
teardown_method
(
self
):
def
teardown_method
(
self
):
super
()
.
teardown_method
()
super
()
.
teardown_method
()
# Restore rtol
# Restore rtol
t
heano
.
tensor
.
basic
.
float32_rtol
=
self
.
old_rtol
t
t
.
float32_rtol
=
self
.
old_rtol
@pytest.mark.skipif
(
@pytest.mark.skipif
(
theano
.
config
.
floatX
!=
"float32"
,
theano
.
config
.
floatX
!=
"float32"
,
...
@@ -48,7 +47,7 @@ class TestGpuCumOp(TestCumOp):
...
@@ -48,7 +47,7 @@ class TestGpuCumOp(TestCumOp):
@pytest.mark.parametrized
(
"mode"
,
[
"mul"
,
"add"
])
@pytest.mark.parametrized
(
"mode"
,
[
"mul"
,
"add"
])
def
test_infer_shape
(
self
,
mode
):
def
test_infer_shape
(
self
,
mode
):
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
x
=
T
.
tensor3
(
"x"
)
x
=
tt
.
tensor3
(
"x"
)
a
=
np
.
random
.
random
((
3
,
5
,
2
))
.
astype
(
theano
.
config
.
floatX
)
a
=
np
.
random
.
random
((
3
,
5
,
2
))
.
astype
(
theano
.
config
.
floatX
)
for
axis
in
range
(
-
len
(
a
.
shape
),
len
(
a
.
shape
)):
for
axis
in
range
(
-
len
(
a
.
shape
),
len
(
a
.
shape
)):
...
@@ -58,7 +57,7 @@ class TestGpuCumOp(TestCumOp):
...
@@ -58,7 +57,7 @@ class TestGpuCumOp(TestCumOp):
def
test_Strides1D
(
self
,
mode
):
def
test_Strides1D
(
self
,
mode
):
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
np_func
=
dict
(
add
=
np
.
cumsum
,
mul
=
np
.
cumprod
)[
mode
]
np_func
=
dict
(
add
=
np
.
cumsum
,
mul
=
np
.
cumprod
)[
mode
]
x
=
T
.
fvector
(
"x"
)
x
=
tt
.
fvector
(
"x"
)
for
axis
in
[
0
,
None
,
-
1
]:
for
axis
in
[
0
,
None
,
-
1
]:
a
=
np
.
random
.
random
((
42
,))
.
astype
(
"float32"
)
a
=
np
.
random
.
random
((
42
,))
.
astype
(
"float32"
)
...
@@ -89,7 +88,7 @@ class TestGpuCumOp(TestCumOp):
...
@@ -89,7 +88,7 @@ class TestGpuCumOp(TestCumOp):
def
test_Strides2D
(
self
,
mode
):
def
test_Strides2D
(
self
,
mode
):
np_func
=
dict
(
add
=
np
.
cumsum
,
mul
=
np
.
cumprod
)[
mode
]
np_func
=
dict
(
add
=
np
.
cumsum
,
mul
=
np
.
cumprod
)[
mode
]
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
x
=
T
.
fmatrix
(
"x"
)
x
=
tt
.
fmatrix
(
"x"
)
for
axis
in
[
0
,
1
,
None
,
-
1
,
-
2
]:
for
axis
in
[
0
,
1
,
None
,
-
1
,
-
2
]:
a
=
np
.
random
.
random
((
42
,
30
))
.
astype
(
"float32"
)
a
=
np
.
random
.
random
((
42
,
30
))
.
astype
(
"float32"
)
...
@@ -120,7 +119,7 @@ class TestGpuCumOp(TestCumOp):
...
@@ -120,7 +119,7 @@ class TestGpuCumOp(TestCumOp):
def
test_Strides3D
(
self
,
mode
):
def
test_Strides3D
(
self
,
mode
):
np_func
=
dict
(
add
=
np
.
cumsum
,
mul
=
np
.
cumprod
)[
mode
]
np_func
=
dict
(
add
=
np
.
cumsum
,
mul
=
np
.
cumprod
)[
mode
]
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
x
=
T
.
ftensor3
(
"x"
)
x
=
tt
.
ftensor3
(
"x"
)
for
axis
in
[
0
,
1
,
2
,
None
,
-
1
,
-
2
,
-
3
]:
for
axis
in
[
0
,
1
,
2
,
None
,
-
1
,
-
2
,
-
3
]:
a
=
np
.
random
.
random
((
42
,
30
,
25
))
.
astype
(
"float32"
)
a
=
np
.
random
.
random
((
42
,
30
,
25
))
.
astype
(
"float32"
)
...
@@ -153,7 +152,7 @@ class TestGpuCumOp(TestCumOp):
...
@@ -153,7 +152,7 @@ class TestGpuCumOp(TestCumOp):
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
block_max_size
=
self
.
max_threads_dim0
*
2
block_max_size
=
self
.
max_threads_dim0
*
2
x
=
T
.
fvector
(
"x"
)
x
=
tt
.
fvector
(
"x"
)
f
=
theano
.
function
([
x
],
op_class
(
axis
=
0
)(
x
),
mode
=
self
.
mode
)
f
=
theano
.
function
([
x
],
op_class
(
axis
=
0
)(
x
),
mode
=
self
.
mode
)
assert
[
n
for
n
in
f
.
maker
.
fgraph
.
toposort
()
if
isinstance
(
n
.
op
,
GpuCumOp
)]
assert
[
n
for
n
in
f
.
maker
.
fgraph
.
toposort
()
if
isinstance
(
n
.
op
,
GpuCumOp
)]
...
@@ -176,7 +175,7 @@ class TestGpuCumOp(TestCumOp):
...
@@ -176,7 +175,7 @@ class TestGpuCumOp(TestCumOp):
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
block_max_size
=
self
.
max_threads_dim0
*
2
block_max_size
=
self
.
max_threads_dim0
*
2
x
=
T
.
fmatrix
(
"x"
)
x
=
tt
.
fmatrix
(
"x"
)
for
shape_axis
,
axis
in
zip
([
0
,
1
,
0
,
1
,
0
],
[
0
,
1
,
None
,
-
1
,
-
2
]):
for
shape_axis
,
axis
in
zip
([
0
,
1
,
0
,
1
,
0
],
[
0
,
1
,
None
,
-
1
,
-
2
]):
f
=
theano
.
function
([
x
],
op_class
(
axis
=
axis
)(
x
),
mode
=
self
.
mode
)
f
=
theano
.
function
([
x
],
op_class
(
axis
=
axis
)(
x
),
mode
=
self
.
mode
)
assert
[
n
for
n
in
f
.
maker
.
fgraph
.
toposort
()
if
isinstance
(
n
.
op
,
GpuCumOp
)]
assert
[
n
for
n
in
f
.
maker
.
fgraph
.
toposort
()
if
isinstance
(
n
.
op
,
GpuCumOp
)]
...
@@ -217,7 +216,7 @@ class TestGpuCumOp(TestCumOp):
...
@@ -217,7 +216,7 @@ class TestGpuCumOp(TestCumOp):
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
block_max_size
=
self
.
max_threads_dim0
*
2
block_max_size
=
self
.
max_threads_dim0
*
2
x
=
T
.
ftensor3
(
"x"
)
x
=
tt
.
ftensor3
(
"x"
)
for
shape_axis
,
axis
in
zip
([
0
,
1
,
2
,
0
,
2
,
1
,
0
],
[
0
,
1
,
2
,
None
,
-
1
,
-
2
,
-
3
]):
for
shape_axis
,
axis
in
zip
([
0
,
1
,
2
,
0
,
2
,
1
,
0
],
[
0
,
1
,
2
,
None
,
-
1
,
-
2
,
-
3
]):
f
=
theano
.
function
([
x
],
op_class
(
axis
=
axis
)(
x
),
mode
=
self
.
mode
)
f
=
theano
.
function
([
x
],
op_class
(
axis
=
axis
)(
x
),
mode
=
self
.
mode
)
assert
[
n
for
n
in
f
.
maker
.
fgraph
.
toposort
()
if
isinstance
(
n
.
op
,
GpuCumOp
)]
assert
[
n
for
n
in
f
.
maker
.
fgraph
.
toposort
()
if
isinstance
(
n
.
op
,
GpuCumOp
)]
...
@@ -267,6 +266,6 @@ class TestGpuCumOp(TestCumOp):
...
@@ -267,6 +266,6 @@ class TestGpuCumOp(TestCumOp):
def
test_GpuCumOp4D
(
self
,
mode
):
def
test_GpuCumOp4D
(
self
,
mode
):
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
op_class
=
partial
(
self
.
op_class
,
mode
=
mode
)
# Should not use the GPU version.
# Should not use the GPU version.
x
=
T
.
ftensor4
(
"x"
)
x
=
tt
.
ftensor4
(
"x"
)
f
=
theano
.
function
([
x
],
op_class
(
axis
=
1
)(
x
),
mode
=
self
.
mode
)
f
=
theano
.
function
([
x
],
op_class
(
axis
=
1
)(
x
),
mode
=
self
.
mode
)
assert
[
n
for
n
in
f
.
maker
.
fgraph
.
toposort
()
if
isinstance
(
n
.
op
,
CumOp
)]
assert
[
n
for
n
in
f
.
maker
.
fgraph
.
toposort
()
if
isinstance
(
n
.
op
,
CumOp
)]
tests/gpuarray/test_fft.py
浏览文件 @
b4dc02d6
...
@@ -3,7 +3,7 @@ import numpy as np
...
@@ -3,7 +3,7 @@ import numpy as np
import
pytest
import
pytest
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
tt
import
theano.gpuarray.fft
import
theano.gpuarray.fft
from
theano.gpuarray.fft
import
pygpu_available
,
skcuda_available
,
pycuda_available
from
theano.gpuarray.fft
import
pygpu_available
,
skcuda_available
,
pycuda_available
...
@@ -27,7 +27,7 @@ class TestFFT:
...
@@ -27,7 +27,7 @@ class TestFFT:
def
test_1Dfft
(
self
):
def
test_1Dfft
(
self
):
inputs_val
=
np
.
random
.
random
((
1
,
N
))
.
astype
(
"float32"
)
inputs_val
=
np
.
random
.
random
((
1
,
N
))
.
astype
(
"float32"
)
x
=
T
.
matrix
(
"x"
,
dtype
=
"float32"
)
x
=
tt
.
matrix
(
"x"
,
dtype
=
"float32"
)
rfft
=
theano
.
gpuarray
.
fft
.
curfft
(
x
)
rfft
=
theano
.
gpuarray
.
fft
.
curfft
(
x
)
f_rfft
=
theano
.
function
([
x
],
rfft
,
mode
=
mode_with_gpu
)
f_rfft
=
theano
.
function
([
x
],
rfft
,
mode
=
mode_with_gpu
)
res_rfft
=
f_rfft
(
inputs_val
)
res_rfft
=
f_rfft
(
inputs_val
)
...
...
tests/gpuarray/test_nnet.py
浏览文件 @
b4dc02d6
import
numpy
as
np
import
numpy
as
np
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
tt
import
tests.unittest_tools
as
utt
import
tests.unittest_tools
as
utt
from
theano.tensor.nnet
import
crossentropy_softmax_1hot_with_bias_dx
from
theano.gpuarray.nnet
import
(
from
theano.gpuarray.nnet
import
(
GpuCrossentropySoftmaxArgmax1HotWithBias
,
GpuCrossentropySoftmaxArgmax1HotWithBias
,
GpuCrossentropySoftmax1HotWithBiasDx
,
GpuCrossentropySoftmax1HotWithBiasDx
,
GpuSoftmaxWithBias
,
GpuSoftmaxWithBias
,
GpuSoftmax
,
GpuSoftmax
,
)
)
from
tests.gpuarray.config
import
mode_with_gpu
,
mode_without_gpu
from
tests.gpuarray.config
import
mode_with_gpu
,
mode_without_gpu
mode_wo_cudnn
=
mode_with_gpu
.
excluding
(
"cudnn"
)
mode_wo_cudnn
=
mode_with_gpu
.
excluding
(
"cudnn"
)
...
@@ -29,16 +29,16 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
...
@@ -29,16 +29,16 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
n_in
=
4098
n_in
=
4098
n_out
=
4099
n_out
=
4099
y
=
T
.
lvector
(
"y"
)
y
=
tt
.
lvector
(
"y"
)
b
=
T
.
fvector
(
"b"
)
b
=
tt
.
fvector
(
"b"
)
# we precompute the dot with big shape before to allow the test of
# we precompute the dot with big shape before to allow the test of
# GpuCrossentropySoftmax1HotWithBiasDx to don't fail with the error
# GpuCrossentropySoftmax1HotWithBiasDx to don't fail with the error
# (the launch timed out and was terminated) on GPU card not
# (the launch timed out and was terminated) on GPU card not
# powerful enough. We need the big shape to check for corner
# powerful enough. We need the big shape to check for corner
# case.
# case.
dot_result
=
T
.
fmatrix
(
"dot_result"
)
dot_result
=
tt
.
fmatrix
(
"dot_result"
)
# Seed numpy.random with config.unittests.rseed
# Seed numpy.random with config.unittests.rseed
utt
.
seed_rng
()
utt
.
seed_rng
()
...
@@ -50,10 +50,10 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
...
@@ -50,10 +50,10 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
dot_value
=
np
.
asarray
(
np
.
dot
(
xx
,
W_values
),
dtype
=
"float32"
)
dot_value
=
np
.
asarray
(
np
.
dot
(
xx
,
W_values
),
dtype
=
"float32"
)
del
W_values
del
W_values
p_y_given_x
=
T
.
nnet
.
softmax
(
dot_result
+
b
)
p_y_given_x
=
tt
.
nnet
.
softmax
(
dot_result
+
b
)
y_pred
=
T
.
argmax
(
p_y_given_x
,
axis
=-
1
)
y_pred
=
tt
.
argmax
(
p_y_given_x
,
axis
=-
1
)
loss
=
-
T
.
mean
(
T
.
log
(
p_y_given_x
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])
loss
=
-
tt
.
mean
(
tt
.
log
(
p_y_given_x
)[
tt
.
arange
(
y
.
shape
[
0
]),
y
])
dW
=
T
.
grad
(
loss
,
dot_result
)
dW
=
tt
.
grad
(
loss
,
dot_result
)
classify
=
theano
.
function
(
classify
=
theano
.
function
(
inputs
=
[
y
,
b
,
dot_result
],
outputs
=
[
loss
,
y_pred
,
dW
],
mode
=
mode_without_gpu
inputs
=
[
y
,
b
,
dot_result
],
outputs
=
[
loss
,
y_pred
,
dW
],
mode
=
mode_without_gpu
)
)
...
@@ -63,7 +63,7 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
...
@@ -63,7 +63,7 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
assert
any
(
assert
any
(
[
[
isinstance
(
node
.
op
,
T
.
nnet
.
CrossentropySoftmaxArgmax1HotWithBias
)
isinstance
(
node
.
op
,
tt
.
nnet
.
CrossentropySoftmaxArgmax1HotWithBias
)
for
node
in
classify
.
maker
.
fgraph
.
toposort
()
for
node
in
classify
.
maker
.
fgraph
.
toposort
()
]
]
)
)
...
@@ -100,11 +100,9 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
...
@@ -100,11 +100,9 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
dnll_value
=
np
.
asarray
(
np
.
random
.
rand
(
batch_size
),
dtype
=
"float32"
)
dnll_value
=
np
.
asarray
(
np
.
random
.
rand
(
batch_size
),
dtype
=
"float32"
)
y_idx_value
=
np
.
random
.
randint
(
low
=
0
,
high
=
5
,
size
=
batch_size
)
y_idx_value
=
np
.
random
.
randint
(
low
=
0
,
high
=
5
,
size
=
batch_size
)
softmax_output
=
T
.
fmatrix
()
softmax_output
=
tt
.
fmatrix
()
softmax_output
/=
softmax_output
.
sum
(
axis
=
1
)
.
reshape
(
softmax_output
.
shape
[
1
],
1
)
softmax_output
/=
softmax_output
.
sum
(
axis
=
1
)
.
reshape
(
softmax_output
.
shape
[
1
],
1
)
op
=
theano
.
tensor
.
nnet
.
crossentropy_softmax_1hot_with_bias_dx
(
op
=
crossentropy_softmax_1hot_with_bias_dx
(
dnll_value
,
softmax_output
,
y_idx_value
)
dnll_value
,
softmax_output
,
y_idx_value
)
cpu_f
=
theano
.
function
([
softmax_output
],
op
,
mode
=
mode_without_gpu
)
cpu_f
=
theano
.
function
([
softmax_output
],
op
,
mode
=
mode_without_gpu
)
gpu_f
=
theano
.
function
([
softmax_output
],
op
,
mode
=
mode_with_gpu
)
gpu_f
=
theano
.
function
([
softmax_output
],
op
,
mode
=
mode_with_gpu
)
...
@@ -113,7 +111,7 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
...
@@ -113,7 +111,7 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
assert
any
(
assert
any
(
[
[
isinstance
(
node
.
op
,
T
.
nnet
.
CrossentropySoftmax1HotWithBiasDx
)
isinstance
(
node
.
op
,
tt
.
nnet
.
CrossentropySoftmax1HotWithBiasDx
)
for
node
in
cpu_f
.
maker
.
fgraph
.
toposort
()
for
node
in
cpu_f
.
maker
.
fgraph
.
toposort
()
]
]
)
)
...
@@ -156,14 +154,14 @@ def softmax_with_bias_unittest_template(dtypeInput, dtypeBias):
...
@@ -156,14 +154,14 @@ def softmax_with_bias_unittest_template(dtypeInput, dtypeBias):
# TODO: check that we loop when there are too many threads. (THIS IS
# TODO: check that we loop when there are too many threads. (THIS IS
# NOT IMPLEMENTED)
# NOT IMPLEMENTED)
x
=
T
.
matrix
(
"x"
,
dtype
=
dtypeInput
)
x
=
tt
.
matrix
(
"x"
,
dtype
=
dtypeInput
)
b
=
T
.
vector
(
"b"
,
dtype
=
dtypeBias
)
b
=
tt
.
vector
(
"b"
,
dtype
=
dtypeBias
)
z
=
T
.
nnet
.
softmax_with_bias
(
x
,
b
)
z
=
tt
.
nnet
.
softmax_with_bias
(
x
,
b
)
f
=
theano
.
function
([
x
,
b
],
z
,
mode
=
mode_without_gpu
)
f
=
theano
.
function
([
x
,
b
],
z
,
mode
=
mode_without_gpu
)
f_gpu
=
theano
.
function
([
x
,
b
],
z
,
mode
=
mode_with_gpu
)
f_gpu
=
theano
.
function
([
x
,
b
],
z
,
mode
=
mode_with_gpu
)
assert
f
.
maker
.
fgraph
.
toposort
()[
-
1
]
.
op
==
T
.
nnet
.
softmax_with_bias
assert
f
.
maker
.
fgraph
.
toposort
()[
-
1
]
.
op
==
tt
.
nnet
.
softmax_with_bias
assert
isinstance
(
f_gpu
.
maker
.
fgraph
.
toposort
()[
-
2
]
.
op
,
GpuSoftmaxWithBias
)
assert
isinstance
(
f_gpu
.
maker
.
fgraph
.
toposort
()[
-
2
]
.
op
,
GpuSoftmaxWithBias
)
def
cmp
(
n
,
m
):
def
cmp
(
n
,
m
):
...
@@ -209,12 +207,12 @@ def softmax_unittest_template(dtypeInput):
...
@@ -209,12 +207,12 @@ def softmax_unittest_template(dtypeInput):
# We check that we loop when their is too much block
# We check that we loop when their is too much block
# We use slower code when there isn't enough shared memory
# We use slower code when there isn't enough shared memory
x
=
T
.
matrix
(
"x"
,
dtype
=
dtypeInput
)
x
=
tt
.
matrix
(
"x"
,
dtype
=
dtypeInput
)
z
=
T
.
nnet
.
softmax
(
x
)
z
=
tt
.
nnet
.
softmax
(
x
)
f
=
theano
.
function
([
x
],
z
,
mode
=
mode_without_gpu
)
f
=
theano
.
function
([
x
],
z
,
mode
=
mode_without_gpu
)
f_gpu
=
theano
.
function
([
x
],
z
,
mode
=
mode_wo_cudnn
)
f_gpu
=
theano
.
function
([
x
],
z
,
mode
=
mode_wo_cudnn
)
assert
f
.
maker
.
fgraph
.
toposort
()[
-
1
]
.
op
==
T
.
nnet
.
softmax_op
assert
f
.
maker
.
fgraph
.
toposort
()[
-
1
]
.
op
==
tt
.
nnet
.
softmax_op
assert
isinstance
(
f_gpu
.
maker
.
fgraph
.
toposort
()[
-
2
]
.
op
,
GpuSoftmax
)
assert
isinstance
(
f_gpu
.
maker
.
fgraph
.
toposort
()[
-
2
]
.
op
,
GpuSoftmax
)
def
cmp
(
n
,
m
):
def
cmp
(
n
,
m
):
...
@@ -256,7 +254,7 @@ class TestSoftMax:
...
@@ -256,7 +254,7 @@ class TestSoftMax:
f
=
theano
.
function
([
x
],
f_z_out
,
mode
=
mode_without_gpu
)
f
=
theano
.
function
([
x
],
f_z_out
,
mode
=
mode_without_gpu
)
f_gpu
=
theano
.
function
([
x_gpu
],
f_gpu_z_out
,
mode
=
self
.
mode
)
f_gpu
=
theano
.
function
([
x_gpu
],
f_gpu_z_out
,
mode
=
self
.
mode
)
self
.
_check_types
(
f
,
f_gpu
,
T
.
nnet
.
Softmax
,
self
.
gpu_op
)
self
.
_check_types
(
f
,
f_gpu
,
tt
.
nnet
.
Softmax
,
self
.
gpu_op
)
# we need to test n>32*1024 to check that we make the block loop.
# we need to test n>32*1024 to check that we make the block loop.
cmp
(
1
,
5
,
f
,
f_gpu
)
cmp
(
1
,
5
,
f
,
f_gpu
)
...
@@ -303,16 +301,16 @@ class TestSoftMax:
...
@@ -303,16 +301,16 @@ class TestSoftMax:
)
)
def
test_softmax
(
self
):
def
test_softmax
(
self
):
x
=
T
.
fmatrix
(
"x"
)
x
=
tt
.
fmatrix
(
"x"
)
z
=
T
.
nnet
.
softmax_op
z
=
tt
.
nnet
.
softmax_op
f
,
f_gpu
=
self
.
_test_softmax
(
x
,
x
,
z
,
z
,
self
.
_cmp
)
f
,
f_gpu
=
self
.
_test_softmax
(
x
,
x
,
z
,
z
,
self
.
_cmp
)
self
.
_cmp
(
2
<<
15
,
5
,
f
,
f_gpu
)
self
.
_cmp
(
2
<<
15
,
5
,
f
,
f_gpu
)
def
test_softmax_shape_0
(
self
):
def
test_softmax_shape_0
(
self
):
x
=
T
.
fmatrix
(
"x"
)
x
=
tt
.
fmatrix
(
"x"
)
z
=
T
.
nnet
.
softmax_op
z
=
tt
.
nnet
.
softmax_op
f
,
f_gpu
=
self
.
_test_softmax
(
x
,
x
,
z
,
z
,
self
.
_cmp
)
f
,
f_gpu
=
self
.
_test_softmax
(
x
,
x
,
z
,
z
,
self
.
_cmp
)
# Theano can handle that case, but cudnn can't
# Theano can handle that case, but cudnn can't
...
...
tests/gpuarray/test_reduction.py
浏览文件 @
b4dc02d6
...
@@ -5,7 +5,7 @@ import pytest
...
@@ -5,7 +5,7 @@ import pytest
import
numpy
as
np
import
numpy
as
np
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
tt
from
theano.gpuarray
import
GpuArrayType
from
theano.gpuarray
import
GpuArrayType
from
theano.gpuarray.reduction
import
GpuMaxAndArgmax
from
theano.gpuarray.reduction
import
GpuMaxAndArgmax
...
@@ -96,7 +96,7 @@ class BaseTest:
...
@@ -96,7 +96,7 @@ class BaseTest:
def
get_host_tensor
(
self
):
def
get_host_tensor
(
self
):
broadcastable
=
(
False
,)
*
self
.
tensor_size
broadcastable
=
(
False
,)
*
self
.
tensor_size
return
T
.
tensor
(
self
.
dtype
,
broadcastable
)
return
tt
.
tensor
(
self
.
dtype
,
broadcastable
)
def
get_gpu_tensor
(
self
):
def
get_gpu_tensor
(
self
):
broadcastable
=
(
False
,)
*
self
.
tensor_size
broadcastable
=
(
False
,)
*
self
.
tensor_size
...
@@ -116,7 +116,7 @@ class BaseTest:
...
@@ -116,7 +116,7 @@ class BaseTest:
M
=
self
.
get_host_tensor
()
M
=
self
.
get_host_tensor
()
f
=
theano
.
function
(
f
=
theano
.
function
(
[
M
],
[
M
],
[
T
.
max
(
M
,
axis
=
axis
),
T
.
argmax
(
M
,
axis
=
axis
)],
[
tt
.
max
(
M
,
axis
=
axis
),
tt
.
argmax
(
M
,
axis
=
axis
)],
name
=
"shape:"
+
str
(
test_tensor
.
shape
)
+
"/axis:"
+
str
(
axis
)
+
"/HOST"
,
name
=
"shape:"
+
str
(
test_tensor
.
shape
)
+
"/axis:"
+
str
(
axis
)
+
"/HOST"
,
mode
=
mode_without_gpu
,
mode
=
mode_without_gpu
,
)
)
...
@@ -131,7 +131,7 @@ class BaseTest:
...
@@ -131,7 +131,7 @@ class BaseTest:
M
=
self
.
get_gpu_tensor
()
M
=
self
.
get_gpu_tensor
()
f
=
theano
.
function
(
f
=
theano
.
function
(
[
M
],
[
M
],
[
T
.
max
(
M
,
axis
=
axis
),
T
.
argmax
(
M
,
axis
=
axis
)],
[
tt
.
max
(
M
,
axis
=
axis
),
tt
.
argmax
(
M
,
axis
=
axis
)],
name
=
"shape:"
+
str
(
test_gpu_tensor
.
shape
)
+
"/axis:"
+
str
(
axis
)
+
"/GPU"
,
name
=
"shape:"
+
str
(
test_gpu_tensor
.
shape
)
+
"/axis:"
+
str
(
axis
)
+
"/GPU"
,
mode
=
mode_with_gpu
,
mode
=
mode_with_gpu
,
)
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论