Commit 6657d35b
Authored Sep 24, 2012 by goodfeli

Merge pull request #947 from nouiz/gpu_reduce

Gpu reduce small change.

Parents: 2196f4a4, 4135b1d9

Showing 3 changed files with 197 additions and 350 deletions.
theano/sandbox/cuda/__init__.py               +6   -1
theano/sandbox/cuda/basic_ops.py              +4   -5
theano/sandbox/cuda/tests/test_basic_ops.py  +187 -344
theano/sandbox/cuda/__init__.py

@@ -373,7 +373,12 @@ def use(device,
         if test_driver:
             import theano.sandbox.cuda.tests.test_driver
             theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1()
+            if device_properties(use.device_number)["warpSize"] != 32:
+                raise ValueError("Your GPU have a warpSize of 32. Currently"
+                                 " we have code that depend on this. Email"
+                                 " Theano mailing list to tell us about"
+                                 " this new GPU as we don't know any with"
+                                 " this properties")
         if move_shared_float32_to_gpu:
             handle_shared_float32(True)
...
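The new guard refuses to initialize the GPU on any device whose warp size is not 32, because the reduction kernels in basic_ops.py (below) assume that exactly 32 threads advance in lock-step. A minimal sketch of that control flow, with device_properties stubbed out (the real function queries the CUDA runtime; the stub and the check_warp_size helper are illustrative, not from the source):

    def device_properties(device_number):
        # Stub: the real implementation asks the CUDA runtime for the
        # properties of the given device.
        return {"warpSize": 32}

    def check_warp_size(device_number):
        # Mirrors the guard added to use(): any warp size other than 32
        # would make the warp-synchronous reduction code return wrong
        # results, so initialization fails loudly instead.
        if device_properties(device_number)["warpSize"] != 32:
            raise ValueError("unsupported warpSize: the CUDA reduction "
                             "code assumes warpSize == 32")

    check_warp_size(0)  # passes with the stub above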
theano/sandbox/cuda/basic_ops.py

@@ -624,8 +624,8 @@ class GpuCAReduce(GpuOp):
         # but tensor.elemwise.CAReduce has this exact same check so I guess
         # this is OK to do
         if self.scalar_op in [scal.minimum, scal.maximum]:
+            conds = []
             for i in xrange(nd_in):
-                conds = []
                 if self.reduce_mask[i]:
                     conds.append("(CudaNdarray_HOST_DIMS(%(x)s)[%(i)s] == 0)" % locals())
             assert len(conds) > 0
...
@@ -723,7 +723,7 @@ class GpuCAReduce(GpuOp):
                 if (verbose)
                     printf("running kernel_reduce_10_%(name)s\\n");
-                int n_shared = sizeof(float) * n_threads.x;
+                int n_shared = sizeof(float) * n_threads.x * n_threads.y * n_threads.z;
                 kernel_reduce_10_%(name)s<<<n_blocks, n_threads,
                         n_shared>>>(
                     CudaNdarray_HOST_DIMS(%(x)s)[0],
...
@@ -862,11 +862,10 @@ class GpuCAReduce(GpuOp):
             extern __shared__ float buf[];
             float myresult = 0.0f;
+            //This is caught in cuda/init.py when we init the gpu. I keep
+            //it here to ease finding code that rely on this.
             if (warpSize != 32)
             {
-                // TODO: set error code
-                // 2012-09-20 IG: as of today, Fred says he will check
-                // this elsewhere, in a different PR
                 Z[0] = -666;
                 return;
             }
...
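The kernel_reduce_10 launch previously sized its dynamic shared-memory buffer for the x dimension of the thread block only. Every thread in the block writes one float into buf[], so the allocation has to cover all three block dimensions. A small sketch of the corrected arithmetic (the helper name is illustrative, not from the source):

    import numpy

    def shared_bytes(n_threads_x, n_threads_y=1, n_threads_z=1,
                     itemsize=numpy.dtype('float32').itemsize):
        # One float32 slot per thread in the block. The old code
        # multiplied by n_threads.x only, which under-allocates the
        # buffer for 2-D and 3-D blocks.
        return itemsize * n_threads_x * n_threads_y * n_threads_z

    assert shared_bytes(32) == 128       # 1-D block: old and new agree
    assert shared_bytes(32, 16) == 2048  # 2-D block: old code gave 128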
theano/sandbox/cuda/tests/test_basic_ops.py

@@ -42,7 +42,15 @@ def tes_use():
         tcn.use()
 
-def test_sum():
+def tensor_pattern_to_gpu_pattern(shape, pattern):
+    gpu_pattern = [0 for elem in shape]
+    for idx in pattern:
+        gpu_pattern[idx] = 1
+    gpu_pattern = tuple(gpu_pattern)
+    return gpu_pattern
+
+
+def test_careduce():
     """
     test sum pattern 1, 11, 10, 01, 001, 010, 100, 110, 011, 111,
     0011, 0101, 0111, 1011, 1111
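tensor_pattern_to_gpu_pattern turns the list of reduced axes into the 0/1 reduce mask that names the GpuCAReduce patterns throughout this file (the #10, #0011, ... comments in the tables below). For example:

    >>> tensor_pattern_to_gpu_pattern((4100, 3), [0])           # the "10" case
    (1, 0)
    >>> tensor_pattern_to_gpu_pattern((4, 3, 2, 4100), [2, 3])  # "0011"
    (0, 0, 1, 1)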
@@ -56,363 +64,198 @@ def test_sum():

The rest of this hunk rewrites the old test_sum body and, together with the deletion of the old test_max further down, folds both tests into the single test_careduce above: one driver that runs every shape/pattern case for both theano.scalar.add and theano.scalar.maximum. The removed lines are the old test_sum body (the same shape/pattern table one indentation level shallower, building T.Sum(pattern)(a) and asserting T.Sum in the CPU graph) plus the whole of the old test_max, whose local tensor_pattern_to_gpu_pattern helper, known_fail flag, ValueError bookkeeping and verbose failure prints move into test_careduce. The new code reads:

    TODO: test with broadcast
    """
    for scalar_op in [theano.scalar.add, theano.scalar.maximum]:
        for shape, pattern in [((1,1),(1,)),
                               ((1,0),(1,)),
                               ((0,1),(1,)),
                               ((0,0),(1,)),
                               ((0,0,0),(1,2)),
                               ((0,0,0,0),(1,2,3)),
                               ((2,1),(1,)),
                               ((1,2),(1,)),
                               ((100,3,1300),[1]),
                               ((0,),[0]),((5,),[0]),
                               ((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),
                               #need something bigger then 32 for some opt test.
                               ((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
                               ((0,0,0,0),[0,1,2,3]),
                               ((5,4,3,20),[2,3]),((5,4,3,2),[0,1,2,3]),
                               ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
                               ((5,4,3,10,11),[1,2]),
                               #test shape bigger then 4096 on each dimension to make sure that we work correctly when we don't have enough thread/block in each dimensions
                               ((4100,3),[0]),((3,4101),[0]),#10
                               ((1024,33),[0]),((33,1024),[0]),#10
                               ((1025,33),[0]),((33,1025),[0]),#10
                               ((4100,3),[1]),((3,4101),[1]),#01
                               ((1024,33),[1]),((33,1024),[1]),#01
                               ((1025,33),[1]),((33,1025),[1]),#01
                               ((4100,3),[0,1]),((3,4101),[0,1]),#11
                               ((1024,33),[0,1]),((33,1024),[0,1]),#01
                               ((1025,33),[0,1]),((33,1025),[0,1]),#01
                               ((4100,4,3),[0]),((5,4100,3),[0]),((5,4,4100),[0]),#100
                               ((4100,4,3),[1]),((5,4100,3),[1]),((5,4,4100),[1]),#010
                               ((4100,4,3),[2]),((5,4100,3),[2]),((5,4,4100),[2]),#001
                               ((4100,4,3),[0,1]),((5,4100,3),[0,1]),((5,4,4100),[0,1]),#110
                               ((4100,4,3),[1,2]),((5,4100,3),[1,2]),((5,4,4100),[1,2]),#011
                               #((4100,4,3),[0,2]),((5,4100,3),[0,2]),((5,4,4100),[0,2]),#101 ##not implemented
                               ((4100,4,3),[0,1,2]),((5,4100,3),[0,1,2]),((5,4,4100),[0,1,2]),#111
                               ((4100,4,3,2),[2,3]),((4,4100,3,2),[2,3]),((4,3,4100,2),[2,3]),((4,3,2,4100),[2,3]),#0011
                               ((4100,4,3,2),[1,3]),((4,4100,3,2),[1,3]),((4,3,4100,2),[1,3]),((4,3,2,4100),[1,3]),#0101
                               ((4100,4,3,2),[0,2,3]),((4,4100,3,2),[0,2,3]),((4,3,4100,2),[0,2,3]),#((4,3,2,4100),[0,2,3]),#1011
                               ((4100,4,3,2),[1,2,3]),((4,4100,3,2),[1,2,3]),((4,3,4100,2),[1,2,3]),((4,3,2,4100),[1,2,3]),#0111
                               ((4100,2,3,4),[0,1,2,3]),((2,4100,3,4),[0,1,2,3]),((2,3,4100,4),[0,1,2,3]),((2,3,4,4100),[0,1,2,3]),#1111
                               #test pattern implemented by reshape
                               ((4100,4,3,2),[0]),((4,4100,3,2),[0]),((4,3,4100,2),[0]),((4,3,2,4100),[0]),#1000
                               ((4100,4,3,2),[1]),((4,4100,3,2),[1]),((4,3,4100,2),[1]),((4,3,2,4100),[1]),#0100
                               ((4100,4,3,2),[2]),((4,4100,3,2),[2]),((4,3,4100,2),[2]),((4,3,2,4100),[2]),#0010
                               ((4100,4,3,2),[3]),((4,4100,3,2),[3]),((4,3,4100,2),[3]),((4,3,2,4100),[3]),#0001
                               ((1100,2,3,4,5),[0,1,2,3,4]),((2,1100,3,4,5),[0,1,2,3,4]),((2,3,1100,4,5),[0,1,2,3,4]),((2,3,4,1100,5),[0,1,2,3,4]),((2,3,4,5,1100),[0,1,2,3,4]),#11111
                               ]:
            op = tensor.CAReduce(scalar_op, axis=pattern)
            pat = tensor_pattern_to_gpu_pattern(shape, pattern)
            #GpuCAReduce{maximum} support only those patterns
            if scalar_op is theano.scalar.maximum and pat not in [
                    (0, 1), (0, 1, 1), (0, 1, 1)]:
                continue
            a = tensor.TensorType('float32', (False,) * len(shape))()
            b = op(a)
            val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
            # val = numpy.ones(shape)
            # val = numpy.arange(numpy.prod(shape)).reshape(shape)
            val = theano._asarray(val, dtype='float32')
            f = theano.function([a], b, mode=mode_with_gpu)
            f2 = theano.function([a], b, mode=mode_without_gpu)
            assert tcn.GpuCAReduce in [x.op.__class__
                                       for x in f.maker.fgraph.toposort()]
            assert op.__class__ in [x.op.__class__
                                    for x in f2.maker.fgraph.toposort()]
            f_caused_value_error = False
            try:
                f_out = f(val)
            except ValueError, e:
                exc = e
                f_caused_value_error = True
            f2_caused_value_error = False
            try:
                f2_out = f2(val)
            except ValueError, e:
                exc2 = e
                f2_caused_value_error = True
            # Check that 0 shape matrices are invalid in the same cases
            if f_caused_value_error != f2_caused_value_error:
                if f_caused_value_error:
                    print 'f caused this value error:'
                    print exc
                else:
                    print 'f did not raise a value error, but should have'
                if f2_caused_value_error:
                    print 'f2 caused this value error:'
                    print exc2
                else:
                    print 'f should not have raised a value error'
                print 'shape was: ', shape
                print 'pattern was: ', pattern
                assert False
            if f_caused_value_error:
                continue
            if val.size == 0:
                assert f2(val).size == f(val).size
                assert f2(val).shape == f(val).shape
            else:
                try:
                    #We raise the error threashold as we sum big matrix
                    #and this cause small rounding difference with some seed
                    #example in debug mode with unittests.rseed=9275
                    orig_rtol = theano.tensor.basic.float32_rtol
                    theano.tensor.basic.float32_rtol = 2e-5
                    f2_val = f2(val)
                    f_val = f(val)
                    if not _allclose(f2_val, f_val):
                        print 'failed for the following arguments: '
                        print 'shape:', shape
                        print 'pattern: ', pattern
                        print 'input:'
                        print val
                        print 'correct output: '
                        print f2_val
                        print 'actual output: '
                        print f_val
                        assert False
                finally:
                    theano.tensor.basic.float32_rtol = orig_rtol

        #test with dimshuffle
        #we shuffle the 2 outer dims.
        for shape, pattern in [#((5,),[0]),
                               ((5,4),[0,1]),((5,4),[0]),
                               ((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
                               ((5,4,3,2),[0,1,2,3]),
                               ((5,4,3,2),[0,2,3])]:
            op = tensor.CAReduce(scalar_op, axis=pattern)
            pat = tensor_pattern_to_gpu_pattern(shape, pattern)
            #GpuCAReduce{maximum} support only those patterns
            if scalar_op is theano.scalar.maximum and pat not in [
                    (0, 1), (0, 1, 1), (0, 1, 1)]:
                continue
            a = tensor.TensorType('float32', (False,) * len(shape))()
            dim_pattern = range(len(shape))
            dim_pattern[0] = 1
            dim_pattern[1] = 0
            a = a.dimshuffle(dim_pattern)
            b = op(a)
            val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
            # val = numpy.ones(shape)
            # val = numpy.arange(numpy.prod(shape)).reshape(shape)
            val = theano._asarray(val, dtype='float32')
            f = theano.function([a], b, mode=mode_with_gpu)
            f2 = theano.function([a], b, mode=mode_without_gpu)
            assert tcn.GpuCAReduce in [x.op.__class__
                                       for x in f.maker.fgraph.toposort()]
            assert op.__class__ in [x.op.__class__
                                    for x in f2.maker.fgraph.toposort()]
            assert _allclose(f2(val), f(val)), (
                'shape', shape,
                'pattern', pattern,
                sum([shape[i] for i in pattern]))

        #test with broadcast
        for shape, pattern in [((5,),[0]),
                               ((5,4),[0,1]),((5,4),[0]),
                               ((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
                               ((5,4,3,2),[0,1,2,3]),
                               ((5,4,3,2),[0,2,3])]:
            op = tensor.CAReduce(scalar_op, axis=pattern)
            pat = tensor_pattern_to_gpu_pattern(shape, pattern)
            #GpuCAReduce{maximum} support only those patterns
            if scalar_op is theano.scalar.maximum and pat not in [
                    (0, 1), (0, 1, 1), (0, 1, 1)]:
                continue
            shape = numpy.asarray(shape) * 2
            a = tensor.TensorType('float32', (False,) * len(shape))()
            a2 = tcn.CudaNdarrayType((False,) * len(shape))()
            b = op(a)
            b2 = op(a2)
            val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
            # val = numpy.ones(shape)
            # val = numpy.arange(numpy.prod(shape)).reshape(shape)
            val = theano._asarray(val, dtype='float32')
            val2 = cuda.CudaNdarray(val)
            if len(shape) == 1:
                val = val[::2]
                val2 = val2[::2]
            elif len(shape) == 2:
                val = val[::2, ::2]
                val2 = val2[::2, ::2]
            elif len(shape) == 3:
                val = val[::2, ::2, ::2]
                val2 = val2[::2, ::2, ::2]
            elif len(shape) == 4:
                val = val[::2, ::2, ::2, ::2]
                val2 = val2[::2, ::2, ::2, ::2]
            f = theano.function([a], b, mode=mode_without_gpu)
            f2 = theano.function([a2], b2, mode=mode_with_gpu)
            assert tcn.GpuCAReduce in [x.op.__class__
                                       for x in f2.maker.fgraph.toposort()]
            assert op.__class__ in [x.op.__class__
                                    for x in f.maker.fgraph.toposort()]
            assert _allclose(f2(val2), f(val)), (
                'shape', shape,
                'pattern', pattern,
                sum([shape[i] for i in pattern]))

def test_flatten():
...
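The structure used throughout these tests, compiling the same graph once with and once without the GPU optimizations and comparing the outputs under a loosened float32 tolerance, can be sketched without Theano. Here reduce_ref and reduce_gpu stand in for the two compiled functions (hypothetical names; numpy is used for both sides so the sketch runs anywhere):

    import numpy

    def compare_backends(reduce_ref, reduce_gpu, shape, axis, rtol=2e-5):
        # Same harness shape as test_careduce: random float32 input, run
        # both implementations, compare with a relaxed relative tolerance,
        # since reducing large float32 arrays accumulates rounding error.
        val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
        val = val.astype('float32')
        out_ref = reduce_ref(val, axis)
        out_gpu = reduce_gpu(val, axis)
        assert numpy.allclose(out_ref, out_gpu, rtol=rtol), (shape, axis)

    # With no GPU at hand both sides can be numpy; the harness still runs:
    compare_backends(numpy.sum, numpy.sum, (33, 1024), 0)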