Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9ad79667
提交
9ad79667
authored
5月 05, 2014
作者:
abergeron
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1835 from nouiz/gpureduce
Gpureduce: support multiple dtype, prod, max and min
上级
43a86c9e
254dd8b7
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
57 行增加
和
27 行删除
+57
-27
pfunc.py
theano/compile/pfunc.py
+1
-2
link.py
theano/gof/link.py
+1
-1
check_blas.py
theano/misc/check_blas.py
+1
-1
opt.py
theano/sandbox/cuda/opt.py
+3
-3
elemwise.py
theano/sandbox/gpuarray/elemwise.py
+0
-0
opt.py
theano/sandbox/gpuarray/opt.py
+19
-7
test_elemwise.py
theano/sandbox/gpuarray/tests/test_elemwise.py
+20
-6
test_opt.py
theano/sandbox/gpuarray/tests/test_opt.py
+8
-6
basic.py
theano/scalar/basic.py
+4
-1
test_elemwise.py
theano/tensor/tests/test_elemwise.py
+0
-0
没有找到文件。
theano/compile/pfunc.py
浏览文件 @
9ad79667
...
...
@@ -364,8 +364,7 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
that are neither in "updates" nor in "no_default_updates".
:type name: None or string
:param name: attaches a name to the Profiling result of this function when
using ProfileMode (will be deprecated).
:param name: attaches a name to the profiling result of this function.
:type allow_input_downcast: Boolean
:param allow_input_downcast: True means that the values passed as
...
...
theano/gof/link.py
浏览文件 @
9ad79667
...
...
@@ -258,7 +258,7 @@ class Container(object):
"""WRITEME
:Parameters:
`r`: a
variabl
e
`r`: a
Variable or a Typ
e
`storage`: a list of length 1, whose element is the value for `r`
`readonly`: True indicates that this should not be setable by Function[r] = val
`strict`: if True, we don't allow type casting.
...
...
theano/misc/check_blas.py
浏览文件 @
9ad79667
...
...
@@ -215,7 +215,7 @@ if __name__ == "__main__":
C1060 0.46s
GTX Titan(D15U-50)0.06s 0.06s don't work
GTX 680
0.12s 0.154s 0.218s
GTX 680
0.11s
0.12s 0.154s 0.218s
GTX 580 0.16s 0.16s 0.164s 0.203s
GTX 480 0.19s 0.19s 0.192s 0.237s 0.27s
GTX 470 0.23s 0.23s 0.238s 0.297s 0.34s
...
...
theano/sandbox/cuda/opt.py
浏览文件 @
9ad79667
...
...
@@ -442,7 +442,7 @@ def local_gpu_lazy_ifelse(node):
@register_opt
()
@local_optimizer
([
gpu_from_host
,
tensor
.
blas
.
_d
ot22
])
@local_optimizer
([
gpu_from_host
,
tensor
.
blas
.
D
ot22
])
def
local_gpu_dot22
(
node
):
"""
gpu_from_host(dot22) -> gpudot(gpu_from_host)
...
...
@@ -465,7 +465,7 @@ def local_gpu_dot22(node):
@register_opt
()
@local_optimizer
([
gpu_from_host
,
tensor
.
blas
.
_dot22s
calar
])
@local_optimizer
([
gpu_from_host
,
tensor
.
blas
.
Dot22S
calar
])
def
local_gpu_dot22scalar
(
node
):
"""
gpu_from_host(dot22scalar) -> gpudot(gpu_from_host)
...
...
@@ -571,7 +571,7 @@ def local_gpu_ger(node):
@register_opt
()
@local_optimizer
([
tensor
.
blas
.
gemm_no_inplace
,
gpu_from_host
])
@local_optimizer
([
tensor
.
blas
.
Gemm
,
gpu_from_host
])
def
local_gpu_gemm
(
node
):
"""
gpu_from_host(gemm) -> gpu_gemm(gpu_from_host)
...
...
theano/sandbox/gpuarray/elemwise.py
浏览文件 @
9ad79667
差异被折叠。
点击展开。
theano/sandbox/gpuarray/opt.py
浏览文件 @
9ad79667
...
...
@@ -344,14 +344,15 @@ def local_gpua_advanced_incsubtensor(node):
@register_opt
()
@op_lifter
([
tensor
.
CAReduce
,
tensor
.
Sum
])
@op_lifter
([
tensor
.
CAReduce
,
tensor
.
Sum
,
tensor
.
elemwise
.
Prod
])
def
local_gpua_careduce
(
node
):
if
(
isinstance
(
node
.
op
.
scalar_op
,
scalar
.
basic
.
Add
)
or
isinstance
(
node
.
op
.
scalar_op
,
scalar
.
basic
.
Mul
)):
if
isinstance
(
node
.
op
.
scalar_op
,
(
scalar
.
Add
,
scalar
.
Mul
,
scalar
.
Maximum
,
scalar
.
Minimum
)):
x
,
=
node
.
inputs
greduce
=
GpuCAReduceCuda
(
node
.
op
.
scalar_op
,
axis
=
node
.
op
.
axis
)
if
x
.
dtype
!=
"float32"
:
return
greduce
=
GpuCAReduceCuda
(
node
.
op
.
scalar_op
,
axis
=
node
.
op
.
axis
,
dtype
=
getattr
(
node
.
op
,
'dtype'
,
None
),
acc_dtype
=
getattr
(
node
.
op
,
'acc_dtype'
,
None
))
gvar
=
greduce
(
x
)
#We need to have the make node called, otherwise the mask can
#be None
...
...
@@ -384,10 +385,21 @@ def local_gpua_careduce(node):
else
:
new_mask
.
append
(
reduce_mask
[
i
])
new_in_shp
.
append
(
x_shape
[
i
])
new_axis
=
[]
for
idx
,
m
in
enumerate
(
new_mask
):
if
m
==
1
:
new_axis
.
append
(
idx
)
new_greduce
=
GpuCAReduceCuda
(
node
.
op
.
scalar_op
,
axis
=
new_axis
,
reduce_mask
=
new_mask
,
dtype
=
getattr
(
node
.
op
,
'dtype'
,
None
),
acc_dtype
=
getattr
(
node
.
op
,
'acc_dtype'
,
None
))
new_greduce
=
GpuCAReduceCuda
(
new_mask
,
scalar_op
)
reshaped_x
=
x
.
reshape
(
tensor
.
stack
(
*
new_in_shp
))
gpu_reshaped_x
=
gpu_from_host
(
reshaped_x
)
gvar
=
greduce
(
gpu_reshaped_x
)
#We need to have the make node called, otherwise the mask can
#be None
reshaped_gpu_inputs
=
[
gpu_reshaped_x
]
if
new_greduce
.
supports_c_code
(
reshaped_gpu_inputs
):
reduce_reshaped_x
=
host_from_gpu
(
...
...
theano/sandbox/gpuarray/tests/test_elemwise.py
浏览文件 @
9ad79667
...
...
@@ -2,9 +2,10 @@ from theano import scalar, gof
from
theano.gof.python25
import
all
,
any
from
theano.tensor.tests.test_elemwise
import
(
test_Broadcast
,
test_DimShuffle
,
test_CAReduce
)
test_CAReduce
,
T_reduce_dtype
)
from
theano.sandbox.gpuarray.tests.test_basic_ops
import
rand_gpuarray
from
theano.sandbox.gpuarray.tests.test_basic_ops
import
(
mode_with_gpu
,
rand_gpuarray
)
from
theano.sandbox.gpuarray.elemwise
import
(
GpuElemwise
,
GpuDimShuffle
,
GpuCAReduceCuda
,
GpuCAReduceCPY
)
from
theano.sandbox.gpuarray.type
import
GpuArrayType
...
...
@@ -47,6 +48,8 @@ class test_GpuCAReduceCPY(test_CAReduce):
def
test_perform_nan
(
self
):
for
dtype
in
self
.
dtypes
:
if
not
dtype
.
startswith
(
'float'
):
continue
for
op
in
self
.
reds
:
self
.
with_linker
(
gof
.
PerformLinker
(),
op
,
dtype
=
dtype
,
test_nan
=
True
)
...
...
@@ -58,6 +61,8 @@ class test_GpuCAReduceCPY(test_CAReduce):
def
test_c_nan
(
self
):
for
dtype
in
self
.
dtypes
:
if
not
dtype
.
startswith
(
'float'
):
continue
for
op
in
self
.
reds
:
self
.
with_linker
(
gof
.
CLinker
(),
op
,
dtype
=
dtype
,
test_nan
=
True
)
...
...
@@ -68,9 +73,9 @@ class test_GpuCAReduceCPY(test_CAReduce):
class
test_GpuCAReduceCuda
(
test_GpuCAReduceCPY
):
dtypes
=
[
"float32"
]
dtypes
=
[
"float32"
,
"int64"
]
bin_dtypes
=
[
"uint8"
,
"int8"
]
bin_dtypes
=
[]
cases
=
[((
5
,
6
),
None
),
((
5
,
6
),
(
0
,
1
)),
((
5
,
6
),
(
0
,
)),
...
...
@@ -129,9 +134,10 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
((
4100
,
4
,
3
,
2
),[
0
,
2
,
3
]),((
4
,
4100
,
3
,
2
),[
0
,
2
,
3
]),((
4
,
3
,
4100
,
2
),[
0
,
2
,
3
]),
#((4,3,2,4100),[0,2,3]),#1011
((
4100
,
4
,
3
,
2
),[
1
,
2
,
3
]),((
4
,
4100
,
3
,
2
),[
1
,
2
,
3
]),((
4
,
3
,
4100
,
2
),[
1
,
2
,
3
]),((
4
,
3
,
2
,
4100
),[
1
,
2
,
3
]),
#0111
((
65
,
4
,
3
,
2
),[
1
,
2
,
3
]),((
4
,
65
,
3
,
2
),[
1
,
2
,
3
]),((
4
,
3
,
65
,
2
),[
1
,
2
,
3
]),((
4
,
3
,
2
,
65
),[
1
,
2
,
3
]),
#0111
((
4100
,
2
,
3
,
4
),[
0
,
1
,
2
,
3
]),((
2
,
4100
,
3
,
4
),[
0
,
1
,
2
,
3
]),((
2
,
3
,
4100
,
4
),[
0
,
1
,
2
,
3
]),((
2
,
3
,
4
,
4100
),[
0
,
1
,
2
,
3
]),((
128
,
1
,
3
,
3
),
[
0
,
1
,
2
,
3
]),
#1111
((
4100
,
2
,
3
,
4
),[
0
,
1
,
2
,
3
]),((
2
,
4100
,
3
,
4
),[
0
,
1
,
2
,
3
]),((
2
,
3
,
4100
,
4
),[
0
,
1
,
2
,
3
]),((
2
,
3
,
4
,
4100
),[
0
,
1
,
2
,
3
]),((
128
,
1
,
2
,
3
),
[
0
,
1
,
2
,
3
]),
#1111
#test pattern implemented by reshape
#Skip them as this test the op directly, not the optimization with reshape
# ((4100,4,3,2),[0]),((4,4100,3,2),[0]),((4,3,4100,2),[0]),((4,3,2,4100),[0]),#1000
# ((4100,4,3,2),[1]),((4,4100,3,2),[1]),((4,3,4100,2),[1]),((4,3,2,4100),[1]),#0100
# ((4100,4,3,2),[2]),((4,4100,3,2),[2]),((4,3,4100,2),[2]),((4,3,2,4100),[2]),#0010
...
...
@@ -140,10 +146,18 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
# ((5,4,3,10,11),[1,2]),
]
op
=
GpuCAReduceCuda
reds
=
[
scalar
.
add
,
scalar
.
mul
]
reds
=
[
scalar
.
add
,
scalar
.
mul
,
scalar
.
maximum
,
scalar
.
minimum
]
def
test_perform
(
self
):
return
def
test_perform_nan
(
self
):
return
class
T_gpureduce_dtype
(
T_reduce_dtype
):
mode
=
mode_with_gpu
.
excluding
(
'local_cut_useless_reduce'
)
op
=
GpuCAReduceCuda
#Currently we don't support reduction on 0 axis
axes
=
[
None
,
0
,
1
,
1
,
[
0
],
[
1
],
[
0
,
1
]]
theano/sandbox/gpuarray/tests/test_opt.py
浏览文件 @
9ad79667
...
...
@@ -46,16 +46,18 @@ def test_flatten():
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
def
test_
sum_prod
():
for
method
in
[
'sum'
]:
def
test_
reduce
():
for
method
in
[
'sum'
,
'prod'
,
'max'
,
'min'
]:
m
=
theano
.
tensor
.
fmatrix
()
f
=
theano
.
function
([
m
],
getattr
(
m
,
method
)(),
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
m
],
getattr
(
m
,
method
)(
axis
=
0
),
mode
=
mode_with_gpu
)
val
=
numpy
.
random
.
rand
(
10
,
11
)
.
astype
(
"float32"
)
res
=
f
(
val
)
utt
.
assert_allclose
(
res
,
val
.
sum
())
assert
res
.
shape
==
()
utt
.
assert_allclose
(
res
,
getattr
(
val
,
method
)(
axis
=
0
))
assert
res
.
shape
==
(
11
,)
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
GpuCAReduceCuda
in
[
type
(
node
.
op
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
for
node
in
topo
],
topo
def
test_local_gpualloc_memset_0
():
...
...
theano/scalar/basic.py
浏览文件 @
9ad79667
...
...
@@ -2335,7 +2335,10 @@ class Expm1(UnaryScalarOp):
def
c_code
(
self
,
node
,
name
,
(
x
,
),
(
z
,
),
sub
):
if
node
.
inputs
[
0
]
.
type
in
complex_types
:
raise
NotImplementedError
(
'type not supported'
,
type
)
return
"
%(z)
s = exp(
%(x)
s) - 1;"
%
locals
()
return
"
%(z)
s = expm1(
%(x)
s);"
%
locals
()
def
c_code_cache_version
(
self
):
return
(
5
,)
expm1
=
Expm1
(
upgrade_to_float
,
name
=
'expm1'
)
...
...
theano/tensor/tests/test_elemwise.py
浏览文件 @
9ad79667
差异被折叠。
点击展开。
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论