Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
91c8fcf3
提交
91c8fcf3
authored
9月 24, 2012
作者:
nouiz
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #934 from goodfeli/gpu_max
Gpu max
上级
a5965456
c3d358c5
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
9 个修改的文件
包含
42 行增加
和
27 行删除
+42
-27
cop.txt
doc/extending/cop.txt
+5
-0
__init__.py
theano/sandbox/cuda/__init__.py
+1
-1
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+0
-0
opt.py
theano/sandbox/cuda/opt.py
+24
-21
test_basic_ops.py
theano/sandbox/cuda/tests/test_basic_ops.py
+0
-0
test_driver.py
theano/sandbox/cuda/tests/test_driver.py
+4
-1
test_opt.py
theano/sandbox/cuda/tests/test_opt.py
+2
-2
basic.py
theano/scalar/basic.py
+3
-1
elemwise.py
theano/tensor/elemwise.py
+3
-1
没有找到文件。
doc/extending/cop.txt
浏览文件 @
91c8fcf3
...
...
@@ -33,6 +33,11 @@ There are less methods to define for an Op than for a Type:
This must return C code that carries out the computation we want to do.
sub is a dictionary of strings for you to substitute into your code.
It's not clear if it ever contains anything other than 'fail'.
sub['fail'] is a string of code that you should execute (after calling
PyErr_Format) if your C code needs to raise an exception.
.. method:: c_code_cleanup(node, name, input_names, output_names, sub)
This must return C code that cleans up whatever c_code allocated and
...
...
theano/sandbox/cuda/__init__.py
浏览文件 @
91c8fcf3
...
...
@@ -270,7 +270,7 @@ if cuda_available:
import
basic_ops
from
basic_ops
import
(
GpuFromHost
,
HostFromGpu
,
GpuElemwise
,
GpuDimShuffle
,
Gpu
Sum
,
GpuReshape
,
GpuContiguous
,
GpuDimShuffle
,
Gpu
CAReduce
,
GpuReshape
,
GpuContiguous
,
GpuSubtensor
,
GpuIncSubtensor
,
GpuAdvancedSubtensor1
,
GpuAdvancedIncSubtensor1
,
GpuFlatten
,
GpuShape
,
GpuAlloc
,
...
...
theano/sandbox/cuda/basic_ops.py
浏览文件 @
91c8fcf3
差异被折叠。
点击展开。
theano/sandbox/cuda/opt.py
浏览文件 @
91c8fcf3
...
...
@@ -582,9 +582,12 @@ def local_gpu_gemm(node):
@register_opt
()
@local_optimizer
([])
def
local_gpu_
sum
(
node
):
def
local_gpu_
careduce
(
node
):
if
isinstance
(
node
.
op
,
tensor
.
elemwise
.
CAReduce
):
if
node
.
op
.
scalar_op
==
scal
.
add
:
scalar_op
=
node
.
op
.
scalar_op
# currently, only these two ops are supported at all,
# and max does not support all combinations of axes
if
node
.
op
.
scalar_op
in
[
scal
.
add
,
scal
.
maximum
]:
x
,
=
node
.
inputs
if
x
.
owner
and
x
.
owner
.
op
==
host_from_gpu
:
if
node
.
op
.
axis
is
None
:
...
...
@@ -594,22 +597,21 @@ def local_gpu_sum(node):
for
a
in
node
.
op
.
axis
:
assert
reduce_mask
[
a
]
==
0
reduce_mask
[
a
]
=
1
gsum
=
GpuSum
(
reduce_mask
)
pattern
=
(
''
.
join
(
str
(
i
)
for
i
in
reduce_mask
))
if
hasattr
(
gsum
,
'c_code_reduce_
%
s'
%
pattern
):
rval
=
host_from_gpu
(
gsum
(
gpu_from_host
(
x
)))
greduce
=
GpuCAReduce
(
reduce_mask
,
scalar_op
)
if
greduce
.
supports_c_code
([
gpu_from_host
(
x
)]):
rval
=
host_from_gpu
(
greduce
(
gpu_from_host
(
x
)))
if
rval
.
type
==
node
.
outputs
[
0
]
.
type
:
return
[
rval
]
else
:
print
>>
sys
.
stderr
,
\
"WARNING: local_gpu_
sum
got type wrong"
"WARNING: local_gpu_
careduce
got type wrong"
return
None
else
:
# Try to make a simpler pattern based on reshaping
# The principle is that if two adjacent dimensions have
# the same value in the reduce_mask, then we can reshape
# to make them a single dimension, do the
sum
, and then
# to make them a single dimension, do the
reduction
, and then
# reshape to get them back.
shape_of
=
node
.
fgraph
.
shape_feature
.
shape_of
...
...
@@ -625,27 +627,28 @@ def local_gpu_sum(node):
new_mask
.
append
(
reduce_mask
[
i
])
new_in_shp
.
append
(
x_shape
[
i
])
pattern
=
(
''
.
join
(
str
(
i
)
for
i
in
new_mask
))
new_gsum
=
GpuSum
(
new_mask
)
if
hasattr
(
new_gsum
,
'c_code_reduce_
%
s'
%
pattern
):
reshaped_x
=
x
.
reshape
(
tensor
.
stack
(
*
new_in_shp
))
sum_reshaped_x
=
host_from_gpu
(
new_gsum
(
gpu_from_host
(
reshaped_x
)))
new_greduce
=
GpuCAReduce
(
new_mask
,
scalar_op
)
reshaped_x
=
x
.
reshape
(
tensor
.
stack
(
*
new_in_shp
))
gpu_reshaped_x
=
gpu_from_host
(
reshaped_x
)
reshaped_gpu_inputs
=
[
gpu_reshaped_x
]
if
new_greduce
.
supports_c_code
(
reshaped_gpu_inputs
):
reduce_reshaped_x
=
host_from_gpu
(
new_greduce
(
gpu_reshaped_x
))
if
sum
_reshaped_x
.
ndim
!=
node
.
outputs
[
0
]
.
ndim
:
unreshaped_
sum
=
sum
_reshaped_x
.
reshape
(
if
reduce
_reshaped_x
.
ndim
!=
node
.
outputs
[
0
]
.
ndim
:
unreshaped_
reduce
=
reduce
_reshaped_x
.
reshape
(
tensor
.
stack
(
*
shape_of
[
node
.
outputs
[
0
]]))
else
:
unreshaped_
sum
=
sum
_reshaped_x
if
unreshaped_
sum
.
type
==
node
.
outputs
[
0
]
.
type
:
return
[
unreshaped_
sum
]
unreshaped_
reduce
=
reduce
_reshaped_x
if
unreshaped_
reduce
.
type
==
node
.
outputs
[
0
]
.
type
:
return
[
unreshaped_
reduce
]
else
:
print
>>
sys
.
stderr
,
\
"WARNING: local_gpu_
sum
got type wrong"
"WARNING: local_gpu_
careduce
got type wrong"
return
None
raise
Exception
(
"GpuSum don't have implemented the pattern
"
,
"GpuCAReduce does not yet implement this pattern:
"
,
pattern
)
return
False
...
...
theano/sandbox/cuda/tests/test_basic_ops.py
浏览文件 @
91c8fcf3
差异被折叠。
点击展开。
theano/sandbox/cuda/tests/test_driver.py
浏览文件 @
91c8fcf3
...
...
@@ -28,7 +28,10 @@ def test_nvidia_driver1():
profile
=
False
)
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
len
(
topo
)
==
2
assert
sum
(
isinstance
(
node
.
op
,
B
.
GpuSum
)
for
node
in
topo
)
==
1
if
sum
(
isinstance
(
node
.
op
,
B
.
GpuCAReduce
)
for
node
in
topo
)
!=
1
:
msg
=
'
\n\t
'
.
join
([
'Expected exactly one occurrence of GpuCAReduce '
+
'but got:'
]
+
[
str
(
app
)
for
app
in
topo
])
raise
AssertionError
(
msg
)
if
not
numpy
.
allclose
(
f
(),
a
.
sum
()):
raise
Exception
(
"The nvidia driver version installed with this OS "
"does not give good results for reduction."
...
...
theano/sandbox/cuda/tests/test_opt.py
浏览文件 @
91c8fcf3
...
...
@@ -44,11 +44,11 @@ def test_int_pow():
f
=
theano
.
function
([
a
],
(
a
*
4
)
.
sum
(),
mode
=
mode_with_gpu
)
op_names
=
[
n
.
op
.
__class__
.
__name__
for
n
in
f
.
maker
.
fgraph
.
toposort
()]
assert
op_names
==
[
'Gpu
Sum
'
,
'GpuElemwise'
,
'HostFromGpu'
]
assert
op_names
==
[
'Gpu
CAReduce
'
,
'GpuElemwise'
,
'HostFromGpu'
]
f
=
theano
.
function
([
a
],
tensor
.
pow
(
a
,
4
)
.
sum
(),
mode
=
mode_with_gpu
)
op_names
=
[
n
.
op
.
__class__
.
__name__
for
n
in
f
.
maker
.
fgraph
.
toposort
()]
assert
op_names
==
[
'GpuElemwise'
,
'Gpu
Sum
'
,
'HostFromGpu'
]
assert
op_names
==
[
'GpuElemwise'
,
'Gpu
CAReduce
'
,
'HostFromGpu'
]
#theano.printing.debugprint(f)
...
...
theano/scalar/basic.py
浏览文件 @
91c8fcf3
...
...
@@ -1159,6 +1159,7 @@ class Maximum(BinaryScalarOp):
gx
=
eq
(
output
,
x
)
*
gz
gy
=
eq
(
output
,
y
)
*
gz
return
(
gx
,
gy
)
maximum
=
Maximum
(
upcast_out
,
name
=
'maximum'
)
...
...
@@ -1187,7 +1188,6 @@ class Minimum(BinaryScalarOp):
gx
=
eq
(
output
,
x
)
*
gz
gy
=
eq
(
output
,
y
)
*
gz
return
(
gx
,
gy
)
minimum
=
Minimum
(
upcast_out
,
name
=
'minimum'
)
...
...
@@ -1222,6 +1222,8 @@ class Add(ScalarOp):
for
i
in
inputs
:
retval
+=
[
gz
]
return
retval
add
=
Add
(
upcast_out
,
name
=
'add'
)
...
...
theano/tensor/elemwise.py
浏览文件 @
91c8fcf3
...
...
@@ -1082,14 +1082,16 @@ class Elemwise(Op):
class
CAReduce
(
Op
):
"""
CAReduce = Commutative Associative Reduce
Reduces a scalar operation along the specified axis(es).
(The scalar op should be both commutative and associative)
The output will have the same shape as the input minus the reduced
dimensions. It will contain the result of accumulating all values
over the reduced dimensions using the specified scalar op.
Examples:
CAReduce(add) -> sum
CAReduce(add) -> sum
(ie, acts like the numpy sum operation)
CAReduce(mul) -> product
CAReduce(maximum) -> max
CAReduce(minimum) -> min
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论