Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
4ed010d8
提交
4ed010d8
authored
4月 22, 2014
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
This should get scan working with non-float32 inputs/outputs in gpuarray.
It should also not break the old cuda backend.
上级
484ee1e0
显示空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
27 行增加
和
55 行删除
+27
-55
opt.py
theano/sandbox/cuda/opt.py
+6
-7
opt.py
theano/sandbox/gpuarray/opt.py
+2
-3
scan_op.py
theano/scan_module/scan_op.py
+19
-45
没有找到文件。
theano/sandbox/cuda/opt.py
浏览文件 @
4ed010d8
...
@@ -1535,6 +1535,11 @@ def local_gpu_extract_diagonal(node):
...
@@ -1535,6 +1535,11 @@ def local_gpu_extract_diagonal(node):
gpu_from_host
(
diag_node
.
inputs
[
0
]))]
gpu_from_host
(
diag_node
.
inputs
[
0
]))]
return
False
return
False
def typeConstructor(broadcastable, dtype):
    """Build the variable type for a broadcast pattern and dtype.

    :param broadcastable: broadcast pattern forwarded to the type
        constructor.
    :param dtype: dtype name; only ``'float32'`` yields a
        ``CudaNdarrayType``.

    :return: ``CudaNdarrayType(broadcastable=...)`` when ``dtype`` is
        ``'float32'``, otherwise ``TensorType(broadcastable=..., dtype=...)``.
    """
    # Anything that is not float32 gets a regular TensorType.
    if dtype != 'float32':
        return TensorType(broadcastable=broadcastable, dtype=dtype)
    return CudaNdarrayType(broadcastable=broadcastable)
@register_opt
(
'scan'
)
@register_opt
(
'scan'
)
@local_optimizer
([
gpu_from_host
,
scan_op
.
Scan
])
@local_optimizer
([
gpu_from_host
,
scan_op
.
Scan
])
...
@@ -1593,8 +1598,6 @@ def gpuScanOptimization(node):
...
@@ -1593,8 +1598,6 @@ def gpuScanOptimization(node):
_cmodule_key
=
gof
.
CLinker
()
.
cmodule_key_
(
local_fgraph
,
[])
_cmodule_key
=
gof
.
CLinker
()
.
cmodule_key_
(
local_fgraph
,
[])
info
[
'gpu_hash'
]
=
hash
(
_cmodule_key
)
info
[
'gpu_hash'
]
=
hash
(
_cmodule_key
)
typeConstructor
=
lambda
broadcastable
,
dtype
:
CudaNdarrayType
(
broadcastable
=
broadcastable
)
nw_op
=
scan_op
.
Scan
(
scan_ins
,
nw_op
=
scan_op
.
Scan
(
scan_ins
,
scan_outs
,
scan_outs
,
info
,
info
,
...
@@ -1642,10 +1645,6 @@ def gpuScanOptimization(node):
...
@@ -1642,10 +1645,6 @@ def gpuScanOptimization(node):
_cmodule_key
=
gof
.
CLinker
()
.
cmodule_key_
(
local_fgraph
,
[])
_cmodule_key
=
gof
.
CLinker
()
.
cmodule_key_
(
local_fgraph
,
[])
info
[
'gpu_hash'
]
=
hash
(
_cmodule_key
)
info
[
'gpu_hash'
]
=
hash
(
_cmodule_key
)
def
typeConstructor
(
broadcastable
,
dtype
):
assert
dtype
==
'float32'
return
CudaNdarrayType
(
broadcastable
=
broadcastable
)
_outputs
=
scan_op
.
Scan
(
_outputs
=
scan_op
.
Scan
(
scan_ins
,
scan_ins
,
scan_outs
,
scan_outs
,
...
@@ -1662,7 +1661,7 @@ def gpuScanOptimization(node):
...
@@ -1662,7 +1661,7 @@ def gpuScanOptimization(node):
optdb
.
register
(
'gpu_scanOp_make_inplace'
,
optdb
.
register
(
'gpu_scanOp_make_inplace'
,
scan_opt
.
ScanInplaceOptimizer
(
typeConstructor
=
CudaNdarrayType
,
scan_opt
.
ScanInplaceOptimizer
(
typeConstructor
=
typeConstructor
,
gpu_flag
=
True
),
gpu_flag
=
True
),
75
,
75
,
'gpu'
,
'gpu'
,
...
...
theano/sandbox/gpuarray/opt.py
浏览文件 @
4ed010d8
...
@@ -518,7 +518,6 @@ def gpu_reconstruct_graph(inputs, outputs, tag=None):
...
@@ -518,7 +518,6 @@ def gpu_reconstruct_graph(inputs, outputs, tag=None):
@op_lifter
([
scan_op
.
Scan
])
@op_lifter
([
scan_op
.
Scan
])
def
local_scan_to_gpua
(
node
):
def
local_scan_to_gpua
(
node
):
info
=
copy
.
deepcopy
(
node
.
op
.
info
)
info
=
copy
.
deepcopy
(
node
.
op
.
info
)
info
[
'gpu'
]
=
True
info
[
'gpua'
]
=
True
info
[
'gpua'
]
=
True
nw_ins
=
[
node
.
inputs
[
0
]]
nw_ins
=
[
node
.
inputs
[
0
]]
e
=
(
1
+
e
=
(
1
+
...
@@ -540,8 +539,8 @@ def local_scan_to_gpua(node):
...
@@ -540,8 +539,8 @@ def local_scan_to_gpua(node):
[
safe_to_cpu
(
x
)
for
x
in
scan_ins
]))
[
safe_to_cpu
(
x
)
for
x
in
scan_ins
]))
# We need to construct the hash here, because scan
# We need to construct the hash here, because scan
# __init__ does not know about
cuda ndarray
and can not
# __init__ does not know about
the gpu
and can not
# handle graphs with inputs being
Cuda Ndarrays
# handle graphs with inputs being
on the gpu
tmp_in
,
tmp_out
=
gpu_reconstruct_graph
(
scan_ins
,
scan_outs
)
tmp_in
,
tmp_out
=
gpu_reconstruct_graph
(
scan_ins
,
scan_outs
)
local_fgraph
=
gof
.
FunctionGraph
(
tmp_in
,
tmp_out
,
clone
=
False
)
local_fgraph
=
gof
.
FunctionGraph
(
tmp_in
,
tmp_out
,
clone
=
False
)
_cmodule_key
=
gof
.
CLinker
()
.
cmodule_key_
(
local_fgraph
,
[])
_cmodule_key
=
gof
.
CLinker
()
.
cmodule_key_
(
local_fgraph
,
[])
...
...
theano/scan_module/scan_op.py
浏览文件 @
4ed010d8
...
@@ -56,22 +56,21 @@ class Scan(PureOp):
...
@@ -56,22 +56,21 @@ class Scan(PureOp):
the scan op (like number of different types of
the scan op (like number of different types of
arguments, name, mode, if it should run on GPU or
arguments, name, mode, if it should run on GPU or
not, etc.)
not, etc.)
:param typeConstructor: function that constructs a Theano TensorType
:param typeConstructor: function that constructs an equivalent
able to represent a float32 ndarray.
to Theano TensorType
Note: ``typeConstructor`` was added to refactor how Theano
deals with the GPU. If it runs on the GPU, scan needs to construct
Note: ``typeConstructor`` was added to refactor how
certain outputs (those that reside in the GPU memory) as CudaNdarray.
Theano deals with the GPU. If it runs on the GPU, scan needs
However we can not import cuda in this file (as it is in sandbox,
to construct certain outputs (those that reside in the GPU
and not available on each machine) so the workaround is that the GPU
memory) as the GPU-specific type. However we can not import
optimization (which is aware of cuda types) passes to the
gpu code in this file (as it is in sandbox, and not available
constructor of this class a function that is able to construct
on each machine) so the workaround is that the GPU
CudaNdarray. This way the class Scan does not need to be aware of
optimization passes to the constructor of this class a
CudaNdarray, it just constructs any float32 tensor using this
function that is able to construct a GPU type. This way the
function (which by default constructs normal tensors). Note that the
class Scan does not need to be aware of the details for the
second assumption in this code is that any float32 output or input
GPU, it just constructs any tensor using this function (which
will be moved on the GPU if the optimization gets applied (following
by default constructs normal tensors).
Theano's philosophy of moving as much as possible on gpu).
"""
"""
if
'gpua'
not
in
info
:
if
'gpua'
not
in
info
:
info
[
'gpua'
]
=
False
info
[
'gpua'
]
=
False
...
@@ -97,23 +96,10 @@ class Scan(PureOp):
...
@@ -97,23 +96,10 @@ class Scan(PureOp):
# Note that for mit_mot there are several output slices per
# Note that for mit_mot there are several output slices per
# output sequence
# output sequence
o
=
outputs
[
idx
]
o
=
outputs
[
idx
]
# Scan assumes that only variables of dtype float32 might need a
# special constructor (i.e. CudaNdarray constructor) when the
# code is running on GPU, as it is the only type supported by
# Theano yet. Therefore only for dtype float32 we use the passed
# type constructor ``typeConstructor``. For anything else we
# know that even if we run it on the GPU we still construct
# normal Theano tensors.
if
o
.
type
.
dtype
in
[
'float32'
]:
self
.
output_types
.
append
(
self
.
output_types
.
append
(
typeConstructor
(
typeConstructor
(
broadcastable
=
(
False
,)
+
o
.
type
.
broadcastable
,
broadcastable
=
(
False
,)
+
o
.
type
.
broadcastable
,
dtype
=
o
.
type
.
dtype
))
dtype
=
o
.
type
.
dtype
))
else
:
self
.
output_types
.
append
(
tensorConstructor
(
broadcastable
=
(
False
,)
+
o
.
type
.
broadcastable
,
dtype
=
o
.
type
.
dtype
))
idx
+=
len
(
self
.
mit_mot_out_slices
[
jdx
])
idx
+=
len
(
self
.
mit_mot_out_slices
[
jdx
])
jdx
+=
1
jdx
+=
1
...
@@ -122,23 +108,11 @@ class Scan(PureOp):
...
@@ -122,23 +108,11 @@ class Scan(PureOp):
end
=
idx
+
self
.
n_mit_sot
+
self
.
n_sit_sot
+
self
.
n_nit_sot
end
=
idx
+
self
.
n_mit_sot
+
self
.
n_sit_sot
+
self
.
n_nit_sot
for
o
in
outputs
[
idx
:
end
]:
for
o
in
outputs
[
idx
:
end
]:
# Scan assumes that only variables of dtype float32 might need a
# special constructor (i.e. CudaNdarray constructor) when the
# code is running on GPU, as it is the only type supported by
# Theano yet. Therefore only for dtype float32 we use the passed
# type constructor ``typeConstructor``. For anything else we
# know that even if we run it on the GPU we still construct
# normal Theano tensors.
if
o
.
type
.
dtype
in
[
'float32'
]:
self
.
output_types
.
append
(
self
.
output_types
.
append
(
typeConstructor
(
typeConstructor
(
broadcastable
=
(
False
,)
+
o
.
type
.
broadcastable
,
broadcastable
=
(
False
,)
+
o
.
type
.
broadcastable
,
dtype
=
o
.
type
.
dtype
))
dtype
=
o
.
type
.
dtype
))
else
:
self
.
output_types
.
append
(
tensorConstructor
(
broadcastable
=
(
False
,)
+
o
.
type
.
broadcastable
,
dtype
=
o
.
type
.
dtype
))
# shared outputs + possibly the ending condition
# shared outputs + possibly the ending condition
for
o
in
outputs
[
end
:]:
for
o
in
outputs
[
end
:]:
self
.
output_types
.
append
(
o
.
type
)
self
.
output_types
.
append
(
o
.
type
)
...
@@ -184,14 +158,14 @@ class Scan(PureOp):
...
@@ -184,14 +158,14 @@ class Scan(PureOp):
self
.
n_shared_outs
)
self
.
n_shared_outs
)
self
.
n_outs
=
self
.
n_mit_mot
+
self
.
n_mit_sot
+
self
.
n_sit_sot
self
.
n_outs
=
self
.
n_mit_mot
+
self
.
n_mit_sot
+
self
.
n_sit_sot
self
.
n_tap_outs
=
self
.
n_mit_mot
+
self
.
n_mit_sot
self
.
n_tap_outs
=
self
.
n_mit_mot
+
self
.
n_mit_sot
if
not
self
.
info
[
'gpu'
]:
if
self
.
info
[
'gpu'
]
or
self
.
info
[
'gpua'
]:
self
.
_hash_inner_graph
=
self
.
info
[
'gpu_hash'
]
else
:
tmp_in
,
tmp_out
=
scan_utils
.
reconstruct_graph
(
self
.
inputs
,
tmp_in
,
tmp_out
=
scan_utils
.
reconstruct_graph
(
self
.
inputs
,
self
.
outputs
)
self
.
outputs
)
local_fgraph
=
gof
.
FunctionGraph
(
tmp_in
,
tmp_out
,
clone
=
False
)
local_fgraph
=
gof
.
FunctionGraph
(
tmp_in
,
tmp_out
,
clone
=
False
)
self
.
_cmodule_key
=
gof
.
CLinker
()
.
cmodule_key_
(
local_fgraph
,
[])
self
.
_cmodule_key
=
gof
.
CLinker
()
.
cmodule_key_
(
local_fgraph
,
[])
self
.
_hash_inner_graph
=
hash
(
self
.
_cmodule_key
)
self
.
_hash_inner_graph
=
hash
(
self
.
_cmodule_key
)
else
:
self
.
_hash_inner_graph
=
self
.
info
[
'gpu_hash'
]
def
make_node
(
self
,
*
inputs
):
def
make_node
(
self
,
*
inputs
):
"""
"""
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论