Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
49af6efe
提交
49af6efe
authored
8月 10, 2015
作者:
Iban Harlouchet
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
numpydoc for theano/sandbox/cuda/opt.py
上级
e9235e29
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
55 行增加
和
19 行删除
+55
-19
opt.py
theano/sandbox/cuda/opt.py
+55
-19
没有找到文件。
theano/sandbox/cuda/opt.py
浏览文件 @
49af6efe
...
@@ -141,7 +141,9 @@ class InputToGpuOptimizer(Optimizer):
...
@@ -141,7 +141,9 @@ class InputToGpuOptimizer(Optimizer):
Transfer the input of a graph to the gpu if it is necessary.
Transfer the input of a graph to the gpu if it is necessary.
It should make this part of the optimizer faster, as we will need only 1
It should make this part of the optimizer faster, as we will need only 1
pass on the fgraph.
pass on the fgraph.
"""
"""
def
__init__
(
self
):
def
__init__
(
self
):
Optimizer
.
__init__
(
self
)
Optimizer
.
__init__
(
self
)
...
@@ -208,7 +210,10 @@ def dtype_in_elemwise_supported(op):
...
@@ -208,7 +210,10 @@ def dtype_in_elemwise_supported(op):
Return True if the Elemwise op is supported on the gpu.
Return True if the Elemwise op is supported on the gpu.
Return False otherwise.
Return False otherwise.
:note: We need to check inside the Composite op.
Notes
-----
We need to check inside the Composite op.
"""
"""
def
get_all_basic_scalar
(
composite_op
):
def
get_all_basic_scalar
(
composite_op
):
l
=
[]
l
=
[]
...
@@ -231,8 +236,10 @@ def dtype_in_elemwise_supported(op):
...
@@ -231,8 +236,10 @@ def dtype_in_elemwise_supported(op):
@register_opt
()
@register_opt
()
@local_optimizer
([
tensor
.
Elemwise
])
@local_optimizer
([
tensor
.
Elemwise
])
def
local_gpu_elemwise_0
(
node
):
def
local_gpu_elemwise_0
(
node
):
"""elemwise(..., host_from_gpu, ...)
"""
-> host_from_gpu(elemwise(gpu_from_host, ..., gpu_from_host)
Elemwise(..., host_from_gpu, ...)
-> host_from_gpu(elemwise(gpu_from_host, ..., gpu_from_host)
"""
"""
if
(
isinstance
(
node
.
op
,
tensor
.
Elemwise
)
and
if
(
isinstance
(
node
.
op
,
tensor
.
Elemwise
)
and
dtype_in_elemwise_supported
(
node
.
op
)):
dtype_in_elemwise_supported
(
node
.
op
)):
...
@@ -294,6 +301,7 @@ def local_gpu_elemwise_0(node):
...
@@ -294,6 +301,7 @@ def local_gpu_elemwise_0(node):
def
local_gpu_elemwise_1
(
node
):
def
local_gpu_elemwise_1
(
node
):
"""
"""
gpu_from_host(Elemwise) -> GpuElemwise(gpu_from_host(...))
gpu_from_host(Elemwise) -> GpuElemwise(gpu_from_host(...))
"""
"""
if
isinstance
(
node
.
op
,
GpuFromHost
):
if
isinstance
(
node
.
op
,
GpuFromHost
):
host_i
,
=
node
.
inputs
host_i
,
=
node
.
inputs
...
@@ -350,6 +358,7 @@ def local_gpu_dimshuffle_0(node):
...
@@ -350,6 +358,7 @@ def local_gpu_dimshuffle_0(node):
"""
"""
dimshuffle(host_from_gpu()) -> host_from_gpu(gpu_dimshuffle)
dimshuffle(host_from_gpu()) -> host_from_gpu(gpu_dimshuffle)
gpu_from_host(dimshuffle) -> gpu_dimshuffle(gpu_from_host)
gpu_from_host(dimshuffle) -> gpu_dimshuffle(gpu_from_host)
"""
"""
if
isinstance
(
node
.
op
,
tensor
.
DimShuffle
):
if
isinstance
(
node
.
op
,
tensor
.
DimShuffle
):
input
,
=
node
.
inputs
input
,
=
node
.
inputs
...
@@ -375,6 +384,7 @@ def local_gpu_specifyShape_0(node):
...
@@ -375,6 +384,7 @@ def local_gpu_specifyShape_0(node):
"""
"""
specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape)
specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape)
gpu_from_host(specify_shape) -> specify_shape(gpu_from_host)
gpu_from_host(specify_shape) -> specify_shape(gpu_from_host)
"""
"""
if
isinstance
(
node
.
op
,
tensor
.
SpecifyShape
):
if
isinstance
(
node
.
op
,
tensor
.
SpecifyShape
):
input
=
node
.
inputs
[
0
]
input
=
node
.
inputs
[
0
]
...
@@ -403,11 +413,11 @@ def local_gpu_dot_to_dot22(node):
...
@@ -403,11 +413,11 @@ def local_gpu_dot_to_dot22(node):
transforming the vector into a matrix, apply gpudot22 and reshaping
transforming the vector into a matrix, apply gpudot22 and reshaping
the output.
the output.
A more suitable solution would be to use the right cublas call
A more suitable solution would be to use the right cublas call.
This is needed in fast_compile
This is needed in fast_compile.
"""
"""
# In case the dot does input upcast, we must check that we can
# In case the dot does input upcast, we must check that we can
# make it run on the gpu.
# make it run on the gpu.
if
isinstance
(
node
.
op
,
GpuFromHost
):
if
isinstance
(
node
.
op
,
GpuFromHost
):
...
@@ -482,10 +492,11 @@ theano.compile.optdb.register('assert_no_cpu_op', assert_no_cpu_op, 49.2)
...
@@ -482,10 +492,11 @@ theano.compile.optdb.register('assert_no_cpu_op', assert_no_cpu_op, 49.2)
@register_opt
()
@register_opt
()
@local_optimizer
([
theano
.
ifelse
.
IfElse
,
gpu_from_host
])
@local_optimizer
([
theano
.
ifelse
.
IfElse
,
gpu_from_host
])
def
local_gpu_lazy_ifelse
(
node
):
def
local_gpu_lazy_ifelse
(
node
):
"""
"""
gpu_from_host(ifelse) -> gpu_ifelse(gpu_from_host)
gpu_from_host(ifelse) -> gpu_ifelse(gpu_from_host)
ifelse(host_from_gpu) -> host_from_gpu(ifelse)
ifelse(host_from_gpu) -> host_from_gpu(ifelse)
"""
"""
if
isinstance
(
node
.
op
,
theano
.
ifelse
.
IfElse
)
and
not
node
.
op
.
gpu
:
if
isinstance
(
node
.
op
,
theano
.
ifelse
.
IfElse
)
and
not
node
.
op
.
gpu
:
gpu_ifelse
=
theano
.
ifelse
.
IfElse
(
node
.
op
.
n_outs
,
gpu
=
True
)
gpu_ifelse
=
theano
.
ifelse
.
IfElse
(
node
.
op
.
n_outs
,
gpu
=
True
)
...
@@ -554,6 +565,7 @@ def local_gpu_dot22(node):
...
@@ -554,6 +565,7 @@ def local_gpu_dot22(node):
gpu_from_host(dot22) -> gpudot(gpu_from_host)
gpu_from_host(dot22) -> gpudot(gpu_from_host)
dot(host_from_gpu) -> host_from_gpu(gpudot22)
dot(host_from_gpu) -> host_from_gpu(gpudot22)
"""
"""
if
isinstance
(
node
.
op
,
GpuFromHost
):
if
isinstance
(
node
.
op
,
GpuFromHost
):
host_input
=
node
.
inputs
[
0
]
host_input
=
node
.
inputs
[
0
]
...
@@ -577,6 +589,7 @@ def local_gpu_dot22scalar(node):
...
@@ -577,6 +589,7 @@ def local_gpu_dot22scalar(node):
gpu_from_host(dot22scalar) -> gpudot(gpu_from_host)
gpu_from_host(dot22scalar) -> gpudot(gpu_from_host)
dot(host_from_gpu) -> host_from_gpu(gpudot22scalar)
dot(host_from_gpu) -> host_from_gpu(gpudot22scalar)
"""
"""
if
isinstance
(
node
.
op
,
GpuFromHost
):
if
isinstance
(
node
.
op
,
GpuFromHost
):
host_input
=
node
.
inputs
[
0
]
host_input
=
node
.
inputs
[
0
]
...
@@ -602,7 +615,9 @@ def local_gpu_dot22scalar(node):
...
@@ -602,7 +615,9 @@ def local_gpu_dot22scalar(node):
def
local_gpu_solve
(
node
):
def
local_gpu_solve
(
node
):
"""
"""
gpu_from_host(CpuSolve) -> GpuSolve(gpu_from_host)
gpu_from_host(CpuSolve) -> GpuSolve(gpu_from_host)
CpuSolve(host_from_gpu) -> host_from_gpu(GpuSolve)
CpuSolve(host_from_gpu) -> host_from_gpu(GpuSolve)
"""
"""
if
isinstance
(
node
.
op
,
GpuFromHost
):
if
isinstance
(
node
.
op
,
GpuFromHost
):
host_input
=
node
.
inputs
[
0
]
host_input
=
node
.
inputs
[
0
]
...
@@ -627,6 +642,7 @@ def local_gpu_solve(node):
...
@@ -627,6 +642,7 @@ def local_gpu_solve(node):
def
local_gpu_gemv
(
node
):
def
local_gpu_gemv
(
node
):
"""
"""
gpu_from_host(gemv) -> gpu_gemv(gpu_from_host)
gpu_from_host(gemv) -> gpu_gemv(gpu_from_host)
gemv(host_from_gpu) -> host_from_gpu(gpu_gemv)
gemv(host_from_gpu) -> host_from_gpu(gpu_gemv)
"""
"""
...
@@ -665,6 +681,7 @@ def local_gpu_gemv(node):
...
@@ -665,6 +681,7 @@ def local_gpu_gemv(node):
def
local_gpu_ger
(
node
):
def
local_gpu_ger
(
node
):
"""
"""
gpu_from_host(ger) -> gpu_ger(gpu_from_host)
gpu_from_host(ger) -> gpu_ger(gpu_from_host)
ger(host_from_gpu) -> host_from_gpu(gpu_ger)
ger(host_from_gpu) -> host_from_gpu(gpu_ger)
"""
"""
...
@@ -706,6 +723,7 @@ def local_gpu_gemm(node):
...
@@ -706,6 +723,7 @@ def local_gpu_gemm(node):
gpu_from_host(gemm) -> gpu_gemm(gpu_from_host)
gpu_from_host(gemm) -> gpu_gemm(gpu_from_host)
gemm(host_from_gpu) -> host_from_gpu(gpu_gemm)
gemm(host_from_gpu) -> host_from_gpu(gpu_gemm)
"""
"""
if
isinstance
(
node
.
op
,
GpuFromHost
):
if
isinstance
(
node
.
op
,
GpuFromHost
):
host_input
=
node
.
inputs
[
0
]
host_input
=
node
.
inputs
[
0
]
...
@@ -1120,7 +1138,10 @@ def local_gpu_shape(node):
...
@@ -1120,7 +1138,10 @@ def local_gpu_shape(node):
@register_opt
()
@register_opt
()
@local_optimizer
([
tensor
.
Rebroadcast
])
@local_optimizer
([
tensor
.
Rebroadcast
])
def
local_gpu_rebroadcast
(
node
):
def
local_gpu_rebroadcast
(
node
):
'''rebroadcast(host_from_gpu(x)) -> host_from_gpu(rebroadcast(x))'''
"""
rebroadcast(host_from_gpu(x)) -> host_from_gpu(rebroadcast(x))
"""
if
isinstance
(
node
.
op
,
tensor
.
Rebroadcast
):
if
isinstance
(
node
.
op
,
tensor
.
Rebroadcast
):
x
,
=
node
.
inputs
x
,
=
node
.
inputs
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
...
@@ -1342,7 +1363,8 @@ def local_conv_fft_full(node):
...
@@ -1342,7 +1363,8 @@ def local_conv_fft_full(node):
def
values_eq_approx_high_tol
(
a
,
b
):
def
values_eq_approx_high_tol
(
a
,
b
):
"""This fct is needed to don't have DebugMode raise useless
"""
This fct is needed to don't have DebugMode raise useless
error due to rounding error.
error due to rounding error.
This happens as we reduce on the two last dimensions, so this
This happens as we reduce on the two last dimensions, so this
...
@@ -1364,6 +1386,7 @@ def local_gpu_conv(node):
...
@@ -1364,6 +1386,7 @@ def local_gpu_conv(node):
gpu_from_host(conv) -> gpu_conv(gpu_from_host)
gpu_from_host(conv) -> gpu_conv(gpu_from_host)
conv(host_from_gpu) -> host_from_gpu(gpu_conv)
conv(host_from_gpu) -> host_from_gpu(gpu_conv)
"""
"""
def
GpuConvOp_from_ConvOp
(
op
):
def
GpuConvOp_from_ConvOp
(
op
):
logical_img_hw
=
None
logical_img_hw
=
None
...
@@ -1534,7 +1557,10 @@ conv_groupopt.register('local_conv_gemm', local_conv_gemm, 30,
...
@@ -1534,7 +1557,10 @@ conv_groupopt.register('local_conv_gemm', local_conv_gemm, 30,
class
LocalCudaMetaOptimizer
(
LocalMetaOptimizer
):
class
LocalCudaMetaOptimizer
(
LocalMetaOptimizer
):
"""Base class for CUDA-based LocalMetaOptimizers"""
"""
Base class for CUDA-based LocalMetaOptimizers.
"""
def
time_call
(
self
,
fn
):
def
time_call
(
self
,
fn
):
# Override time_call() to do device synchronization
# Override time_call() to do device synchronization
...
@@ -1827,7 +1853,6 @@ def local_gpu_join(node):
...
@@ -1827,7 +1853,6 @@ def local_gpu_join(node):
by other opts, leaving us with
by other opts, leaving us with
host_from_gpu(gpu_join)
host_from_gpu(gpu_join)
For intermediate places in the graph not covered by the first opt, the
For intermediate places in the graph not covered by the first opt, the
following could be useful:
following could be useful:
...
@@ -1911,8 +1936,12 @@ optdb.register('InplaceGpuBlasOpt',
...
@@ -1911,8 +1936,12 @@ optdb.register('InplaceGpuBlasOpt',
def
get_device_type_sizes
():
def
get_device_type_sizes
():
"""
"""
:return:(gpu ptr size, cpu ptr size, int sizes(gpu and cpu))
:return type: tuple
Returns
-------
tuple
(gpu ptr size, cpu ptr size, int sizes(gpu and cpu)).
"""
"""
if
hasattr
(
get_device_type_sizes
,
'rval'
):
if
hasattr
(
get_device_type_sizes
,
'rval'
):
return
get_device_type_sizes
.
rval
return
get_device_type_sizes
.
rval
...
@@ -1941,7 +1970,7 @@ def get_device_type_sizes():
...
@@ -1941,7 +1970,7 @@ def get_device_type_sizes():
def
max_inputs_to_GpuElemwise
(
node
):
def
max_inputs_to_GpuElemwise
(
node
):
"""
"""
return the maximum number of inputs this GpuElemwise Apply node can
Return the maximum number of inputs this GpuElemwise Apply node can
accept.
accept.
This is needed as currently there is a limit of 256 bytes of
This is needed as currently there is a limit of 256 bytes of
...
@@ -1950,8 +1979,8 @@ def max_inputs_to_GpuElemwise(node):
...
@@ -1950,8 +1979,8 @@ def max_inputs_to_GpuElemwise(node):
2.x (not used).
2.x (not used).
This measures the number of parameters we put in our GPU function and
This measures the number of parameters we put in our GPU function and
computes the maximum number of inputs that respect the 256 byte
computes the maximum number of inputs that respect the 256 byte
limit.
limit.
"""
"""
type_sizes
=
get_device_type_sizes
()
type_sizes
=
get_device_type_sizes
()
int_size
=
type_sizes
[
'int_size'
]
int_size
=
type_sizes
[
'int_size'
]
...
@@ -1986,6 +2015,7 @@ def split_huge_add_or_mul(node):
...
@@ -1986,6 +2015,7 @@ def split_huge_add_or_mul(node):
This should not happen for other GpuElemwise as there is only the fusion
This should not happen for other GpuElemwise as there is only the fusion
that can generate ops with too many inputs and it checks for that.
that can generate ops with too many inputs and it checks for that.
"""
"""
if
node
.
op
.
scalar_op
in
(
scal
.
add
,
scal
.
mul
):
if
node
.
op
.
scalar_op
in
(
scal
.
add
,
scal
.
mul
):
max_nb_inputs
=
max_inputs_to_GpuElemwise
(
node
)
max_nb_inputs
=
max_inputs_to_GpuElemwise
(
node
)
...
@@ -2135,6 +2165,7 @@ def local_gpu_eye(node):
...
@@ -2135,6 +2165,7 @@ def local_gpu_eye(node):
gpu_from_host(eye) -> gpueye(gpu_from_host)
gpu_from_host(eye) -> gpueye(gpu_from_host)
eye(host_from_gpu) -> host_from_gpu(gpueye)
eye(host_from_gpu) -> host_from_gpu(gpueye)
"""
"""
if
isinstance
(
node
.
op
,
GpuFromHost
):
if
isinstance
(
node
.
op
,
GpuFromHost
):
host_input
=
node
.
inputs
[
0
]
host_input
=
node
.
inputs
[
0
]
...
@@ -2167,10 +2198,11 @@ def safe_to_cpu(x):
...
@@ -2167,10 +2198,11 @@ def safe_to_cpu(x):
def
gpu_safe_new
(
x
,
tag
=
''
):
def
gpu_safe_new
(
x
,
tag
=
''
):
"""
"""
Internal function that constructs a new variable from x with the same
Internal function that constructs a new variable from x with the same
type, but with a different name (old name + tag). This function is used
type, but with a different name (old name + tag). This function is used
by gradient, or the R-op to construct new variables for the inputs of
by gradient, or the R-op to construct new variables for the inputs of
the inner graph such that there is no interference between the original
the inner graph such that there is no interference between the original
graph and the newly constructed graph.
graph and the newly constructed graph.
"""
"""
if
hasattr
(
x
,
'name'
)
and
x
.
name
is
not
None
:
if
hasattr
(
x
,
'name'
)
and
x
.
name
is
not
None
:
nw_name
=
x
.
name
+
tag
nw_name
=
x
.
name
+
tag
...
@@ -2188,8 +2220,9 @@ def gpu_reconstruct_graph(inputs, outputs, tag=None):
...
@@ -2188,8 +2220,9 @@ def gpu_reconstruct_graph(inputs, outputs, tag=None):
"""
"""
Different interface to clone, that allows you to pass inputs.
Different interface to clone, that allows you to pass inputs.
Compared to clone, this method always replaces the inputs with
Compared to clone, this method always replaces the inputs with
new variables of the same type, and returns those (in the same
new variables of the same type, and returns those (in the same
order as the original inputs).
order as the original inputs).
"""
"""
if
tag
is
None
:
if
tag
is
None
:
tag
=
''
tag
=
''
...
@@ -2217,7 +2250,9 @@ def tensor_to_cuda(x):
...
@@ -2217,7 +2250,9 @@ def tensor_to_cuda(x):
def
local_gpu_extract_diagonal
(
node
):
def
local_gpu_extract_diagonal
(
node
):
"""
"""
extract_diagonal(host_from_gpu()) -> host_from_gpu(extract_diagonal)
extract_diagonal(host_from_gpu()) -> host_from_gpu(extract_diagonal)
gpu_from_host(extract_diagonal) -> extract_diagonal(gpu_from_host)
gpu_from_host(extract_diagonal) -> extract_diagonal(gpu_from_host)
"""
"""
if
(
isinstance
(
node
.
op
,
nlinalg
.
ExtractDiag
)
and
if
(
isinstance
(
node
.
op
,
nlinalg
.
ExtractDiag
)
and
isinstance
(
node
.
inputs
[
0
]
.
type
,
isinstance
(
node
.
inputs
[
0
]
.
type
,
...
@@ -2249,9 +2284,10 @@ def typeConstructor(broadcastable, dtype):
...
@@ -2249,9 +2284,10 @@ def typeConstructor(broadcastable, dtype):
def
gpuScanOptimization
(
node
):
def
gpuScanOptimization
(
node
):
"""
"""
scan(host_from_gpu) -> host_from_gpu(GPUscan)
scan(host_from_gpu) -> host_from_gpu(GPUscan)
gpu_from_host(scan) -> GPUscan(gpu_from_host)
gpu_from_host(scan) -> GPUscan(gpu_from_host)
"""
"""
# gpu_from_host(scan) -> GPUscan(gpu_from_host)
# gpu_from_host(scan) -> GPUscan(gpu_from_host)
if
isinstance
(
node
.
op
,
GpuFromHost
):
if
isinstance
(
node
.
op
,
GpuFromHost
):
host_input
=
node
.
inputs
[
0
]
host_input
=
node
.
inputs
[
0
]
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论