Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
ebcf5615
提交
ebcf5615
authored
7月 08, 2016
作者:
sentient07
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
standardized opt name and return type of gpualloc and gpueye
上级
aa853330
隐藏空白字符变更
内嵌
并排
正在显示
6 个修改的文件
包含
36 行增加
和
36 行删除
+36
-36
profiling.py
theano/compile/profiling.py
+1
-1
dnn.py
theano/gpuarray/dnn.py
+5
-5
extra_ops.py
theano/gpuarray/extra_ops.py
+1
-1
neighbours.py
theano/gpuarray/neighbours.py
+1
-1
nerv.py
theano/gpuarray/nerv.py
+2
-2
opt.py
theano/gpuarray/opt.py
+26
-26
没有找到文件。
theano/compile/profiling.py
浏览文件 @
ebcf5615
...
@@ -54,7 +54,7 @@ def _atexit_print_fn():
...
@@ -54,7 +54,7 @@ def _atexit_print_fn():
destination_file
=
open
(
config
.
profiling
.
destination
,
'w'
)
destination_file
=
open
(
config
.
profiling
.
destination
,
'w'
)
for
ps
in
_atexit_print_list
:
for
ps
in
_atexit_print_list
:
if
ps
.
fct_callcount
>
1
or
ps
.
compile_time
>
1
:
if
ps
.
fct_callcount
>
=
1
or
ps
.
compile_time
>
1
:
ps
.
summary
(
file
=
destination_file
,
ps
.
summary
(
file
=
destination_file
,
n_ops_to_print
=
config
.
profiling
.
n_ops
,
n_ops_to_print
=
config
.
profiling
.
n_ops
,
n_apply_to_print
=
config
.
profiling
.
n_apply
)
n_apply_to_print
=
config
.
profiling
.
n_apply
)
...
...
theano/gpuarray/dnn.py
浏览文件 @
ebcf5615
...
@@ -1550,7 +1550,7 @@ def local_dnn_convi_output_merge(node, *inputs):
...
@@ -1550,7 +1550,7 @@ def local_dnn_convi_output_merge(node, *inputs):
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
Pool
])
@op_lifter
([
Pool
])
@register_opt2
([
Pool
],
'fast_compile'
,
'cudnn'
)
@register_opt2
([
Pool
],
'fast_compile'
,
'cudnn'
)
def
local_pool_dnn_alternative
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_
gpua_
pool_dnn_alternative
(
op
,
ctx_name
,
inputs
,
outputs
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
raise_no_cudnn
()
raise_no_cudnn
()
if
not
op
.
ignore_border
:
if
not
op
.
ignore_border
:
...
@@ -1567,7 +1567,7 @@ def local_pool_dnn_alternative(op, ctx_name, inputs, outputs):
...
@@ -1567,7 +1567,7 @@ def local_pool_dnn_alternative(op, ctx_name, inputs, outputs):
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
MaxPoolGrad
])
@op_lifter
([
MaxPoolGrad
])
@register_opt2
([
MaxPoolGrad
],
'fast_compile'
,
'cudnn'
)
@register_opt2
([
MaxPoolGrad
],
'fast_compile'
,
'cudnn'
)
def
local_pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_
gpua_
pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
raise_no_cudnn
()
raise_no_cudnn
()
if
not
op
.
ignore_border
:
if
not
op
.
ignore_border
:
...
@@ -1592,7 +1592,7 @@ def local_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
...
@@ -1592,7 +1592,7 @@ def local_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
AveragePoolGrad
])
@op_lifter
([
AveragePoolGrad
])
@register_opt2
([
AveragePoolGrad
],
'fast_compile'
,
'cudnn'
)
@register_opt2
([
AveragePoolGrad
],
'fast_compile'
,
'cudnn'
)
def
local_avg_pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_
gpua_
avg_pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
raise_no_cudnn
()
raise_no_cudnn
()
if
not
op
.
ignore_border
:
if
not
op
.
ignore_border
:
...
@@ -1646,7 +1646,7 @@ def local_log_softmax_dnn(node):
...
@@ -1646,7 +1646,7 @@ def local_log_softmax_dnn(node):
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
LogSoftmax
])
@op_lifter
([
LogSoftmax
])
@register_opt2
([
LogSoftmax
],
'fast_compile'
,
'cudnn'
)
@register_opt2
([
LogSoftmax
],
'fast_compile'
,
'cudnn'
)
def
local_logsoftmax_to_dnn
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_
gpua_
logsoftmax_to_dnn
(
op
,
ctx_name
,
inputs
,
outputs
):
# Transform the input in the format expected by GpuDnnSoftmax
# Transform the input in the format expected by GpuDnnSoftmax
inp
=
inputs
[
0
]
inp
=
inputs
[
0
]
if
inp
.
ndim
!=
2
:
if
inp
.
ndim
!=
2
:
...
@@ -1685,7 +1685,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
...
@@ -1685,7 +1685,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
SoftmaxGrad
])
@op_lifter
([
SoftmaxGrad
])
@register_opt2
([
SoftmaxGrad
],
'cudnn'
,
'fast_compile'
)
@register_opt2
([
SoftmaxGrad
],
'cudnn'
,
'fast_compile'
)
def
local_softmax_dnn_grad
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_
gpua_
softmax_dnn_grad
(
op
,
ctx_name
,
inputs
,
outputs
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
raise_no_cudnn
(
"cuDNN needed for SoftmaxGrad"
)
raise_no_cudnn
(
"cuDNN needed for SoftmaxGrad"
)
ins
=
[]
ins
=
[]
...
...
theano/gpuarray/extra_ops.py
浏览文件 @
ebcf5615
...
@@ -456,7 +456,7 @@ class GpuCumsum(GpuKernelBase, Op):
...
@@ -456,7 +456,7 @@ class GpuCumsum(GpuKernelBase, Op):
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
CumsumOp
])
@op_lifter
([
CumsumOp
])
@register_opt2
([
CumsumOp
],
'fast_compile'
)
@register_opt2
([
CumsumOp
],
'fast_compile'
)
def
use_gpu
_cumsumop
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_gpua
_cumsumop
(
op
,
ctx_name
,
inputs
,
outputs
):
if
inputs
[
0
]
.
dtype
==
'float32'
:
if
inputs
[
0
]
.
dtype
==
'float32'
:
axis
=
op
.
axis
axis
=
op
.
axis
x
=
inputs
[
0
]
x
=
inputs
[
0
]
...
...
theano/gpuarray/neighbours.py
浏览文件 @
ebcf5615
...
@@ -471,6 +471,6 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
...
@@ -471,6 +471,6 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
Images2Neibs
])
@op_lifter
([
Images2Neibs
])
@register_opt2
([
Images2Neibs
],
'fast_compile'
)
@register_opt2
([
Images2Neibs
],
'fast_compile'
)
def
use_gpu
_images2neibs
(
op
,
context_name
,
inputs
):
def
local_gpua
_images2neibs
(
op
,
context_name
,
inputs
):
if
op
.
mode
in
[
'valid'
,
'ignore_borders'
,
'wrap_centered'
]:
if
op
.
mode
in
[
'valid'
,
'ignore_borders'
,
'wrap_centered'
]:
return
GpuImages2Neibs
(
op
.
mode
)
return
GpuImages2Neibs
(
op
.
mode
)
theano/gpuarray/nerv.py
浏览文件 @
ebcf5615
...
@@ -150,14 +150,14 @@ if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz,
...
@@ -150,14 +150,14 @@ if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz,
@opt.register_opt
(
'fast_compile'
)
@opt.register_opt
(
'fast_compile'
)
@opt.op_lifter
([
tensor
.
Dot
])
@opt.op_lifter
([
tensor
.
Dot
])
@opt.register_opt2
([
tensor
.
Dot
],
'fast_compile'
)
@opt.register_opt2
([
tensor
.
Dot
],
'fast_compile'
)
def
local_dot_to_gemm16
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_
gpua_
dot_to_gemm16
(
op
,
ctx_name
,
inputs
,
outputs
):
if
nerv
is
None
:
if
nerv
is
None
:
return
return
A
=
inputs
[
0
]
A
=
inputs
[
0
]
B
=
inputs
[
1
]
B
=
inputs
[
1
]
if
(
A
.
ndim
==
2
and
B
.
ndim
==
2
and
if
(
A
.
ndim
==
2
and
B
.
ndim
==
2
and
A
.
dtype
==
'float16'
and
B
.
dtype
==
'float16'
):
A
.
dtype
==
'float16'
and
B
.
dtype
==
'float16'
):
fgraph
=
getattr
(
in
puts
[
0
],
'fgraph'
,
None
)
fgraph
=
getattr
(
out
puts
[
0
],
'fgraph'
,
None
)
C
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
'float16'
)(
C
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
'float16'
)(
shape_i
(
A
,
0
,
fgraph
),
shape_i
(
B
,
1
,
fgraph
))
shape_i
(
A
,
0
,
fgraph
),
shape_i
(
B
,
1
,
fgraph
))
return
Gemm16
()(
C
,
1.0
,
A
,
B
,
0.0
)
return
Gemm16
()(
C
,
1.0
,
A
,
B
,
0.0
)
...
...
theano/gpuarray/opt.py
浏览文件 @
ebcf5615
...
@@ -299,12 +299,12 @@ class GraphToGPU(NavigatorOptimizer):
...
@@ -299,12 +299,12 @@ class GraphToGPU(NavigatorOptimizer):
# Building a new graph
# Building a new graph
# Iterating through inputs of graph
# Iterating through inputs of graph
target
=
str
(
infer_context_name
(
*
fgraph
.
inputs
)
)
target
=
infer_context_name
(
*
fgraph
.
inputs
)
for
i
in
fgraph
.
inputs
:
for
i
in
fgraph
.
inputs
:
# Do not move *int* scalar to the GPU.
# Do not move *int* scalar to the GPU.
if
(
isinstance
(
i
.
type
,
tensor
.
TensorType
)
and
if
(
isinstance
(
i
.
type
,
tensor
.
TensorType
)
and
(
i
.
ndim
>
0
or
'int'
not
in
i
.
dtype
)):
(
i
.
ndim
>
0
or
'int'
not
in
i
.
dtype
)):
mapping
[
i
]
=
i
.
transfer
(
getattr
(
i
.
tag
,
target
,
None
))
mapping
[
i
]
=
i
.
transfer
(
getattr
(
i
.
tag
,
'target'
,
target
))
else
:
else
:
mapping
[
i
]
=
i
mapping
[
i
]
=
i
for
i
in
fgraph
.
variables
:
for
i
in
fgraph
.
variables
:
...
@@ -551,7 +551,7 @@ optdb['canonicalize'].register('local_cut_gpua_host_gpua',
...
@@ -551,7 +551,7 @@ optdb['canonicalize'].register('local_cut_gpua_host_gpua',
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@local_optimizer
([
tensor
.
Alloc
])
@local_optimizer
([
tensor
.
Alloc
])
def
local_gpuaalloc2
(
node
):
def
local_gpua
_
alloc2
(
node
):
"""
"""
Join(axis, {Alloc or HostFromGPU}, ...) -> Join(axis, GpuAlloc, Alloc, ...)
Join(axis, {Alloc or HostFromGPU}, ...) -> Join(axis, GpuAlloc, Alloc, ...)
...
@@ -576,17 +576,17 @@ def local_gpuaalloc2(node):
...
@@ -576,17 +576,17 @@ def local_gpuaalloc2(node):
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
tensor
.
Alloc
])
@op_lifter
([
tensor
.
Alloc
])
@register_opt2
([
tensor
.
Alloc
],
'fast_compile'
)
@register_opt2
([
tensor
.
Alloc
],
'fast_compile'
)
def
local_gpuaalloc
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpua
_
alloc
(
op
,
context_name
,
inputs
,
outputs
):
return
gpu_alloc
(
context_name
)
(
*
inputs
)
return
gpu_alloc
(
context_name
)
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
tensor
.
AllocEmpty
])
@op_lifter
([
tensor
.
AllocEmpty
])
@register_opt2
([
tensor
.
AllocEmpty
],
'fast_compile'
)
@register_opt2
([
tensor
.
AllocEmpty
],
'fast_compile'
)
def
local_gpuaallocempty
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpua
_
allocempty
(
op
,
context_name
,
inputs
,
outputs
):
# We use _props_dict() to make sure that the GPU op know all the
# We use _props_dict() to make sure that the GPU op know all the
# CPU op props.
# CPU op props.
return
gpu_alloc_empty
(
context_name
,
**
op
.
_props_dict
())
(
*
inputs
)
return
gpu_alloc_empty
(
context_name
,
**
op
.
_props_dict
())
@register_opt
()
@register_opt
()
...
@@ -632,14 +632,14 @@ def local_gpu_contiguous_gpu_contiguous(node):
...
@@ -632,14 +632,14 @@ def local_gpu_contiguous_gpu_contiguous(node):
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
tensor
.
extra_ops
.
CpuContiguous
])
@op_lifter
([
tensor
.
extra_ops
.
CpuContiguous
])
@register_opt2
([
tensor
.
extra_ops
.
CpuContiguous
],
'fast_compile'
)
@register_opt2
([
tensor
.
extra_ops
.
CpuContiguous
],
'fast_compile'
)
def
local_gpu_contiguous
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpu
a
_contiguous
(
op
,
context_name
,
inputs
,
outputs
):
return
gpu_contiguous
return
gpu_contiguous
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
tensor
.
Reshape
])
@op_lifter
([
tensor
.
Reshape
])
@register_opt2
([
tensor
.
Reshape
],
'fast_compile'
)
@register_opt2
([
tensor
.
Reshape
],
'fast_compile'
)
def
local_gpureshape
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpu
a_
reshape
(
op
,
context_name
,
inputs
,
outputs
):
name
=
op
.
name
name
=
op
.
name
if
name
:
if
name
:
name
=
'Gpu'
+
name
name
=
'Gpu'
+
name
...
@@ -650,14 +650,14 @@ def local_gpureshape(op, context_name, inputs, outputs):
...
@@ -650,14 +650,14 @@ def local_gpureshape(op, context_name, inputs, outputs):
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
tensor
.
Rebroadcast
])
@op_lifter
([
tensor
.
Rebroadcast
])
@register_opt2
([
tensor
.
Rebroadcast
],
'fast_compile'
)
@register_opt2
([
tensor
.
Rebroadcast
],
'fast_compile'
)
def
local_gpu_rebroadcast
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpu
a
_rebroadcast
(
op
,
context_name
,
inputs
,
outputs
):
return
op
(
as_gpuarray_variable
(
inputs
[
0
],
context_name
))
return
op
(
as_gpuarray_variable
(
inputs
[
0
],
context_name
))
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
tensor
.
Flatten
])
@op_lifter
([
tensor
.
Flatten
])
@register_opt2
([
tensor
.
Flatten
],
'fast_compile'
)
@register_opt2
([
tensor
.
Flatten
],
'fast_compile'
)
def
local_gpuflatten
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpu
a_
flatten
(
op
,
context_name
,
inputs
,
outputs
):
shp
=
[]
shp
=
[]
if
op
.
outdim
!=
1
:
if
op
.
outdim
!=
1
:
shp
=
[
inputs
[
0
]
.
shape
[
i
]
for
i
in
range
(
op
.
outdim
-
1
)]
shp
=
[
inputs
[
0
]
.
shape
[
i
]
for
i
in
range
(
op
.
outdim
-
1
)]
...
@@ -785,7 +785,7 @@ def gpu_print_wrapper(op, cnda):
...
@@ -785,7 +785,7 @@ def gpu_print_wrapper(op, cnda):
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
tensor
.
printing
.
Print
])
@op_lifter
([
tensor
.
printing
.
Print
])
@register_opt2
([
tensor
.
printing
.
Print
],
'fast_compile'
)
@register_opt2
([
tensor
.
printing
.
Print
],
'fast_compile'
)
def
local_gpu_print_op
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpu
a
_print_op
(
op
,
context_name
,
inputs
,
outputs
):
x
,
=
inputs
x
,
=
inputs
gpu_x
=
as_gpuarray_variable
(
x
,
context_name
=
context_name
)
gpu_x
=
as_gpuarray_variable
(
x
,
context_name
=
context_name
)
new_op
=
op
.
__class__
(
global_fn
=
gpu_print_wrapper
)
new_op
=
op
.
__class__
(
global_fn
=
gpu_print_wrapper
)
...
@@ -886,7 +886,7 @@ def local_gpua_join(op, context_name, inputs, outputs):
...
@@ -886,7 +886,7 @@ def local_gpua_join(op, context_name, inputs, outputs):
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@local_optimizer
([
GpuJoin
])
@local_optimizer
([
GpuJoin
])
def
local_gpuajoin_1
(
node
):
def
local_gpua
_
join_1
(
node
):
# join of a single element
# join of a single element
if
(
isinstance
(
node
.
op
,
GpuJoin
)
and
if
(
isinstance
(
node
.
op
,
GpuJoin
)
and
len
(
node
.
inputs
)
==
2
):
len
(
node
.
inputs
)
==
2
):
...
@@ -1142,25 +1142,25 @@ def local_gpua_hgemm(op, context_name, inputs, outputs):
...
@@ -1142,25 +1142,25 @@ def local_gpua_hgemm(op, context_name, inputs, outputs):
@register_opt
()
@register_opt
()
@alpha_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
)
@alpha_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
)
def
local_gpuagemm_alpha_merge
(
node
,
*
inputs
):
def
local_gpua
_
gemm_alpha_merge
(
node
,
*
inputs
):
return
[
gpugemm_no_inplace
(
*
inputs
)]
return
[
gpugemm_no_inplace
(
*
inputs
)]
@register_opt
()
@register_opt
()
@output_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
)
@output_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
)
def
local_gpuagemm_output_merge
(
node
,
*
inputs
):
def
local_gpua
_
gemm_output_merge
(
node
,
*
inputs
):
return
[
gpugemm_no_inplace
(
*
inputs
)]
return
[
gpugemm_no_inplace
(
*
inputs
)]
@register_opt
()
@register_opt
()
@alpha_merge
(
GpuGemmBatch
,
alpha_in
=
1
,
beta_in
=
4
)
@alpha_merge
(
GpuGemmBatch
,
alpha_in
=
1
,
beta_in
=
4
)
def
local_gpuagemmbatch_alpha_merge
(
node
,
*
inputs
):
def
local_gpua
_
gemmbatch_alpha_merge
(
node
,
*
inputs
):
return
[
gpugemmbatch_no_inplace
(
*
inputs
)]
return
[
gpugemmbatch_no_inplace
(
*
inputs
)]
@register_opt
()
@register_opt
()
@output_merge
(
GpuGemmBatch
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
)
@output_merge
(
GpuGemmBatch
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
)
def
local_gpuagemmbatch_output_merge
(
node
,
*
inputs
):
def
local_gpua
_
gemmbatch_output_merge
(
node
,
*
inputs
):
return
[
gpugemmbatch_no_inplace
(
*
inputs
)]
return
[
gpugemmbatch_no_inplace
(
*
inputs
)]
...
@@ -1193,7 +1193,7 @@ def local_gpua_dot22scalar(op, context_name, inputs, outputs):
...
@@ -1193,7 +1193,7 @@ def local_gpua_dot22scalar(op, context_name, inputs, outputs):
@op_lifter
([
tensor
.
basic
.
Eye
])
@op_lifter
([
tensor
.
basic
.
Eye
])
@register_opt2
([
tensor
.
basic
.
Eye
],
'fast_compile'
)
@register_opt2
([
tensor
.
basic
.
Eye
],
'fast_compile'
)
def
local_gpua_eye
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpua_eye
(
op
,
context_name
,
inputs
,
outputs
):
return
GpuEye
(
dtype
=
op
.
dtype
,
context_name
=
context_name
)
(
*
inputs
)
return
GpuEye
(
dtype
=
op
.
dtype
,
context_name
=
context_name
)
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
...
@@ -1226,7 +1226,7 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs):
...
@@ -1226,7 +1226,7 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs):
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
theano
.
tensor
.
opt
.
Assert
])
@op_lifter
([
theano
.
tensor
.
opt
.
Assert
])
def
local_assert
(
op
,
context_name
,
inputs
,
outputs
):
def
local_
gpua_
assert
(
op
,
context_name
,
inputs
,
outputs
):
if
isinstance
(
inputs
[
0
]
.
type
,
GpuArrayType
):
if
isinstance
(
inputs
[
0
]
.
type
,
GpuArrayType
):
return
return
return
local_assert_graph
(
op
,
context_name
,
inputs
,
outputs
)
return
local_assert_graph
(
op
,
context_name
,
inputs
,
outputs
)
...
@@ -1241,7 +1241,7 @@ def local_assert_graph(op, context_name, inputs, outputs):
...
@@ -1241,7 +1241,7 @@ def local_assert_graph(op, context_name, inputs, outputs):
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
ConvOp
])
@op_lifter
([
ConvOp
])
@register_opt2
([
ConvOp
],
'fast_compile'
)
@register_opt2
([
ConvOp
],
'fast_compile'
)
def
local_error_convop
(
op
,
context_name
,
inputs
,
outputs
):
def
local_
gpua_
error_convop
(
op
,
context_name
,
inputs
,
outputs
):
assert
False
,
"""
assert
False
,
"""
ConvOp does not work with the gpuarray backend.
ConvOp does not work with the gpuarray backend.
...
@@ -1253,7 +1253,7 @@ theano.tensor.nnet.conv2d()
...
@@ -1253,7 +1253,7 @@ theano.tensor.nnet.conv2d()
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
SparseBlockGemv
])
@op_lifter
([
SparseBlockGemv
])
@register_opt2
([
SparseBlockGemv
],
'fast_compile'
)
@register_opt2
([
SparseBlockGemv
],
'fast_compile'
)
def
local_lift_sparseblockgemv
(
op
,
context_name
,
inputs
,
outputs
):
def
local_
gpua_
lift_sparseblockgemv
(
op
,
context_name
,
inputs
,
outputs
):
if
op
.
inplace
:
if
op
.
inplace
:
return
gpu_sparse_block_gemv_inplace
return
gpu_sparse_block_gemv_inplace
else
:
else
:
...
@@ -1263,7 +1263,7 @@ def local_lift_sparseblockgemv(op, context_name, inputs, outputs):
...
@@ -1263,7 +1263,7 @@ def local_lift_sparseblockgemv(op, context_name, inputs, outputs):
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
SparseBlockOuter
])
@op_lifter
([
SparseBlockOuter
])
@register_opt2
([
SparseBlockOuter
],
'fast_compile'
)
@register_opt2
([
SparseBlockOuter
],
'fast_compile'
)
def
local_lift_sparseblockouter
(
op
,
context_name
,
inputs
,
outputs
):
def
local_
gpua_
lift_sparseblockouter
(
op
,
context_name
,
inputs
,
outputs
):
if
op
.
inplace
:
if
op
.
inplace
:
return
gpu_sparse_block_outer_inplace
return
gpu_sparse_block_outer_inplace
else
:
else
:
...
@@ -1289,17 +1289,17 @@ def local_inplace_sparseblockouter(node):
...
@@ -1289,17 +1289,17 @@ def local_inplace_sparseblockouter(node):
@op_lifter
([
AbstractConv2d
,
@op_lifter
([
AbstractConv2d
,
AbstractConv2d_gradWeights
,
AbstractConv2d_gradWeights
,
AbstractConv2d_gradInputs
])
AbstractConv2d_gradInputs
])
def
local_lift_abstractconv2d
(
op
,
context_name
,
inputs
,
outputs
):
def
local_
gpua_
lift_abstractconv2d
(
op
,
context_name
,
inputs
,
outputs
):
if
isinstance
(
outputs
[
0
]
.
type
,
GpuArrayType
):
if
isinstance
(
outputs
[
0
]
.
type
,
GpuArrayType
):
# Don't handle this node here, it's already on the GPU.
# Don't handle this node here, it's already on the GPU.
return
return
return
local_lift_abstractconv2d_graph
(
op
,
context_name
,
inputs
,
outputs
)
return
local_
gpua_
lift_abstractconv2d_graph
(
op
,
context_name
,
inputs
,
outputs
)
@register_opt2
([
AbstractConv2d
,
@register_opt2
([
AbstractConv2d
,
AbstractConv2d_gradWeights
,
AbstractConv2d_gradWeights
,
AbstractConv2d_gradInputs
],
'fast_compile'
,
'conv_dnn'
,
'cudnn'
)
AbstractConv2d_gradInputs
],
'fast_compile'
,
'conv_dnn'
,
'cudnn'
)
def
local_lift_abstractconv2d_graph
(
op
,
context_name
,
inputs
,
outputs
):
def
local_
gpua_
lift_abstractconv2d_graph
(
op
,
context_name
,
inputs
,
outputs
):
inps
=
list
(
inputs
)
inps
=
list
(
inputs
)
inps
[
0
]
=
as_gpuarray_variable
(
inputs
[
0
],
inps
[
0
]
=
as_gpuarray_variable
(
inputs
[
0
],
context_name
=
context_name
)
context_name
=
context_name
)
...
@@ -1413,7 +1413,7 @@ def gpu_reconstruct_graph(inputs, outputs, tag=None):
...
@@ -1413,7 +1413,7 @@ def gpu_reconstruct_graph(inputs, outputs, tag=None):
@register_opt
(
'scan'
,
'fast_compile'
)
@register_opt
(
'scan'
,
'fast_compile'
)
@op_lifter
([
scan_op
.
Scan
])
@op_lifter
([
scan_op
.
Scan
])
@register_opt2
([
scan_op
.
Scan
],
'fast_compile'
)
@register_opt2
([
scan_op
.
Scan
],
'fast_compile'
)
def
local_scan_to_gpua
(
op
,
context_name
,
inputs
,
outputs
):
def
local_
gpua_
scan_to_gpua
(
op
,
context_name
,
inputs
,
outputs
):
info
=
copy
.
deepcopy
(
op
.
info
)
info
=
copy
.
deepcopy
(
op
.
info
)
if
info
.
get
(
'gpua'
,
False
):
if
info
.
get
(
'gpua'
,
False
):
return
return
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论