pytensor · Commits

Commit e6a4c073
Authored Jun 04, 2015 by Pascal Lamblin

Merge pull request #2423 from carriepl/breakpoint

Add breakpoint Op to Theano

Parents: 944e36dd, 495e144e
Showing 6 changed files with 398 additions and 2 deletions.
- theano/sandbox/cuda/opt.py (+65 −0)
- theano/sandbox/cuda/tests/test_opt.py (+22 −2)
- theano/sandbox/gpuarray/opt.py (+64 −0)
- theano/sandbox/gpuarray/tests/test_opt.py (+20 −0)
- theano/tests/breakpoint.py (+146 −0)
- theano/tests/test_breakpoint.py (+81 −0)
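Before the diffs, here is a minimal sketch of what the new Op does, adapted from the docstring added in theano/tests/breakpoint.py (the variable names and the condition are illustrative, not from the commit): PdbBreakpoint acts as an identity on the variables it monitors, but drops into pdb when a symbolic scalar condition evaluates to true.

```python
import theano
import theano.tensor as T
from theano.tests.breakpoint import PdbBreakpoint

x = T.fvector()

# Identity-like: x_monitored computes to the same values as x, but pdb is
# entered whenever the scalar condition (any element negative) holds.
condition = T.lt(x.min(), 0)
x_monitored = PdbBreakpoint("negative input")(condition, x)

f = theano.function([x], x_monitored ** 2)
f([1., 2., 3.])   # runs through without stopping
f([1., -2., 3.])  # pauses in pdb before returning
```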
theano/sandbox/cuda/opt.py

```python
@@ -63,6 +63,7 @@ from theano.tensor import nlinalg
from theano.tensor import slinalg
from theano.tensor.nnet.Conv3D import Conv3D
from theano.tests.breakpoint import PdbBreakpoint

try:
    # We need to be able to import this file even if cuda isn't avail.
```

```python
@@ -1141,6 +1142,69 @@ def local_gpu_print_op(node):
    return False


@register_opt()
@local_optimizer([PdbBreakpoint])
def local_gpu_pdbbreakpoint_op(node):
    if isinstance(node.op, PdbBreakpoint):

        old_inputs = node.inputs
        old_outputs = node.outputs

        new_inputs = node.inputs[:1]
        input_transfered = []

        # Go through the monitored variables, only transferring to the GPU
        # those for which the input comes from the GPU or the output will
        # be transferred to the GPU.
        nb_monitored_vars = len(node.outputs)
        for i in range(nb_monitored_vars):

            inp = old_inputs[i + 1]
            out = old_outputs[i]

            input_is_from_gpu = (inp.owner and
                                 isinstance(inp.owner.op, HostFromGpu))
            output_goes_to_gpu = any([c[0] != "output" and
                                      isinstance(c[0].op, GpuFromHost)
                                      for c in out.clients])

            if input_is_from_gpu:
                # The op should be applied on the GPU version of the input
                new_inputs.append(inp.owner.inputs[0])
                input_transfered.append(True)
            elif output_goes_to_gpu:
                # The input should be transferred to the GPU
                new_inputs.append(gpu_from_host(inp))
                input_transfered.append(True)
            else:
                # No transfer is required.
                new_inputs.append(inp)
                input_transfered.append(False)

        # Only continue the optimization if at least one input has been
        # transferred to the GPU
        if not any(input_transfered):
            return False

        # Apply the op on the new inputs
        new_op_outputs = node.op(*new_inputs, return_list=True)

        # Propagate the transfer to the GPU through the outputs that
        # require it
        new_outputs = []
        for i in range(len(new_op_outputs)):
            if input_transfered[i]:
                new_outputs.append(host_from_gpu(new_op_outputs[i]))
            else:
                new_outputs.append(new_op_outputs[i])

        return new_outputs

    return False


def cast(x, dtype):
    stype = scal.Scalar(dtype)
    cast_op = theano.tensor.Elemwise(scal.Identity(scal.specific_out(stype)))
```

```python
@@ -2303,6 +2367,7 @@ def local_gpu_allocempty(node):
        return [ret]
    return False


optdb.register('gpu_scanOp_make_inplace',
               scan_opt.ScanInplaceOptimizer(typeConstructor=typeConstructor,
                                             gpu_flag=True),
```
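The decision rule above relies on Theano's client bookkeeping: inside a FunctionGraph, `var.clients` is a list of `(consumer, index)` pairs, where `consumer` is either an Apply node or the literal string `"output"` for variables returned from the graph. As a hedged restatement, the two predicates could be factored out like this (the helper names are ours, not the commit's):

```python
from theano.sandbox.cuda.basic_ops import HostFromGpu, GpuFromHost

def input_is_from_gpu(inp):
    # The monitored input is the host copy of a GPU variable: its owner is
    # a HostFromGpu transfer, so the optimizer can reuse owner.inputs[0].
    return inp.owner is not None and isinstance(inp.owner.op, HostFromGpu)

def output_goes_to_gpu(out):
    # Some consumer of the monitored output, other than the graph-output
    # marker (the string "output"), immediately transfers it to the GPU.
    return any(c[0] != "output" and isinstance(c[0].op, GpuFromHost)
               for c in out.clients)
```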
theano/sandbox/cuda/tests/test_opt.py

```python
@@ -14,6 +14,7 @@ from theano import config, tensor
import theano.tensor.tests.test_nlinalg
import theano.tensor.tests.test_opt as test_opt
from theano.tests.breakpoint import PdbBreakpoint
from theano.tests import unittest_tools as utt

import theano.sandbox.cuda as cuda
```

```python
@@ -164,7 +165,7 @@ def test_gpuallocempty():
    f_cpu = theano.function([], tensor.AllocEmpty('int32')(2, 3))
    l_cpu = f_cpu.maker.fgraph.toposort()
    assert not numpy.any([isinstance(x.op, basic_ops.GpuAllocEmpty)
                          for x in l_cpu])


class Test_local_elemwise_alloc(test_opt.Test_local_elemwise_alloc):
    dtype = 'float32'
```

```python
@@ -322,7 +323,7 @@ def test_opt_gpujoin_joinvectors_elemwise_then_minusone():
def test_opt_gpujoin_joinvectors_negativeaxes():
    """
    Test that negative axis concatenation works as expected.
    """
```

```python
@@ -477,6 +478,25 @@ def test_print_op():
    f(numpy.random.random((5, 5)).astype('float32'))


def test_pdbbreakpoint_op():
    """ Test that PdbBreakpoint ops don't block gpu optimization"""
    b = tensor.fmatrix()

    # Create a function composed of a breakpoint followed by
    # some computation
    condition = tensor.gt(b.sum(), 0)
    b_monitored = PdbBreakpoint(name='TestBreakpoint')(condition, b)
    output = b_monitored ** 2

    f = theano.function([b], output, mode=mode_with_gpu)

    # Ensure that, in the compiled function, the computation following
    # the breakpoint has been moved to the GPU.
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[-2].op, cuda.GpuElemwise)
    assert topo[-1].op == cuda.host_from_gpu


def test_huge_elemwise_fusion():
    """ Test that the GpuElemwise fusion works correctly

    We check that we fuse one node with part of its input
```
theano/sandbox/gpuarray/opt.py

```python
@@ -15,6 +15,7 @@ from theano.gof import (local_optimizer, EquilibriumDB,
from theano.scan_module import scan_utils, scan_op, scan_opt
from theano.tensor.nnet.conv import ConvOp
from theano.tests.breakpoint import PdbBreakpoint

from .type import GpuArrayType, GpuArrayConstant
from .basic_ops import (host_from_gpu, gpu_from_host, HostFromGpu,
                        GpuFromHost,
```

```python
@@ -330,6 +331,69 @@ def local_gpu_print_op(node):
        return new_op(gpu_x)


@register_opt('fast_compile')
@local_optimizer([PdbBreakpoint])
def local_gpu_pdbbreakpoint_op(node):
    # The body is identical, line for line, to the CUDA version of
    # local_gpu_pdbbreakpoint_op in theano/sandbox/cuda/opt.py above. The
    # only differences are that this copy is registered under
    # 'fast_compile', and that HostFromGpu, GpuFromHost, host_from_gpu and
    # gpu_from_host here come from the gpuarray backend's basic_ops.
    ...


@register_opt('fast_compile')
@op_lifter([tensor.Join])
def local_gpua_join(node):
```
theano/sandbox/gpuarray/tests/test_opt.py

```python
@@ -2,6 +2,7 @@ import numpy
import theano
from theano import tensor
from theano.tests.breakpoint import PdbBreakpoint
from theano.tests import unittest_tools as utt
from theano.tests.unittest_tools import SkipTest
from theano.tensor.tests import test_basic
```

```python
@@ -186,6 +187,25 @@ def test_print_op():
    f(numpy.random.random((5, 5)).astype('float32'))


def test_pdbbreakpoint_op():
    """ Test that PdbBreakpoint ops don't block gpu optimization"""
    b = tensor.fmatrix()

    # Create a function composed of a breakpoint followed by
    # some computation
    condition = tensor.gt(b.sum(), 0)
    b_monitored = PdbBreakpoint(name='TestBreakpoint')(condition, b)
    output = b_monitored ** 2

    f = theano.function([b], output, mode=mode_with_gpu)

    # Ensure that, in the compiled function, the computation following
    # the breakpoint has been moved to the GPU.
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[-2].op, GpuElemwise)
    assert topo[-1].op == host_from_gpu


def test_local_gpu_elemwise_careduce():
    x = theano.tensor.matrix()
    o = (x * x).sum()
```
theano/tests/breakpoint.py (new file, 0 → 100644)

```python
import numpy

import pdb
import theano
from theano.gof import Op, Apply
from theano.gradient import DisconnectedType


class PdbBreakpoint(Op):
    """
    This is an identity-like op with the side effect of enforcing a
    conditional breakpoint, inside a theano function, based on a symbolic
    scalar condition.

    @type name: String
    @param name: name of the conditional breakpoint. To be printed when the
                 breakpoint is activated.

    :note: WARNING. At least one of the outputs of the op must be used,
           otherwise the op will be removed from the Theano graph
           due to its outputs being unused.

    :note: WARNING. Employing the function inside a theano graph can prevent
           Theano from applying certain optimizations to improve
           performance, reduce memory consumption and/or reduce
           numerical instability.

           Detailed explanation:
           As of 2014-12-01 the PdbBreakpoint op is not known by any
           optimization. Setting a PdbBreakpoint op in the middle of a
           pattern that is usually optimized out will block the
           optimization.

    Example:

    .. code-block:: python

        import theano
        import theano.tensor as T
        from theano.tests.breakpoint import PdbBreakpoint

        input = T.fvector()
        target = T.fvector()

        # Mean squared error between input and target
        mse = (input - target) ** 2

        # Conditional breakpoint to be activated if the total MSE is higher
        # than 100. The breakpoint will monitor the inputs, targets as well
        # as the individual error values.
        breakpointOp = PdbBreakpoint("MSE too high")
        condition = T.gt(mse.sum(), 100)
        mse, monitored_input, monitored_target = breakpointOp(condition, mse,
                                                              input, target)

        # Compile the theano function
        fct = theano.function([input, target], mse)

        # Use the function
        print(fct([10, 0], [10, 5]))  # Will NOT activate the breakpoint
        print(fct([0, 0], [10, 5]))   # Will activate the breakpoint
    """

    __props__ = ("name",)

    def __init__(self, name):
        self.name = name

    def make_node(self, condition, *monitored_vars):

        # Ensure that condition is a theano tensor
        if not isinstance(condition, theano.Variable):
            condition = theano.tensor.as_tensor_variable(condition)

        # Validate that the condition is a scalar (else it is not obvious
        # how it should be evaluated)
        assert (condition.ndim == 0)

        # Because the user might be tempted to instantiate PdbBreakpoint
        # only once and apply it many times on different numbers of inputs,
        # we must create a new instance of the op here, define the instance
        # attributes (view_map and var_types) in that instance and then
        # apply it on the inputs.
        new_op = PdbBreakpoint(name=self.name)
        new_op.view_map = {}
        new_op.inp_types = []
        for i in range(len(monitored_vars)):
            # Every output i is a view of the input i+1 because of the
            # input condition.
            new_op.view_map[i] = [i + 1]
            new_op.inp_types.append(monitored_vars[i].type)

        # Build the Apply node
        inputs = [condition] + list(monitored_vars)
        outputs = [inp.type() for inp in monitored_vars]
        return Apply(op=new_op, inputs=inputs, outputs=outputs)

    def perform(self, node, inputs, output_storage):
        condition = inputs[0]

        if condition:
            try:
                monitored = [numpy.asarray(inp) for inp in inputs[1:]]
            except:
                raise ValueError("Some of the inputs to the PdbBreakpoint op "
                                 "'%s' could not be cast to NumPy arrays" %
                                 self.name)

            print("\n")
            print("-------------------------------------------------")
            print("Conditional breakpoint '%s' activated\n" % self.name)
            print("The monitored variables are stored, in order,")
            print("in the list variable 'monitored' as NumPy arrays.\n")
            print("Their contents can be altered and, when execution")
            print("resumes, the updated values will be used.")
            print("-------------------------------------------------")

            pdb.set_trace()

            # Take the new values in monitored, cast them back to their
            # original type and store them in the output_storage
            for i in range(len(output_storage)):
                output_storage[i][0] = self.inp_types[i].filter(monitored[i])

        else:
            # Simply return views on the monitored variables
            for i in range(len(output_storage)):
                output_storage[i][0] = inputs[i + 1]

    def grad(self, inputs, output_gradients):
        return ([DisconnectedType()()] + output_gradients)

    def infer_shape(self, inputs, input_shapes):
        # Return the shape of every input but the condition (first input)
        return input_shapes[1:]

    def connection_pattern(self, node):

        nb_inp = len(node.inputs)
        nb_out = nb_inp - 1

        # The first input is connected to no output; every other input n
        # is connected to output n-1.
        connections = [[out_idx == inp_idx - 1 for out_idx in range(nb_out)]
                       for inp_idx in range(nb_inp)]
        return connections
```
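The `view_map` assembled in `make_node` is the piece that tells Theano each output merely aliases the monitored input one position later (input 0 is the condition, which no output views). A small illustration of the resulting dictionary for two monitored variables; this snippet is ours, not part of the commit:

```python
import theano.tensor as T
from theano.tests.breakpoint import PdbBreakpoint

x = T.fmatrix()
y = T.fscalar()
node = PdbBreakpoint("demo").make_node(T.gt(x.sum(), 0), x, y)

# Output 0 views input 1 (x) and output 1 views input 2 (y); the
# condition at input 0 is not aliased by any output.
assert node.op.view_map == {0: [1], 1: [2]}
```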
theano/tests/test_breakpoint.py (new file, 0 → 100644)

```python
import numpy

import theano
import theano.tensor as T
from theano.tests import unittest_tools as utt
from theano.tests.breakpoint import PdbBreakpoint


class TestPdbBreakpoint(utt.InferShapeTester):

    def setUp(self):
        super(TestPdbBreakpoint, self).setUp()

        # Sample computation that involves tensors with different numbers
        # of dimensions
        self.input1 = T.fmatrix()
        self.input2 = T.fscalar()
        self.output = T.dot((self.input1 - self.input2),
                            (self.input1 - self.input2).transpose())

        # Declare the conditional breakpoint
        self.breakpointOp = PdbBreakpoint("Sum of output too high")
        self.condition = T.gt(self.output.sum(), 1000)
        (self.monitored_input1,
         self.monitored_input2,
         self.monitored_output) = self.breakpointOp(self.condition,
                                                    self.input1,
                                                    self.input2,
                                                    self.output)

    def test_infer_shape(self):

        input1_value = numpy.arange(6).reshape(2, 3).astype("float32")
        input2_value = 10.0

        self._compile_and_check([self.input1, self.input2],
                                [self.monitored_input1,
                                 self.monitored_input2,
                                 self.monitored_output],
                                [input1_value, input2_value],
                                PdbBreakpoint)

    def test_grad(self):

        input1_value = numpy.arange(9).reshape(3, 3).astype("float32")
        input2_value = 10.0

        grads = [T.grad(self.monitored_input1.sum(), self.input1),
                 T.grad(self.monitored_input2.sum(), self.input2)]

        # Add self.monitored_input1 as an output to the Theano function to
        # prevent Theano from optimizing the PdbBreakpoint op out of the
        # function graph
        fct = theano.function([self.input1, self.input2],
                              grads + [self.monitored_input1])

        gradients = fct(input1_value, input2_value)[:-1]

        expected_gradients = [numpy.ones((3, 3), dtype="float32"),
                              numpy.array(1., dtype="float32")]

        for i in range(len(gradients)):
            numpy.testing.assert_allclose(gradients[i],
                                          expected_gradients[i])

    def test_fprop(self):

        input1_value = numpy.arange(9).reshape(3, 3).astype("float32")
        input2_value = 10.0

        fct = theano.function([self.input1, self.input2],
                              [self.monitored_input1,
                               self.monitored_input2])

        output = fct(input1_value, input2_value)
        numpy.testing.assert_allclose(output[0], input1_value)
        numpy.testing.assert_allclose(output[1], input2_value)

    def test_connection_pattern(self):

        node = self.monitored_output.owner
        connection_pattern = self.breakpointOp.connection_pattern(node)

        expected_pattern = [[0, 0, 0],
                            [1, 0, 0],
                            [0, 1, 0],
                            [0, 0, 1]]

        assert connection_pattern == expected_pattern
```
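One detail worth spelling out: `connection_pattern` builds its rows with boolean comparisons, while the test compares against a matrix of 0s and 1s; the assertion passes because Python's `bool` is an `int` subclass (`True == 1`, `False == 0`). A standalone check of the rule, independent of Theano (our sketch, not the commit's):

```python
# Rule from PdbBreakpoint.connection_pattern, for one condition input plus
# three monitored inputs: input 0 feeds no output, and input n (n >= 1)
# feeds only output n - 1.
nb_inp, nb_out = 4, 3
pattern = [[out_idx == inp_idx - 1 for out_idx in range(nb_out)]
           for inp_idx in range(nb_inp)]

# Booleans compare equal to 0/1, so this matches the test's expectation.
assert pattern == [[0, 0, 0],
                   [1, 0, 0],
                   [0, 1, 0],
                   [0, 0, 1]]
```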