Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
51b39ada
提交
51b39ada
authored
7月 04, 2013
作者:
lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1441 from nouiz/fix_opt_crash
Fix opt crash of local_gpu_lazy_ifelse.
上级
2faeb62c
5d485774
隐藏空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
119 行增加
和
38 行删除
+119
-38
debugmode.py
theano/compile/debugmode.py
+4
-1
cc.py
theano/gof/cc.py
+3
-0
link.py
theano/gof/link.py
+27
-17
op.py
theano/gof/op.py
+2
-0
vm.py
theano/gof/vm.py
+3
-0
blas.py
theano/sandbox/cuda/blas.py
+6
-1
cuda_ndarray.cu
theano/sandbox/cuda/cuda_ndarray.cu
+16
-10
opt.py
theano/sandbox/cuda/opt.py
+6
-1
test_opt.py
theano/sandbox/cuda/tests/test_opt.py
+9
-8
test_ifelse.py
theano/tests/test_ifelse.py
+43
-0
没有找到文件。
theano/compile/debugmode.py
浏览文件 @
51b39ada
...
...
@@ -1636,7 +1636,8 @@ class _Linker(gof.link.LocalLinker):
if
not
isinstance
(
node
.
op
,
gof
.
op
.
Op
):
raise
utils
.
MethodNotDefined
()
e
=
FunctionGraph
(
*
graph
.
clone
(
node
.
inputs
,
node
.
outputs
))
e
.
toposort
=
lambda
:
e
.
apply_nodes
# WARNING: STOCHASTIC ORDER
# The toposort isn't a stochastic order as it contain only one node.
e
.
toposort
=
lambda
:
list
(
e
.
apply_nodes
)
# Specifically... e.nodes is a set, but of only 1 element
cl
=
CLinker
()
.
accept
(
e
,
[
r
for
r
,
r2
in
zip
(
e
.
outputs
,
...
...
@@ -1679,6 +1680,8 @@ class _Linker(gof.link.LocalLinker):
storage_map
,
compute_map
,
no_recycling
)
thunk
.
inputs
=
[
storage_map
[
v
]
for
v
in
node
.
inputs
]
thunk
.
outputs
=
[
storage_map
[
v
]
for
v
in
node
.
outputs
]
# Right now there is no op that when called check if
# its ouputs are computed and don't recompute itself.
...
...
theano/gof/cc.py
浏览文件 @
51b39ada
...
...
@@ -1498,6 +1498,9 @@ class OpWiseCLinker(link.LocalLinker):
storage_map
,
compute_map
,
no_recycling
)]
thunks
[
-
1
]
.
inputs
=
[
storage_map
[
v
]
for
v
in
node
.
inputs
]
thunks
[
-
1
]
.
outputs
=
[
storage_map
[
v
]
for
v
in
node
.
outputs
]
finally
:
node
.
op
.
_op_use_c_code
=
old_value
...
...
theano/gof/link.py
浏览文件 @
51b39ada
...
...
@@ -112,29 +112,37 @@ def raise_with_op(op, thunk=None, exc_info=None):
if
raise_with_op
.
print_thunk_trace
:
log_thunk_trace
(
exc_value
)
if
theano
.
config
.
exception_verbosity
==
'high'
:
f
=
StringIO
.
StringIO
()
theano
.
printing
.
debugprint
(
op
,
file
=
f
,
stop_on_name
=
True
)
if
thunk
is
not
None
:
detailed_err_msg
=
"
\n
Apply node that caused the error: "
+
str
(
op
)
if
thunk
is
not
None
:
if
hasattr
(
thunk
,
'inputs'
)
:
shapes
=
[
getattr
(
ipt
[
0
],
'shape'
,
'No shapes'
)
for
ipt
in
thunk
.
inputs
]
strides
=
[
getattr
(
ipt
[
0
],
'strides'
,
'No strides'
)
for
ipt
in
thunk
.
inputs
]
detailed_err_msg
=
(
"
\n
Inputs shapes:
%
s
\n
"
%
shapes
+
"Inputs strides:
%
s
\n
"
%
strides
+
"Debugprint of the apply node:
\n
"
+
f
.
getvalue
())
else
:
detailed_err_msg
=
"
\n
Debugprint of the apply node:
\n
"
+
f
.
getvalue
()
shapes
=
"The thunk don't have an inputs attributes."
strides
=
"So we can't access the storage inputs value"
types
=
[
getattr
(
ipt
,
'type'
,
'No type'
)
for
ipt
in
op
.
inputs
]
detailed_err_msg
+=
(
"
\n
Inputs shapes:
%
s"
%
shapes
+
"
\n
Inputs strides:
%
s"
%
strides
+
"
\n
Inputs types:
%
s"
%
types
)
else
:
detailed_err_msg
+=
(
"
\n
Use another linker then the c linker to"
" have the inputs shapes and strides printed."
)
if
theano
.
config
.
exception_verbosity
==
'high'
:
f
=
StringIO
.
StringIO
()
theano
.
printing
.
debugprint
(
op
,
file
=
f
,
stop_on_name
=
True
,
print_type
=
True
)
detailed_err_msg
+=
"
\n
Debugprint of the apply node:
\n
"
+
f
.
getvalue
()
else
:
detailed_err_msg
=
(
"
\n
Use the Theano flag"
" 'exception_verbosity=high' for more"
" information on the inputs of this apply"
" node."
)
exc_value
=
exc_type
(
str
(
exc_value
)
+
"
\n
Apply node that caused the error: "
+
str
(
op
)
+
detailed_err_msg
)
detailed_err_msg
+=
(
"
\n
Use the Theano flag 'exception_verbosity=high'"
" for a debugprint of this apply node."
)
exc_value
=
exc_type
(
str
(
exc_value
)
+
detailed_err_msg
)
raise
exc_type
,
exc_value
,
exc_trace
raise_with_op
.
print_thunk_trace
=
False
...
...
@@ -523,6 +531,8 @@ class PerformLinker(LocalLinker):
storage_map
,
compute_map
,
no_recycling
)]
thunks
[
-
1
]
.
inputs
=
[
storage_map
[
v
]
for
v
in
node
.
inputs
]
thunks
[
-
1
]
.
outputs
=
[
storage_map
[
v
]
for
v
in
node
.
outputs
]
finally
:
node
.
op
.
_op_use_c_code
=
old_value
...
...
theano/gof/op.py
浏览文件 @
51b39ada
...
...
@@ -431,6 +431,8 @@ class PureOp(object):
# compute output value once with test inputs to validate graph
thunk
=
node
.
op
.
make_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
=
[])
thunk
.
inputs
=
[
storage_map
[
v
]
for
v
in
node
.
inputs
]
thunk
.
outputs
=
[
storage_map
[
v
]
for
v
in
node
.
outputs
]
required
=
thunk
()
assert
not
required
# We provided all inputs
...
...
theano/gof/vm.py
浏览文件 @
51b39ada
...
...
@@ -843,6 +843,9 @@ class VM_Linker(link.LocalLinker):
compute_map
,
no_recycling
)
for
node
in
order
]
for
node
,
thunk
in
zip
(
order
,
thunks
):
thunk
.
inputs
=
[
storage_map
[
v
]
for
v
in
node
.
inputs
]
thunk
.
outputs
=
[
storage_map
[
v
]
for
v
in
node
.
outputs
]
computed
,
last_user
=
link
.
gc_helper
(
order
)
if
self
.
allow_gc
:
...
...
theano/sandbox/cuda/blas.py
浏览文件 @
51b39ada
...
...
@@ -652,7 +652,7 @@ class GpuConv(GpuOp):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
return
(
0
,
19
)
return
(
0
,
20
)
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
...
@@ -704,6 +704,7 @@ class GpuConv(GpuOp):
return NULL;
}
// TODO, make out be decref before we alloc out2!
CudaNdarray * out2 = (CudaNdarray *)CudaNdarray_Conv(
%(img)
s,
%(kern)
s,
%(out)
s, mode,
dx, dy,
...
...
@@ -711,6 +712,10 @@ class GpuConv(GpuOp):
%(max_threads_dim0)
s);
Py_XDECREF(
%(out)
s);
%(out)
s = out2;
if (
%(out)
s==NULL){
%(fail)
s
}
"""
%
sub
...
...
theano/sandbox/cuda/cuda_ndarray.cu
浏览文件 @
51b39ada
...
...
@@ -1811,9 +1811,10 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
{
PyErr_Format
(
PyExc_RuntimeError
,
"Cuda error: %s: %s.
\n
"
,
"k4"
,
cudaGetErrorString
(
err
));
"Cuda error: %s: %s. n_block=(%ld,%ld) n_threads=%ld
\n
"
,
"k5 with loop over k4"
,
cudaGetErrorString
(
err
),
(
long
)
n_blocks
.
x
,
(
long
)
n_blocks
.
y
,
(
long
)
n_threads
.
x
);
Py_XDECREF
(
new_other
);
return
-
1
;
}
...
...
@@ -1831,14 +1832,17 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
);
while
(
n_blocks
.
x
*
n_blocks
.
y
>
NUM_VECTOR_OP_BLOCKS
)
n_blocks
.
y
/=
2
;
while
(
n_blocks
.
x
*
n_blocks
.
y
*
n_blocks
.
z
>
NUM_VECTOR_OP_BLOCKS
)
n_blocks
.
z
/=
2
;
// GTX285(compute capabilities 1.3) don't support n_blocks.z > 1
// (compute capabilities 2.0) support 65535 for n_blocks.z
//while (n_blocks.x * n_blocks.y * n_blocks.z > NUM_VECTOR_OP_BLOCKS)
// n_blocks.z /= 2;
n_blocks
.
z
=
1
;
dim3
n_threads
(
std
::
min
(
CudaNdarray_HOST_DIMS
(
self
)[
3
],
NUM_VECTOR_OP_THREADS_PER_BLOCK
)
//TODO: DON
"T YOU NEED OT
PUT DIMS[4] in here???
//TODO: DON
"T YOU NEED OT
PUT DIMS[5] in here???
//TODO: DON
'T YOU NEED TO
PUT DIMS[4] in here???
//TODO: DON
'T YOU NEED TO
PUT DIMS[5] in here???
);
k6
<<<
n_blocks
,
n_threads
>>>
(
CudaNdarray_HOST_DIMS
(
self
)[
0
],
...
...
@@ -1867,9 +1871,11 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
{
PyErr_Format
(
PyExc_RuntimeError
,
"Cuda error: %s: %s.
\n
"
,
"k4"
,
cudaGetErrorString
(
err
));
"Cuda error: %s: %s. n_blocks=(%ld, %ld, %ld) n_threads=(%ld)
\n
"
,
"k6"
,
cudaGetErrorString
(
err
),
(
long
)
n_blocks
.
x
,
(
long
)
n_blocks
.
y
,
(
long
)
n_blocks
.
z
,
(
long
)
n_threads
.
x
);
Py_XDECREF
(
new_other
);
return
-
1
;
}
...
...
theano/sandbox/cuda/opt.py
浏览文件 @
51b39ada
...
...
@@ -403,7 +403,12 @@ def local_gpu_lazy_ifelse(node):
host_input
=
node
.
inputs
[
0
]
if
(
host_input
.
owner
and
isinstance
(
host_input
.
owner
.
op
,
theano
.
ifelse
.
IfElse
)
and
not
host_input
.
owner
.
op
.
gpu
):
not
host_input
.
owner
.
op
.
gpu
and
# If there is more then 1 outputs, we can't replace it
# here with a local optimizer as we replace the
# GpuFromHost node and the other output of the if won't be
# replaced.
host_input
.
owner
.
op
.
n_outs
==
1
):
gpu_ifelse
=
theano
.
ifelse
.
IfElse
(
host_input
.
owner
.
op
.
n_outs
,
gpu
=
True
)
...
...
theano/sandbox/cuda/tests/test_opt.py
浏览文件 @
51b39ada
import
sys
,
time
,
unittest
import
sys
import
numpy
# Skip test if cuda_ndarray is not available.
...
...
@@ -7,7 +7,7 @@ from nose.plugins.skip import SkipTest
import
theano
from
theano.compile.pfunc
import
pfunc
from
theano
import
config
,
tensor
import
theano.sandbox.linalg.tests
import
theano.sandbox.linalg.tests
.test_linalg
from
theano.tests
import
unittest_tools
as
utt
...
...
@@ -48,28 +48,29 @@ def test_int_pow():
op_names
=
[
n
.
op
.
__class__
.
__name__
for
n
in
f
.
maker
.
fgraph
.
toposort
()]
assert
op_names
==
[
'GpuCAReduce'
,
'GpuElemwise'
,
'HostFromGpu'
]
f
=
theano
.
function
([
a
],
tensor
.
pow
(
a
,
4
)
.
sum
(),
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
a
],
tensor
.
pow
(
a
,
4
)
.
sum
(),
mode
=
mode_with_gpu
)
op_names
=
[
n
.
op
.
__class__
.
__name__
for
n
in
f
.
maker
.
fgraph
.
toposort
()]
assert
op_names
==
[
'GpuElemwise'
,
'GpuCAReduce'
,
'HostFromGpu'
]
#theano.printing.debugprint(f)
def
test_gpualloc
():
'''
This tests tries to catch the scenario when, due to infer_shape,
the input of the alloc changes from te
sn
or scalar to a constant
the input of the alloc changes from te
ns
or scalar to a constant
1. In this case the original constracted broadcastable pattern will
have a False for that dimension, but the new broadcastable pattern
that will be inserted by gpualloc will have a True since it knows the
dimension is 1 and therefore broadcastable.
'''
x
=
theano
.
shared
(
numpy
.
ones
(
3
,
dtype
=
'float32'
),
'x'
)
m
=
(
x
)
.
dimshuffle
([
'x'
,
0
])
x
=
theano
.
shared
(
numpy
.
ones
(
3
,
dtype
=
'float32'
),
'x'
)
m
=
(
x
)
.
dimshuffle
([
'x'
,
0
])
v
=
tensor
.
alloc
(
1.
,
*
m
.
shape
)
f
=
theano
.
function
([],
v
+
x
)
f
=
theano
.
function
([],
v
+
x
,
mode
=
mode_with_gpu
)
l
=
f
.
maker
.
fgraph
.
toposort
()
assert
numpy
.
any
(
ininstance
(
x
.
op
,
cuda
.
GpuAlloc
)
for
x
in
l
)
assert
numpy
.
any
(
[
isinstance
(
x
.
op
,
cuda
.
GpuAlloc
)
for
x
in
l
]
)
def
test_alloc_memset_0
():
...
...
theano/tests/test_ifelse.py
浏览文件 @
51b39ada
...
...
@@ -159,6 +159,49 @@ class test_ifelse(unittest.TestCase, utt.TestOptimizationMixin):
assert
numpy
.
all
(
outs_0
[
2
]
==
1.
)
assert
numpy
.
all
(
outs_0
[
3
]
==
1.
)
def
test_multiple_out_crash
(
self
):
# This test failed up to commit 2faeb62c38
p0
=
self
.
shared
(
numpy
.
asarray
(
numpy
.
random
.
random
([
4
,
8
]),
dtype
=
self
.
dtype
))
p1
=
self
.
shared
(
numpy
.
asarray
(
numpy
.
random
.
random
(
8
),
dtype
=
self
.
dtype
))
p2
=
self
.
shared
(
numpy
.
asarray
(
numpy
.
random
.
random
([
8
,
3
]),
dtype
=
self
.
dtype
))
p3
=
self
.
shared
(
numpy
.
asarray
(
numpy
.
random
.
random
(
3
),
dtype
=
self
.
dtype
))
p
=
[
p0
,
p1
,
p2
,
p3
]
# in my code these vars are the result of applying scan
ften0
=
tensor
.
tensor3
(
'ft0'
,
dtype
=
self
.
dtype
)
fmat1
=
tensor
.
matrix
(
'fm1'
,
dtype
=
self
.
dtype
)
ften2
=
tensor
.
tensor3
(
'ft2'
,
dtype
=
self
.
dtype
)
fmat3
=
tensor
.
matrix
(
'fm3'
,
dtype
=
self
.
dtype
)
# then I keep only the last iteration
fsub0
=
ften0
[
-
1
]
fsub1
=
fmat1
[
-
1
]
fsub2
=
ften2
[
-
1
]
fsub3
=
fmat3
[
-
1
]
fsub
=
[
fsub0
,
fsub1
,
fsub2
,
fsub3
]
acc
=
theano
.
tensor
.
constant
(
1
,
'int8'
)
>=
0
new_positions
=
theano
.
ifelse
.
ifelse
(
acc
,
fsub
,
p
)
new_updates
=
[(
p
[
0
],
new_positions
[
0
])]
f
=
theano
.
function
([
ften0
,
fmat1
,
ften2
,
fmat3
],
[],
updates
=
new_updates
,
mode
=
self
.
mode
)
self
.
assertFunctionContains1
(
f
,
self
.
get_ifelse
(
4
))
i1
=
numpy
.
asarray
(
numpy
.
random
.
random
([
19
,
4
,
8
]),
dtype
=
self
.
dtype
)
i2
=
numpy
.
asarray
(
numpy
.
random
.
random
([
19
,
8
]),
dtype
=
self
.
dtype
)
i3
=
numpy
.
asarray
(
numpy
.
random
.
random
([
19
,
8
,
3
]),
dtype
=
self
.
dtype
)
i4
=
numpy
.
asarray
(
numpy
.
random
.
random
([
19
,
3
]),
dtype
=
self
.
dtype
)
f
(
i1
,
i2
,
i3
,
i4
)
def
test_dtype_mismatch
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
data
=
rng
.
rand
(
5
)
.
astype
(
self
.
dtype
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论