Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9433d5d2
提交
9433d5d2
authored
7月 06, 2011
作者:
James Bergstra
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
vm linker
上级
2677e15f
隐藏空白字符变更
内嵌
并排
正在显示
8 个修改的文件
包含
1651 行增加
和
19 行删除
+1651
-19
function_module.py
theano/compile/function_module.py
+1
-0
mode.py
theano/compile/mode.py
+27
-8
test_modes.py
theano/compile/tests/test_modes.py
+2
-0
configdefaults.py
theano/configdefaults.py
+4
-11
lazylinker_c.c
theano/gof/lazylinker_c.c
+799
-0
lazylinker_c.py
theano/gof/lazylinker_c.py
+20
-0
test_vm.py
theano/gof/tests/test_vm.py
+239
-0
vm.py
theano/gof/vm.py
+559
-0
没有找到文件。
theano/compile/function_module.py
浏览文件 @
9433d5d2
...
...
@@ -331,6 +331,7 @@ class Function(object):
self
.
unpack_single
=
unpack_single
self
.
return_none
=
return_none
self
.
maker
=
maker
self
.
profile
=
None
# reassigned in FunctionMaker.create
# We will be popping stuff off this `containers` object. It is a copy.
containers
=
list
(
self
.
input_storage
)
...
...
theano/compile/mode.py
浏览文件 @
9433d5d2
...
...
@@ -4,7 +4,9 @@ import os, logging
import
numpy
,
theano
from
theano
import
gof
from
theano.configparser
import
config
,
AddConfigVar
,
StrParam
import
theano.gof.vm
from
theano.configparser
import
config
,
AddConfigVar
,
StrParam
,
EnumStr
_logger
=
logging
.
getLogger
(
'theano.compile.mode'
)
...
...
@@ -55,7 +57,11 @@ predefined_linkers = {
'c'
:
gof
.
CLinker
(),
'c|py'
:
gof
.
OpWiseCLinker
(
allow_gc
=
True
),
'c|py_nogc'
:
gof
.
OpWiseCLinker
(
allow_gc
=
False
),
'c&py'
:
gof
.
DualLinker
(
checker
=
check_equal
)
'c&py'
:
gof
.
DualLinker
(
checker
=
check_equal
),
'vm'
:
gof
.
vm
.
VM_Linker
(
allow_gc
=
True
,
use_cloop
=
False
),
'cvm'
:
gof
.
vm
.
VM_Linker
(
allow_gc
=
True
,
use_cloop
=
True
),
'vm_nogc'
:
gof
.
vm
.
VM_Linker
(
allow_gc
=
False
,
use_cloop
=
False
),
'cvm_nogc'
:
gof
.
vm
.
VM_Linker
(
allow_gc
=
False
,
use_cloop
=
True
),
}
...
...
@@ -249,6 +255,7 @@ class Mode(object):
self
.
_optimizer
=
optimizer
self
.
call_time
=
0
self
.
fn_time
=
0
linker
.
mode
=
self
#TODO: WHY IS THIS HERE?
self
.
optimizer_time
=
0
self
.
linker_time
=
0
...
...
@@ -290,15 +297,27 @@ class Mode(object):
FAST_COMPILE
=
Mode
(
'py'
,
'fast_compile'
)
FAST_RUN
=
Mode
(
'c|py'
,
'fast_run'
)
FAST_RUN_NOGC
=
Mode
(
"c|py_nogc"
,
'fast_run'
)
SANITY_CHECK
=
[
Mode
(
'c|py'
,
None
),
Mode
(
'c|py'
,
'fast_run'
)]
STABILIZE
=
Mode
(
"c|py"
,
OPT_STABILIZE
)
predefined_modes
=
{
'FAST_COMPILE'
:
FAST_COMPILE
,
'FAST_RUN'
:
FAST_RUN
,
'FAST_RUN_NOGC'
:
FAST_RUN_NOGC
,
'SANITY_CHECK'
:
SANITY_CHECK
,
'STABILIZE'
:
STABILIZE
}
'STABILIZE'
:
STABILIZE
,
'VM'
:
Mode
(
'vm'
,
'fast_run'
),
'VM_NOGC'
:
Mode
(
'vm_nogc'
,
'fast_run'
),
'CVM'
:
Mode
(
'cvm'
,
'fast_run'
),
'CVM_NOGC'
:
Mode
(
'cvm_nogc'
,
'fast_run'
),
}
#Don't add FAST_RUN_NOGC to this list(as well as other ALL CAPS short cut)
#The way to get FAST_RUN_NOGC is with the flag 'linker=c|py_nogc'
#The old all capital letter way of working is deprecated as it is not scalable.
AddConfigVar
(
'mode'
,
"Default compilation mode"
,
EnumStr
(
*
(
predefined_modes
.
keys
()
+
[
'Mode'
,
'DEBUG_MODE'
,
'PROFILE_MODE'
])),
in_c_key
=
False
)
instanciated_default_mode
=
None
def
get_mode
(
orig_string
):
...
...
@@ -329,7 +348,7 @@ def get_mode(orig_string):
ret
=
DebugMode
(
optimizer
=
config
.
optimizer
)
else
:
# The import is needed in case string is ProfileMode
from
profilemode
import
ProfileMode
from
profilemode
import
ProfileMode
,
prof_mode_instance_to_print
ret
=
eval
(
string
+
'(linker=config.linker, optimizer=config.optimizer)'
)
elif
predefined_modes
.
has_key
(
string
):
ret
=
predefined_modes
[
string
]
...
...
@@ -349,7 +368,6 @@ def get_mode(orig_string):
#must tell python to print the summary at the end.
if
string
==
'ProfileMode'
:
#need to import later to break circular dependency.
from
profilemode
import
prof_mode_instance_to_print
prof_mode_instance_to_print
.
append
(
ret
)
return
ret
...
...
@@ -365,3 +383,4 @@ def register_mode(name, mode):
if
name
in
predefined_modes
:
raise
ValueError
(
'Mode name already taken:
%
s'
%
name
)
predefined_modes
[
name
]
=
mode
theano/compile/tests/test_modes.py
浏览文件 @
9433d5d2
...
...
@@ -10,6 +10,8 @@ import random
import
numpy.random
from
theano.tests
import
unittest_tools
as
utt
import
theano.tensor
as
T
class
T_bunch_of_modes
(
unittest
.
TestCase
):
...
...
theano/configdefaults.py
浏览文件 @
9433d5d2
...
...
@@ -65,15 +65,6 @@ AddConfigVar('force_device',
BoolParam
(
False
,
allow_override
=
False
),
in_c_key
=
False
)
#Don't add FAST_RUN_NOGC to this list(as well as other ALL CAPS short cut)
#The way to get FAST_RUN_NOGC is with the flag 'linker=c|py_nogc'
#The old all capital letter way of working is deprecated as it is not scalable.
AddConfigVar
(
'mode'
,
"Default compilation mode"
,
EnumStr
(
'Mode'
,
'ProfileMode'
,
'DebugMode'
,
'FAST_RUN'
,
'FAST_COMPILE'
,
'PROFILE_MODE'
,
'DEBUG_MODE'
),
in_c_key
=
False
)
# Test whether or not gcc is present: disable C code if it is not.
# Using the dummy file descriptor below is a workaround for a crash experienced
# in an unusual Python 2.4.4 Windows environment with the default stdin=None.
...
...
@@ -84,13 +75,15 @@ try:
# Keep the default linker the same as the one for the mode FAST_RUN
AddConfigVar
(
'linker'
,
"Default linker used if the theano flags mode is Mode or ProfileMode"
,
EnumStr
(
'c|py'
,
'py'
,
'c'
,
'c|py_nogc'
,
'c&py'
),
EnumStr
(
'c|py'
,
'py'
,
'c'
,
'c|py_nogc'
,
'c&py'
,
'vm'
,
'cvm'
,
'vm_nogc'
,
'cvm_nogc'
),
in_c_key
=
False
)
except
OSError
:
# gcc is not present, linker should default to python only
AddConfigVar
(
'linker'
,
"Default linker used if the theano flags mode is Mode or ProfileMode"
,
EnumStr
(
'py'
,
'c|py'
,
'c'
,
'c|py_nogc'
,
'c&py'
),
EnumStr
(
'c|py'
,
'py'
,
'c'
,
'c|py_nogc'
,
'c&py'
,
'vm'
,
'cvm'
,
'vm_nogc'
,
'cvm_nogc'
),
in_c_key
=
False
)
warning
(
'GCC not detected ! Theano will be unable to execute optimized '
+
'C-implementations (for both CPU and GPU) and will default to '
+
...
...
theano/gof/lazylinker_c.c
0 → 100644
浏览文件 @
9433d5d2
#include <Python.h>
#include "structmember.h"
/**
TODO:
- Check max supported depth of recursion
- CLazyLinker should add context information to errors caught during evaluation. Say what node we were on, add the traceback attached to the node.
- Clear containers of fully-useed intermediate results if allow_gc is 1
- Add timers for profiling
- Add support for profiling space used.
*/
#include <time.h>
static
double
pytime
(
const
struct
timeval
*
tv
)
{
struct
timeval
t
;
if
(
!
tv
)
{
tv
=
&
t
;
gettimeofday
(
&
t
,
NULL
);
}
return
(
double
)
tv
->
tv_sec
+
(
double
)
tv
->
tv_usec
/
1000000
.
0
;
}
/**
CLazyLinker
*/
typedef
struct
{
PyObject_HEAD
/* Type-specific fields go here. */
PyObject
*
nodes
;
// the python list of nodes
PyObject
*
thunks
;
// python list of thunks
PyObject
*
pre_call_clear
;
//list of cells to clear on call.
int
allow_gc
;
Py_ssize_t
n_applies
;
int
n_vars
;
// number of variables in the graph
int
*
var_computed
;
// 1 or 0 for every variable
PyObject
**
var_computed_cells
;
Py_ssize_t
n_output_vars
;
Py_ssize_t
*
output_vars
;
// variables that *must* be evaluated by call
int
*
is_lazy
;
// 1 or 0 for every thunk
Py_ssize_t
*
var_owner
;
// nodes[[var_owner[var_idx]]] is var[var_idx]->owner
int
*
var_has_owner
;
// 1 or 0
Py_ssize_t
*
node_n_inputs
;
Py_ssize_t
*
node_n_outputs
;
Py_ssize_t
**
node_inputs
;
Py_ssize_t
**
node_outputs
;
Py_ssize_t
*
node_inputs_outputs_base
;
// node_inputs and node_outputs point into this
Py_ssize_t
*
node_n_prereqs
;
Py_ssize_t
**
node_prereqs
;
void
**
thunk_cptr_fn
;
void
**
thunk_cptr_data
;
PyObject
*
call_times
;
PyObject
*
call_counts
;
int
do_timing
;
int
position_of_error
;
// -1 for no error, otw the index into `thunks` that failed.
}
CLazyLinker
;
static
void
CLazyLinker_dealloc
(
PyObject
*
_self
)
{
CLazyLinker
*
self
=
(
CLazyLinker
*
)
_self
;
free
(
self
->
thunk_cptr_fn
);
free
(
self
->
thunk_cptr_data
);
free
(
self
->
is_lazy
);
if
(
self
->
node_n_prereqs
)
{
for
(
int
i
=
0
;
i
<
self
->
n_applies
;
++
i
)
{
free
(
self
->
node_prereqs
[
i
]);
}
}
free
(
self
->
node_n_prereqs
);
free
(
self
->
node_prereqs
);
free
(
self
->
node_inputs_outputs_base
);
free
(
self
->
node_n_inputs
);
free
(
self
->
node_n_outputs
);
free
(
self
->
node_inputs
);
free
(
self
->
node_outputs
);
free
(
self
->
var_owner
);
free
(
self
->
var_has_owner
);
free
(
self
->
var_computed
);
if
(
self
->
var_computed_cells
)
{
for
(
int
i
=
0
;
i
<
self
->
n_vars
;
++
i
)
{
Py_DECREF
(
self
->
var_computed_cells
[
i
]);
}
}
free
(
self
->
var_computed_cells
);
free
(
self
->
output_vars
);
Py_XDECREF
(
self
->
nodes
);
Py_XDECREF
(
self
->
thunks
);
Py_XDECREF
(
self
->
call_times
);
Py_XDECREF
(
self
->
call_counts
);
Py_XDECREF
(
self
->
pre_call_clear
);
self
->
ob_type
->
tp_free
((
PyObject
*
)
self
);
}
static
PyObject
*
CLazyLinker_new
(
PyTypeObject
*
type
,
PyObject
*
args
,
PyObject
*
kwds
)
{
CLazyLinker
*
self
;
self
=
(
CLazyLinker
*
)
type
->
tp_alloc
(
type
,
0
);
if
(
self
!=
NULL
)
{
self
->
nodes
=
NULL
;
self
->
thunks
=
NULL
;
self
->
pre_call_clear
=
NULL
;
self
->
allow_gc
=
1
;
self
->
n_applies
=
0
;
self
->
n_vars
=
0
;
self
->
var_computed
=
NULL
;
self
->
var_computed_cells
=
NULL
;
self
->
n_output_vars
=
0
;
self
->
output_vars
=
NULL
;
self
->
is_lazy
=
NULL
;
self
->
var_owner
=
NULL
;
self
->
var_has_owner
=
NULL
;
self
->
node_n_inputs
=
NULL
;
self
->
node_n_outputs
=
NULL
;
self
->
node_inputs
=
NULL
;
self
->
node_outputs
=
NULL
;
self
->
node_inputs_outputs_base
=
NULL
;
self
->
node_prereqs
=
NULL
;
self
->
node_n_prereqs
=
NULL
;
self
->
thunk_cptr_data
=
NULL
;
self
->
thunk_cptr_fn
=
NULL
;
self
->
call_times
=
NULL
;
self
->
call_counts
=
NULL
;
self
->
do_timing
=
0
;
self
->
position_of_error
=
-
1
;
}
return
(
PyObject
*
)
self
;
}
static
int
CLazyLinker_init
(
CLazyLinker
*
self
,
PyObject
*
args
,
PyObject
*
kwds
)
{
static
char
*
kwlist
[]
=
{
(
char
*
)
"nodes"
,
(
char
*
)
"thunks"
,
(
char
*
)
"pre_call_clear"
,
(
char
*
)
"allow_gc"
,
(
char
*
)
"call_counts"
,
(
char
*
)
"call_times"
,
(
char
*
)
"compute_map_list"
,
(
char
*
)
"base_input_output_list"
,
(
char
*
)
"node_n_inputs"
,
(
char
*
)
"node_n_outputs"
,
(
char
*
)
"node_input_offset"
,
(
char
*
)
"node_output_offset"
,
(
char
*
)
"var_owner"
,
(
char
*
)
"is_lazy_list"
,
(
char
*
)
"output_vars"
,
(
char
*
)
"node_prereqs"
,
(
char
*
)
"node_output_size"
,
NULL
};
PyObject
*
compute_map_list
=
NULL
,
*
base_input_output_list
=
NULL
,
*
node_n_inputs
=
NULL
,
*
node_n_outputs
=
NULL
,
*
node_input_offset
=
NULL
,
*
node_output_offset
=
NULL
,
*
var_owner
=
NULL
,
*
is_lazy
=
NULL
,
*
output_vars
=
NULL
,
*
node_prereqs
=
NULL
,
*
node_output_size
=
NULL
;
assert
(
!
self
->
nodes
);
if
(
!
PyArg_ParseTupleAndKeywords
(
args
,
kwds
,
"OOOiOOOOOOOOOOOOO"
,
kwlist
,
&
self
->
nodes
,
&
self
->
thunks
,
&
self
->
pre_call_clear
,
&
self
->
allow_gc
,
&
self
->
call_counts
,
&
self
->
call_times
,
&
compute_map_list
,
&
base_input_output_list
,
&
node_n_inputs
,
&
node_n_outputs
,
&
node_input_offset
,
&
node_output_offset
,
&
var_owner
,
&
is_lazy
,
&
output_vars
,
&
node_prereqs
,
&
node_output_size
))
return
-
1
;
Py_INCREF
(
self
->
nodes
);
Py_INCREF
(
self
->
thunks
);
Py_INCREF
(
self
->
pre_call_clear
);
Py_INCREF
(
self
->
call_counts
);
Py_INCREF
(
self
->
call_times
);
Py_ssize_t
n_applies
=
PyList_Size
(
self
->
nodes
);
self
->
n_applies
=
n_applies
;
self
->
n_vars
=
PyList_Size
(
var_owner
);
if
(
PyList_Size
(
self
->
thunks
)
!=
n_applies
)
return
-
1
;
if
(
PyList_Size
(
self
->
call_counts
)
!=
n_applies
)
return
-
1
;
if
(
PyList_Size
(
self
->
call_times
)
!=
n_applies
)
return
-
1
;
// allocated and initialize thunk_cptr_data and thunk_cptr_fn
if
(
n_applies
)
{
self
->
thunk_cptr_data
=
(
void
**
)
malloc
(
n_applies
*
sizeof
(
void
*
));
self
->
thunk_cptr_fn
=
(
void
**
)
malloc
(
n_applies
*
sizeof
(
void
*
));
self
->
is_lazy
=
(
int
*
)
malloc
(
n_applies
*
sizeof
(
int
));
self
->
node_prereqs
=
(
Py_ssize_t
**
)
malloc
(
n_applies
*
sizeof
(
Py_ssize_t
*
));
self
->
node_n_prereqs
=
(
Py_ssize_t
*
)
malloc
(
n_applies
*
sizeof
(
Py_ssize_t
));
assert
(
self
->
node_prereqs
);
assert
(
self
->
node_n_prereqs
);
assert
(
self
->
is_lazy
);
assert
(
self
->
thunk_cptr_fn
);
assert
(
self
->
thunk_cptr_data
);
// init these basic arrays
for
(
int
i
=
0
;
i
<
n_applies
;
++
i
)
{
self
->
thunk_cptr_data
[
i
]
=
NULL
;
self
->
thunk_cptr_fn
[
i
]
=
NULL
;
self
->
is_lazy
[
i
]
=
1
;
self
->
node_prereqs
[
i
]
=
NULL
;
self
->
node_n_prereqs
[
i
]
=
0
;
}
for
(
int
i
=
0
;
i
<
n_applies
;
++
i
)
{
PyObject
*
thunk
=
PyList_GetItem
(
self
->
thunks
,
i
);
//thunk is borrowed
if
(
PyObject_HasAttrString
(
thunk
,
"cthunk"
))
{
PyObject
*
cthunk
=
PyObject_GetAttrString
(
thunk
,
"cthunk"
);
//new reference
assert
(
cthunk
&&
PyCObject_Check
(
cthunk
));
self
->
thunk_cptr_fn
[
i
]
=
PyCObject_AsVoidPtr
(
cthunk
);
self
->
thunk_cptr_data
[
i
]
=
PyCObject_GetDesc
(
cthunk
);
Py_DECREF
(
cthunk
);
// cthunk is kept alive by membership in self->thunks
}
else
{
self
->
thunk_cptr_fn
[
i
]
=
NULL
;
self
->
thunk_cptr_data
[
i
]
=
NULL
;
}
PyObject
*
el_i
=
PyList_GetItem
(
is_lazy
,
i
);
self
->
is_lazy
[
i
]
=
PyNumber_AsSsize_t
(
el_i
,
NULL
);
/* now get the prereqs */
el_i
=
PyList_GetItem
(
node_prereqs
,
i
);
assert
(
PyList_Check
(
el_i
));
self
->
node_n_prereqs
[
i
]
=
PyList_Size
(
el_i
);
if
(
self
->
node_n_prereqs
[
i
])
{
self
->
node_prereqs
[
i
]
=
(
Py_ssize_t
*
)
malloc
(
PyList_Size
(
el_i
)
*
sizeof
(
Py_ssize_t
));
for
(
int
j
=
0
;
j
<
PyList_Size
(
el_i
);
++
j
)
{
PyObject
*
el_ij
=
PyList_GetItem
(
el_i
,
j
);
Py_ssize_t
N
=
PyNumber_AsSsize_t
(
el_ij
,
PyExc_IndexError
);
if
(
PyErr_Occurred
())
return
-
1
;
// N < n. variables
assert
(
N
<
PyList_Size
(
var_owner
));
self
->
node_prereqs
[
i
][
j
]
=
N
;
}
}
}
}
if
(
PyList_Check
(
base_input_output_list
))
{
Py_ssize_t
n_inputs_outputs_base
=
PyList_Size
(
base_input_output_list
);
self
->
node_inputs_outputs_base
=
(
Py_ssize_t
*
)
malloc
(
n_inputs_outputs_base
*
sizeof
(
Py_ssize_t
));
assert
(
self
->
node_inputs_outputs_base
);
for
(
int
i
=
0
;
i
<
n_inputs_outputs_base
;
++
i
)
{
PyObject
*
el_i
=
PyList_GetItem
(
base_input_output_list
,
i
);
Py_ssize_t
idx
=
PyNumber_AsSsize_t
(
el_i
,
PyExc_IndexError
);
if
(
PyErr_Occurred
())
return
-
1
;
self
->
node_inputs_outputs_base
[
i
]
=
idx
;
}
self
->
node_n_inputs
=
(
Py_ssize_t
*
)
malloc
(
n_applies
*
sizeof
(
Py_ssize_t
));
assert
(
self
->
node_n_inputs
);
self
->
node_n_outputs
=
(
Py_ssize_t
*
)
malloc
(
n_applies
*
sizeof
(
Py_ssize_t
));
assert
(
self
->
node_n_outputs
);
self
->
node_inputs
=
(
Py_ssize_t
**
)
malloc
(
n_applies
*
sizeof
(
Py_ssize_t
*
));
assert
(
self
->
node_inputs
);
self
->
node_outputs
=
(
Py_ssize_t
**
)
malloc
(
n_applies
*
sizeof
(
Py_ssize_t
*
));
assert
(
self
->
node_outputs
);
for
(
int
i
=
0
;
i
<
n_applies
;
++
i
)
{
Py_ssize_t
N
;
N
=
PyNumber_AsSsize_t
(
PyList_GetItem
(
node_n_inputs
,
i
),
PyExc_IndexError
);
if
(
PyErr_Occurred
())
return
-
1
;
assert
(
N
<=
n_inputs_outputs_base
);
self
->
node_n_inputs
[
i
]
=
N
;
N
=
PyNumber_AsSsize_t
(
PyList_GetItem
(
node_n_outputs
,
i
),
PyExc_IndexError
);
if
(
PyErr_Occurred
())
return
-
1
;
assert
(
N
<=
n_inputs_outputs_base
);
self
->
node_n_outputs
[
i
]
=
N
;
N
=
PyNumber_AsSsize_t
(
PyList_GetItem
(
node_input_offset
,
i
),
PyExc_IndexError
);
if
(
PyErr_Occurred
())
return
-
1
;
assert
(
N
<=
n_inputs_outputs_base
);
self
->
node_inputs
[
i
]
=
&
self
->
node_inputs_outputs_base
[
N
];
N
=
PyNumber_AsSsize_t
(
PyList_GetItem
(
node_output_offset
,
i
),
PyExc_IndexError
);
if
(
PyErr_Occurred
())
return
-
1
;
assert
(
N
<=
n_inputs_outputs_base
);
self
->
node_outputs
[
i
]
=
&
self
->
node_inputs_outputs_base
[
N
];
}
}
else
{
PyErr_SetString
(
PyExc_TypeError
,
"base_input_output_list must be list"
);
return
-
1
;
}
// allocation for var_owner
if
(
PyList_Check
(
var_owner
))
{
self
->
var_owner
=
(
Py_ssize_t
*
)
malloc
(
self
->
n_vars
*
sizeof
(
Py_ssize_t
));
self
->
var_has_owner
=
(
int
*
)
malloc
(
self
->
n_vars
*
sizeof
(
int
));
self
->
var_computed
=
(
int
*
)
malloc
(
self
->
n_vars
*
sizeof
(
int
));
self
->
var_computed_cells
=
(
PyObject
**
)
malloc
(
self
->
n_vars
*
sizeof
(
PyObject
*
));
for
(
int
i
=
0
;
i
<
self
->
n_vars
;
++
i
)
{
PyObject
*
el_i
=
PyList_GetItem
(
var_owner
,
i
);
if
(
el_i
==
Py_None
)
{
self
->
var_has_owner
[
i
]
=
0
;
}
else
{
Py_ssize_t
N
=
PyNumber_AsSsize_t
(
el_i
,
PyExc_IndexError
);
if
(
PyErr_Occurred
())
return
-
1
;
assert
(
N
<=
n_applies
);
self
->
var_owner
[
i
]
=
N
;
self
->
var_has_owner
[
i
]
=
1
;
}
self
->
var_computed_cells
[
i
]
=
PyList_GetItem
(
compute_map_list
,
i
);
Py_INCREF
(
self
->
var_computed_cells
[
i
]);
}
}
else
{
PyErr_SetString
(
PyExc_TypeError
,
"var_owner must be list"
);
return
-
1
;
}
//output vars
if
(
PyList_Check
(
output_vars
))
{
self
->
n_output_vars
=
PyList_Size
(
output_vars
);
self
->
output_vars
=
(
Py_ssize_t
*
)
malloc
(
self
->
n_output_vars
*
sizeof
(
Py_ssize_t
));
assert
(
self
->
output_vars
);
for
(
int
i
=
0
;
i
<
self
->
n_output_vars
;
++
i
)
{
PyObject
*
el_i
=
PyList_GetItem
(
output_vars
,
i
);
Py_ssize_t
N
=
PyNumber_AsSsize_t
(
el_i
,
PyExc_IndexError
);
if
(
PyErr_Occurred
())
return
-
1
;
assert
(
N
<=
self
->
n_vars
);
self
->
output_vars
[
i
]
=
N
;
}
}
else
{
PyErr_SetString
(
PyExc_TypeError
,
"output_vars must be list"
);
return
-
1
;
}
return
0
;
}
static
void
set_position_of_error
(
CLazyLinker
*
self
,
int
owner_idx
)
{
if
(
self
->
position_of_error
==
-
1
)
{
self
->
position_of_error
=
owner_idx
;
}
}
static
PyObject
*
pycall
(
CLazyLinker
*
self
,
Py_ssize_t
node_idx
,
int
verbose
)
{
// call thunk to see which inputs it wants
PyObject
*
thunk
=
PyList_GetItem
(
self
->
thunks
,
node_idx
);
// refcounting - thunk is borrowed
PyObject
*
rval
=
NULL
;
if
(
self
->
do_timing
)
{
double
t0
=
pytime
(
NULL
);
if
(
verbose
)
fprintf
(
stderr
,
"calling via Python (node %i)
\n
"
,
(
int
)
node_idx
);
rval
=
PyObject_CallObject
(
thunk
,
NULL
);
double
t1
=
pytime
(
NULL
);
double
ti
=
PyFloat_AsDouble
(
PyList_GetItem
(
self
->
call_times
,
node_idx
));
PyList_SetItem
(
self
->
call_times
,
node_idx
,
PyFloat_FromDouble
(
t1
-
t0
+
ti
));
PyObject
*
count
=
PyList_GetItem
(
self
->
call_counts
,
node_idx
);
long
icount
=
PyInt_AsLong
(
count
);
PyList_SetItem
(
self
->
call_counts
,
node_idx
,
PyInt_FromLong
(
icount
+
1
));
}
else
{
if
(
verbose
)
fprintf
(
stderr
,
"calling via Python (node %i)
\n
"
,
(
int
)
node_idx
);
rval
=
PyObject_CallObject
(
thunk
,
NULL
);
}
return
rval
;
}
static
int
c_call
(
CLazyLinker
*
self
,
Py_ssize_t
node_idx
,
int
verbose
)
{
void
*
ptr_addr
=
self
->
thunk_cptr_fn
[
node_idx
];
int
(
*
fn
)(
void
*
)
=
(
int
(
*
)(
void
*
))(
ptr_addr
);
if
(
verbose
)
fprintf
(
stderr
,
"calling non-lazy shortcut (node %i)
\n
"
,
(
int
)
node_idx
);
int
err
=
0
;
if
(
self
->
do_timing
)
{
double
t0
=
pytime
(
NULL
);
err
=
fn
(
self
->
thunk_cptr_data
[
node_idx
]);
double
t1
=
pytime
(
NULL
);
double
ti
=
PyFloat_AsDouble
(
PyList_GetItem
(
self
->
call_times
,
node_idx
));
PyList_SetItem
(
self
->
call_times
,
node_idx
,
PyFloat_FromDouble
(
t1
-
t0
+
ti
));
PyObject
*
count
=
PyList_GetItem
(
self
->
call_counts
,
node_idx
);
long
icount
=
PyInt_AsLong
(
count
);
PyList_SetItem
(
self
->
call_counts
,
node_idx
,
PyInt_FromLong
(
icount
+
1
));
}
else
{
err
=
fn
(
self
->
thunk_cptr_data
[
node_idx
]);
}
if
(
err
)
{
// cast the argument to a PyList (as described near line 226 of cc.py)
PyObject
*
__ERROR
=
((
PyObject
**
)
self
->
thunk_cptr_data
[
node_idx
])[
0
];
assert
(
PyList_Check
(
__ERROR
));
assert
(
PyList_Size
(
__ERROR
)
==
3
);
PyObject
*
err_type
=
PyList_GetItem
(
__ERROR
,
0
);
//stolen ref
PyObject
*
err_msg
=
PyList_GetItem
(
__ERROR
,
1
);
//stolen ref
PyObject
*
err_trace
=
PyList_GetItem
(
__ERROR
,
2
);
//stolen ref
PyList_SET_ITEM
(
__ERROR
,
0
,
Py_None
);
Py_INCREF
(
Py_None
);
//clobbers old ref
PyList_SET_ITEM
(
__ERROR
,
1
,
Py_None
);
Py_INCREF
(
Py_None
);
//clobbers old ref
PyList_SET_ITEM
(
__ERROR
,
2
,
Py_None
);
Py_INCREF
(
Py_None
);
//clobbers old ref
assert
(
!
PyErr_Occurred
());
// because CLinker hid the exception in __ERROR aka data
PyErr_Restore
(
err_type
,
err_msg
,
err_trace
);
//steals refs to args
}
if
(
err
)
set_position_of_error
(
self
,
node_idx
);
return
err
;
}
static
int
lazy_rec_eval
(
CLazyLinker
*
self
,
Py_ssize_t
var_idx
,
PyObject
*
one
,
PyObject
*
zero
)
{
int
verbose
=
0
;
if
(
verbose
)
fprintf
(
stderr
,
"lazy_rec computing %i
\n
"
,
(
int
)
var_idx
);
int
err
=
0
;
if
(
self
->
var_computed
[
var_idx
]
||
!
self
->
var_has_owner
[
var_idx
])
{
return
0
;
}
else
{
Py_ssize_t
owner_idx
=
self
->
var_owner
[
var_idx
];
// STEP 1: compute the pre-requirements of the node
for
(
int
i
=
0
;
i
<
self
->
node_n_prereqs
[
owner_idx
];
++
i
)
{
Py_ssize_t
prereq_idx
=
self
->
node_prereqs
[
owner_idx
][
i
];
if
(
!
self
->
var_computed
[
prereq_idx
])
{
err
=
lazy_rec_eval
(
self
,
prereq_idx
,
one
,
zero
);
if
(
err
)
return
err
;
}
assert
(
self
->
var_computed
[
prereq_idx
]);
}
// STEP 2: compute the node itself
if
(
self
->
is_lazy
[
owner_idx
])
{
// update the compute_map cells corresponding to the inputs of this thunk
for
(
int
i
=
0
;
i
<
self
->
node_n_inputs
[
owner_idx
]
&&
(
!
err
);
++
i
)
{
int
in_idx
=
self
->
node_inputs
[
owner_idx
][
i
];
if
(
self
->
var_computed
[
in_idx
])
{
Py_INCREF
(
one
);
err
=
PyList_SetItem
(
self
->
var_computed_cells
[
in_idx
],
0
,
one
);
}
else
{
Py_INCREF
(
zero
);
err
=
PyList_SetItem
(
self
->
var_computed_cells
[
in_idx
],
0
,
zero
);
}
}
if
(
err
)
{
set_position_of_error
(
self
,
owner_idx
);
return
err
;
}
PyObject
*
rval
=
pycall
(
self
,
owner_idx
,
verbose
);
// refcounting - rval is new ref
//TODO: to prevent infinite loops
// - consider check that a thunk does not ask for an input that is already computed
if
(
rval
)
//call returned normally (no exception)
{
//update the computed-ness of any output cells
for
(
int
i
=
0
;
i
<
self
->
node_n_outputs
[
owner_idx
];
++
i
)
{
int
out_idx
=
self
->
node_outputs
[
owner_idx
][
i
];
PyObject
*
el_i
=
PyList_GetItem
(
self
->
var_computed_cells
[
out_idx
],
0
);
Py_ssize_t
N
=
PyNumber_AsSsize_t
(
el_i
,
PyExc_IndexError
);
if
(
PyErr_Occurred
())
{
Py_DECREF
(
rval
);
set_position_of_error
(
self
,
owner_idx
);
return
-
1
;
}
assert
(
N
==
0
||
N
==
1
);
self
->
var_computed
[
out_idx
]
=
N
;
}
if
(
!
self
->
var_computed
[
var_idx
])
{
if
(
PyList_Check
(
rval
))
{
if
(
PyList_Size
(
rval
))
{
for
(
int
i
=
0
;
i
<
PyList_Size
(
rval
)
&&
(
!
err
);
++
i
)
{
PyObject
*
el_i
=
PyList_GetItem
(
rval
,
i
);
Py_ssize_t
N
=
PyNumber_AsSsize_t
(
el_i
,
PyExc_IndexError
);
if
(
PyErr_Occurred
())
{
err
=
1
;
}
else
{
assert
(
N
<=
self
->
node_n_inputs
[
owner_idx
]);
Py_ssize_t
input_idx
=
self
->
node_inputs
[
owner_idx
][
N
];
err
=
lazy_rec_eval
(
self
,
input_idx
,
one
,
zero
);
}
}
if
(
!
err
)
err
=
lazy_rec_eval
(
self
,
var_idx
,
one
,
zero
);
}
else
{
PyErr_SetString
(
PyExc_ValueError
,
"lazy thunk returned empty list without computing output"
);
err
=
1
;
set_position_of_error
(
self
,
owner_idx
);
}
Py_DECREF
(
rval
);
set_position_of_error
(
self
,
owner_idx
);
return
err
;
}
else
// don't know what it returned, but it wasn't right.
{
//TODO: More helpful error to help find *which node* made this
// bad thunk
PyErr_SetString
(
PyExc_TypeError
,
"lazy thunk should list"
);
Py_DECREF
(
rval
);
set_position_of_error
(
self
,
owner_idx
);
return
1
;
}
}
Py_DECREF
(
rval
);
}
else
// pycall returned NULL (internal error)
{
assert
(
PyErr_Occurred
());
set_position_of_error
(
self
,
owner_idx
);
return
1
;
}
}
else
//owner is not a lazy op. Ensure all intputs are evaluated.
{
// loop over inputs to owner
// call lazy_rec_eval on each one that is not computed.
// if there's an error, pass it up the stack
for
(
int
i
=
0
;
i
<
self
->
node_n_inputs
[
owner_idx
];
++
i
)
{
Py_ssize_t
input_idx
=
self
->
node_inputs
[
owner_idx
][
i
];
if
(
!
self
->
var_computed
[
input_idx
])
{
err
=
lazy_rec_eval
(
self
,
input_idx
,
one
,
zero
);
if
(
err
)
return
err
;
}
assert
(
self
->
var_computed
[
input_idx
]);
}
// call the thunk for this owner.
if
(
self
->
thunk_cptr_fn
[
owner_idx
])
{
err
=
c_call
(
self
,
owner_idx
,
verbose
);
}
else
{
PyObject
*
rval
=
pycall
(
self
,
owner_idx
,
verbose
);
//rval is new ref
if
(
rval
)
//pycall returned normally (no exception)
{
if
(
rval
==
Py_None
)
{
Py_DECREF
(
rval
);
//ignore a return of None
}
else
if
(
PyList_Check
(
rval
))
{
PyErr_SetString
(
PyExc_TypeError
,
"non-lazy thunk should return None, not list"
);
err
=
1
;
set_position_of_error
(
self
,
owner_idx
);
Py_DECREF
(
rval
);
}
else
// don't know what it returned, but it wasn't right.
{
PyErr_SetObject
(
PyExc_TypeError
,
rval
);
err
=
1
;
set_position_of_error
(
self
,
owner_idx
);
}
}
else
// pycall returned NULL (internal error)
{
err
=
1
;
set_position_of_error
(
self
,
owner_idx
);
}
}
}
// loop over all outputs and mark them as computed
for
(
int
i
=
0
;
i
<
self
->
node_n_outputs
[
owner_idx
]
&&
(
!
err
);
++
i
)
{
self
->
var_computed
[
self
->
node_outputs
[
owner_idx
][
i
]]
=
1
;
}
}
return
err
;
}
PyObject
*
CLazyLinker_call
(
PyObject
*
_self
,
PyObject
*
args
,
PyObject
*
kwds
)
{
CLazyLinker
*
self
=
(
CLazyLinker
*
)
_self
;
static
char
*
kwlist
[]
=
{(
char
*
)
"time_thunks"
,
NULL
};
if
(
!
PyArg_ParseTupleAndKeywords
(
args
,
kwds
,
"|i"
,
kwlist
,
&
self
->
do_timing
))
return
NULL
;
int
err
=
0
;
self
->
position_of_error
=
-
1
;
PyObject
*
one
=
PyInt_FromLong
(
1
);
PyObject
*
zero
=
PyInt_FromLong
(
0
);
//clear storage of pre_call_clear elements
Py_ssize_t
n_pre_call_clear
=
PyList_Size
(
self
->
pre_call_clear
);
assert
(
PyList_Check
(
self
->
pre_call_clear
));
for
(
int
i
=
0
;
i
<
n_pre_call_clear
;
++
i
)
{
PyObject
*
el_i
=
PyList_GetItem
(
self
->
pre_call_clear
,
i
);
Py_INCREF
(
Py_None
);
PyList_SetItem
(
el_i
,
0
,
Py_None
);
}
//clear the computed flag out of all non-input vars
for
(
int
i
=
0
;
i
<
self
->
n_vars
;
++
i
)
{
self
->
var_computed
[
i
]
=
!
self
->
var_has_owner
[
i
];
if
(
self
->
var_computed
[
i
])
{
Py_INCREF
(
one
);
PyList_SetItem
(
self
->
var_computed_cells
[
i
],
0
,
one
);
}
else
{
Py_INCREF
(
zero
);
PyList_SetItem
(
self
->
var_computed_cells
[
i
],
0
,
zero
);
}
}
for
(
int
i
=
0
;
i
<
self
->
n_output_vars
&&
(
!
err
);
++
i
)
{
err
=
lazy_rec_eval
(
self
,
self
->
output_vars
[
i
],
one
,
zero
);
}
Py_DECREF
(
one
);
Py_DECREF
(
zero
);
if
(
err
)
return
NULL
;
Py_INCREF
(
Py_None
);
return
Py_None
;
}
#if 0
static PyMethodDef CLazyLinker_methods[] = {
{
//"name", (PyCFunction)CLazyLinker_accept, METH_VARARGS, "Return the name, combining the first and last name"
},
{NULL} /* Sentinel */
};
#endif
static
PyMemberDef
CLazyLinker_members
[]
=
{
{(
char
*
)
"nodes"
,
T_OBJECT_EX
,
offsetof
(
CLazyLinker
,
nodes
),
0
,
(
char
*
)
"list of nodes"
},
{(
char
*
)
"thunks"
,
T_OBJECT_EX
,
offsetof
(
CLazyLinker
,
thunks
),
0
,
(
char
*
)
"list of thunks in program"
},
{(
char
*
)
"call_counts"
,
T_OBJECT_EX
,
offsetof
(
CLazyLinker
,
call_counts
),
0
,
(
char
*
)
"number of calls of each thunk"
},
{(
char
*
)
"call_times"
,
T_OBJECT_EX
,
offsetof
(
CLazyLinker
,
call_times
),
0
,
(
char
*
)
"total runtime in each thunk"
},
{(
char
*
)
"position_of_error"
,
T_INT
,
offsetof
(
CLazyLinker
,
position_of_error
),
0
,
(
char
*
)
"position of failed thunk"
},
{(
char
*
)
"time_thunks"
,
T_INT
,
offsetof
(
CLazyLinker
,
do_timing
),
0
,
(
char
*
)
"bool: nonzero means call will time thunks"
},
{
NULL
}
/* Sentinel */
};
static
PyTypeObject
lazylinker_ext_CLazyLinkerType
=
{
PyObject_HEAD_INIT
(
NULL
)
0
,
/*ob_size*/
"lazylinker_ext.CLazyLinker"
,
/*tp_name*/
sizeof
(
CLazyLinker
),
/*tp_basicsize*/
0
,
/*tp_itemsize*/
CLazyLinker_dealloc
,
/*tp_dealloc*/
0
,
/*tp_print*/
0
,
/*tp_getattr*/
0
,
/*tp_setattr*/
0
,
/*tp_compare*/
0
,
/*tp_repr*/
0
,
/*tp_as_number*/
0
,
/*tp_as_sequence*/
0
,
/*tp_as_mapping*/
0
,
/*tp_hash */
CLazyLinker_call
,
/*tp_call*/
0
,
/*tp_str*/
0
,
/*tp_getattro*/
0
,
/*tp_setattro*/
0
,
/*tp_as_buffer*/
Py_TPFLAGS_DEFAULT
|
Py_TPFLAGS_BASETYPE
,
/*tp_flags*/
"CLazyLinker object"
,
/* tp_doc */
0
,
/* tp_traverse */
0
,
/* tp_clear */
0
,
/* tp_richcompare */
0
,
/* tp_weaklistoffset */
0
,
/* tp_iter */
0
,
/* tp_iternext */
0
,
//CLazyLinker_methods, /* tp_methods */
CLazyLinker_members
,
/* tp_members */
0
,
/* tp_getset */
0
,
/* tp_base */
0
,
/* tp_dict */
0
,
/* tp_descr_get */
0
,
/* tp_descr_set */
0
,
/* tp_dictoffset */
(
initproc
)
CLazyLinker_init
,
/* tp_init */
0
,
/* tp_alloc */
CLazyLinker_new
,
/* tp_new */
};
static
PyMethodDef
lazylinker_ext_methods
[]
=
{
{
NULL
}
/* Sentinel */
};
#ifndef PyMODINIT_FUNC
/* declarations for DLL import/export */
#define PyMODINIT_FUNC void
#endif
PyMODINIT_FUNC
initlazylinker_ext
(
void
)
{
PyObject
*
m
;
lazylinker_ext_CLazyLinkerType
.
tp_new
=
PyType_GenericNew
;
if
(
PyType_Ready
(
&
lazylinker_ext_CLazyLinkerType
)
<
0
)
return
;
m
=
Py_InitModule3
(
"lazylinker_ext"
,
lazylinker_ext_methods
,
"Example module that creates an extension type."
);
Py_INCREF
(
&
lazylinker_ext_CLazyLinkerType
);
PyModule_AddObject
(
m
,
"CLazyLinker"
,
(
PyObject
*
)
&
lazylinker_ext_CLazyLinkerType
);
}
theano/gof/lazylinker_c.py
0 → 100644
浏览文件 @
9433d5d2
import
os
import
theano
from
theano
import
config
from
theano.gof.compilelock
import
get_lock
,
release_lock
from
theano.gof
import
cmodule
get_lock
()
try
:
dirname
=
'lazylinker_ext'
cfile
=
os
.
path
.
join
(
theano
.
__path__
[
0
],
'gof'
,
'lazylinker_c.c'
)
code
=
open
(
cfile
)
.
read
()
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
if
not
os
.
path
.
exists
(
loc
):
os
.
mkdir
(
loc
)
cmodule
.
gcc_module_compile_str
(
dirname
,
code
,
location
=
loc
)
from
lazylinker_ext.lazylinker_ext
import
*
finally
:
# Release lock on compilation directory.
release_lock
()
theano/gof/tests/test_vm.py
0 → 100644
浏览文件 @
9433d5d2
import
gc
import
sys
import
time
import
line_profiler
import
numpy
from
theano
import
function
from
theano.gof
import
vm
,
link
,
OpWiseCLinker
from
theano.compile
import
Mode
from
theano
import
tensor
from
theano.lazycond
import
cond
import
theano
def test_speed():
    # Benchmark harness (not a pass/fail unit test): compares the per-op
    # overhead of several linkers against raw numpy on a chain of
    # elementwise additions.

    def build_graph(x, depth=5):
        # Build a symbolic chain of `depth` additions: x -> x+x -> ...
        z = x
        for d in range(depth):
            z = (z + z)
        return z

    def numpy_version(x, depth):
        # Same chain evaluated eagerly with numpy, as the reference timing.
        z = x
        for d in xrange(depth):
            z = (z + z)
        return z

    def time_numpy():
        steps_a = 5
        steps_b = 100
        x = numpy.asarray([2.0, 3.0], dtype=theano.config.floatX)

        numpy_version(x, steps_a)  # warm-up call, excluded from timing
        t0 = time.time()
        print numpy_version(x, steps_a)
        t1 = time.time()
        t2 = time.time()
        print numpy_version(x, steps_b)
        t3 = time.time()
        t_a = t1 - t0
        t_b = t3 - t2

        # Subtracting the short run from the long run cancels fixed
        # per-call overhead; the difference is scaled to seconds per
        # 1000 operations ("s/Kop").
        print "%s takes %f s/Kop" % (
                'numpy',
                (1000 * (t_b - t_a) / (steps_b - steps_a)))

    def time_linker(name, linker):
        # Time the same graphs compiled with `linker` (a zero-argument
        # factory returning a fresh linker instance).
        steps_a = 5
        steps_b = 100
        x = tensor.vector()
        a = build_graph(x, steps_a)
        b = build_graph(x, steps_b)

        f_a = function([x], a,
                mode=Mode(optimizer=None, linker=linker()),
                #profile='f_a speed test %s'%name,
                )
        f_b = function([x], b,
                mode=Mode(optimizer=None, linker=linker()),
                #profile='f_b speed test %s'%name,
                )

        # First call of each function is the warm-up (may trigger
        # compilation); only the second call is timed.
        print f_a([2.0, 3.0])
        t0 = time.time()
        print f_a([2.0, 3.0])
        t1 = time.time()

        print f_b([2.0, 3.0])
        t2 = time.time()
        print f_b([2.0, 3.0])
        t3 = time.time()

        t_a = t1 - t0
        t_b = t3 - t2

        print "%s takes %f s/Kop" % (
                name,
                (1000 * (t_b - t_a) / (steps_b - steps_a)))

    time_linker('c|py', OpWiseCLinker)
    time_linker('vmLinker', vm.VM_Linker)
    time_linker('vmLinker_nogc', lambda: vm.VM_Linker(allow_gc=False))
    time_linker('vmLinker_CLOOP',
            lambda: vm.VM_Linker(allow_gc=False, use_cloop=True))
    time_numpy()
def test_speed_lazy():
    # Benchmark harness for lazy evaluation: same shape as test_speed, but
    # the graph is a chain of `cond` (lazy if-then-else) nodes, which only
    # the VM-based linkers can evaluate lazily.

    def build_graph(x, depth=5):
        # Chain of `depth` conditionals; each flips the sign when z > 0.
        z = x
        for d in range(depth):
            z = cond(z > 0, -z, z)
        return z

    def time_linker(name, linker):
        steps_a = 10
        steps_b = 100
        x = tensor.vector()
        a = build_graph(x, steps_a)
        b = build_graph(x, steps_b)

        f_a = function([x], a,
                mode=Mode(optimizer=None, linker=linker()),
                #profile='f_a lazy cond %s'%name,
                )
        f_b = function([x], b,
                mode=Mode(optimizer=None, linker=linker()),
                #profile='f_b lazy cond %s'%name,
                )

        # First call is the warm-up; second call of each function is timed.
        print f_a([2.0])
        t0 = time.time()
        print f_a([2.0])
        t1 = time.time()

        print f_b([2.0])
        t2 = time.time()
        print f_b([2.0])
        t3 = time.time()

        t_a = t1 - t0
        t_b = t3 - t2

        # Difference of long and short runs, scaled to seconds per 1000 ops.
        print "%s takes %f s/Kop" % (
                name,
                (1000 * (t_b - t_a) / (steps_b - steps_a)))

    time_linker('vmLinker', vm.VM_Linker)
    time_linker('vmLinker_nogc', lambda: vm.VM_Linker(allow_gc=False))
    time_linker('vmLinker_C',
            lambda: vm.VM_Linker(allow_gc=False, use_cloop=True))
# Manual memory-usage checks: flip this flag to expose them to the test
# collector. They loop for a long time and are judged by watching RSS in
# `top`, not by assertions.
run_memory_usage_tests = False
if run_memory_usage_tests:
    # these are not normal unit tests, do not run them as part of standard
    # suite. I ran them while looking at top, and stopped when memory usage was
    # stable.
    def test_leak2():
        # Repeatedly wrap a numpy array in a CudaNdarray and check that the
        # source array's refcount is unchanged (i.e. no reference is leaked).
        import theano.sandbox.cuda as cuda
        for i in xrange(1000000):
            n = numpy.asarray([2.3, 4.5], dtype='f')
            c = sys.getrefcount(n)
            a = cuda.CudaNdarray(n)
            assert c == sys.getrefcount(n)
            if not i % 1000:
                print '.',
                print gc.collect(),
                print gc.collect()
            sys.stdout.flush()

    def test_no_leak_many_graphs():
        # Verify no memory leaks when creating and deleting a lot of functions
        # This isn't really a unit test, you have to run it and look at top to see
        # if there's a leak
        for i in xrange(10000):
            x = tensor.vector()
            z = x
            for d in range(10):
                z = tensor.sin(-z + 1)
            f = function([x], z, mode=Mode(optimizer=None, linker='cvm'))
            if not i % 100:
                print gc.collect()
            sys.stdout.flush()
            gc.collect()
            if 1:
                f([2.0])
                f([3.0])
                f([4.0])
                f([5.0])

    def test_no_leak_many_call_lazy():
        # Verify no memory leaks when calling a function a lot of times
        # This isn't really a unit test, you have to run it and look at top to see
        # if there's a leak
        def build_graph(x, depth=5):
            # Chain of lazy conditionals, as in test_speed_lazy.
            z = x
            for d in range(depth):
                z = cond(z > 0, -z, z)
            return z

        def time_linker(name, linker):
            steps_a = 10
            x = tensor.vector()
            a = build_graph(x, steps_a)
            f_a = function([x], a,
                    mode=Mode(optimizer=None, linker=linker()))
            for i in xrange(100000):
                f_a([2.0])
            if 0:
                # this doesn't seem to work, prints 0 for everything
                import resource
                pre = resource.getrusage(resource.RUSAGE_SELF)
                post = resource.getrusage(resource.RUSAGE_SELF)
                print pre.ru_ixrss, post.ru_ixrss
                print pre.ru_idrss, post.ru_idrss
                print pre.ru_maxrss, post.ru_maxrss
        time_linker('vmLinker_C',
                lambda: vm.VM_Linker(allow_gc=False, use_cloop=True))

    def test_no_leak_many_call_nonlazy():
        # Verify no memory leaks when calling a function a lot of times
        # This isn't really a unit test, you have to run it and look at top to see
        # if there's a leak
        def build_graph(x, depth=5):
            # Purely strict (non-lazy) chain of sin nodes.
            z = x
            for d in range(depth):
                z = tensor.sin(-z + 1)
            return z

        def time_linker(name, linker):
            steps_a = 10
            x = tensor.vector()
            a = build_graph(x, steps_a)
            f_a = function([x], a,
                    mode=Mode(optimizer=None, linker=linker()))
            for i in xrange(500000):
                f_a([2.0])
        time_linker('vmLinker_C',
                lambda: vm.VM_Linker(allow_gc=False, use_cloop=True))
theano/gof/vm.py
0 → 100644
浏览文件 @
9433d5d2
"""
VMs that run Theano graph computations.
"""
import
sys
import
time
import
link
import
traceback
from
theano.gof.python25
import
all
import
theano
config
=
theano
.
config
from
theano.configparser
import
config
,
AddConfigVar
,
BoolParam
from
theano
import
config
AddConfigVar
(
'profile'
,
"If VM should collect profile information"
,
BoolParam
(
False
))
def raise_with_op(op, exc_info=None):
    """Re-raise the current (or given) exception, annotated with `op`.

    The Apply node `op` that was executing when the exception occurred is
    appended to the exception's args (along with its position in the
    toposort, when available), and the op's creation traceback -- if the
    op carries one in op.tag.trace -- is attached as __thunk_trace__, so
    post-mortem debugging can locate the failing part of the graph.
    """
    if exc_info is None:
        exc_info = sys.exc_info()
    exc_type, exc_value, exc_trace = exc_info
    if exc_type == KeyboardInterrupt:
        # print a simple traceback from KeyboardInterrupt
        # (no annotation: the user interrupted, the op is not at fault)
        raise exc_type, exc_value, exc_trace
    try:
        trace = op.tag.trace
    except AttributeError:
        # Not every op records a creation traceback.
        trace = ()
    exc_value.__thunk_trace__ = trace
    exc_value.args += (op,)
    if op in op.env.toposort():
        # Include the op's position in the execution order, to make the
        # failing node easier to find in a printed graph.
        exc_value.args += (
                'Sequence id of Apply node=' +
                str(op.env.toposort().index(op)),)
    raise exc_type, exc_value, exc_trace
class VM(object):
    """
    A VM object evaluates a Theano program with its __call__ method.

    Attributes:

    call_counts - list of integers, one for each thunk. call_count[i] is the
        number of times thunks[i] was called in the course of computations
        performed by call_with_timers().

    call_times - list of floats, one for each thunk. call_times[i] is the
        amount of runtime spent on thunks[i] in the course of computations
        performed by call_with_timers().
    """

    def __init__(self, nodes, thunks, pre_call_clear):
        """
        Allocate a virtual machine.

        nodes - a list of nodes in toposort order
        thunks - a list of thunks to execute those nodes, in toposort order
        pre_call_clear - a list of containers to empty at the beginning of
            each call.
        """
        if len(nodes) != len(thunks):
            raise ValueError()
        self.nodes = nodes
        self.thunks = thunks
        self.pre_call_clear = pre_call_clear
        # Per-thunk profiling buffers, reset by update_profile().
        self.call_counts = [0] * len(nodes)
        self.call_times = [0] * len(nodes)
        # When True, subclasses time each thunk and fill the buffers above.
        self.time_thunks = False

    def __call__(self):
        """
        Run the machine.

        Postcondition - all output variables have been computed. VMs vary
        in what exactly this means and how it is done.
        """
        raise NotImplementedError('override me')

    def clear_storage(self):
        """
        Free any internal references to temporary variables.

        Free internal variables and outputs. Essentially, free as much
        memory as possible without interfering with the ability to evaluate
        subsequent calls.
        """
        raise NotImplementedError('override me')

    def update_profile(self, profile):
        """Flush the per-thunk timing buffers into `profile`.

        `profile` must expose dict attributes apply_time, apply_callcount
        and apply_cimpl, keyed by apply node.
        """
        # accumulate into the profile object
        for node, thunk, t, c in zip(self.nodes, self.thunks,
                self.call_times, self.call_counts):
            profile.apply_time.setdefault(node, 0.0)
            profile.apply_time[node] += t
            profile.apply_callcount.setdefault(node, 0)
            # BUG FIX: accumulate the call count (+=) instead of
            # overwriting it (=); the original assignment discarded counts
            # from previous flushes, which contradicts both the preceding
            # setdefault and the += used for apply_time above.
            profile.apply_callcount[node] += c
            profile.apply_cimpl[node] = hasattr(thunk, 'cthunk')

        # clear the timer info out of the buffers
        for i in range(len(self.call_times)):
            self.call_times[i] = 0.0
            self.call_counts[i] = 0
class Loop(VM):
    """
    Unconditional start-to-finish program execution in Python.
    No garbage collection is allowed on intermediate results.
    """

    def __call__(self):
        # Empty the containers that must not recycle their previous value.
        for container in self.pre_call_clear:
            container[0] = None

        if self.time_thunks:
            try:
                for pos, (fn, node) in enumerate(
                        zip(self.thunks, self.nodes)):
                    start = time.time()
                    fn()
                    stop = time.time()
                    self.call_counts[pos] += 1
                    self.call_times[pos] += stop - start
            except:
                # Annotate and re-raise with the node that failed.
                raise_with_op(node)
        else:
            try:
                for fn, node in zip(self.thunks, self.nodes):
                    fn()
            except:
                raise_with_op(node)
class LoopGC(VM):
    """
    Unconditional start-to-finish program execution in Python.
    Garbage collection is possible on intermediate results.
    """

    def __init__(self, nodes, thunks, pre_call_clear, post_thunk_clear):
        # post_thunk_clear[i] lists the storage cells whose contents may be
        # dropped as soon as thunks[i] has run.
        super(LoopGC, self).__init__(nodes, thunks, pre_call_clear)
        self.post_thunk_clear = post_thunk_clear
        if not (len(nodes) == len(thunks) == len(post_thunk_clear)):
            raise ValueError()

    def __call__(self):
        # Empty the containers that must not recycle their previous value.
        for container in self.pre_call_clear:
            container[0] = None

        if self.time_thunks:
            try:
                for pos, (fn, node, freeable) in enumerate(
                        zip(self.thunks, self.nodes,
                            self.post_thunk_clear)):
                    start = time.time()
                    fn()
                    stop = time.time()
                    self.call_counts[pos] += 1
                    self.call_times[pos] += stop - start
                    # Free intermediates nobody will read again.
                    for cell in freeable:
                        cell[0] = None
            except:
                # Annotate and re-raise with the node that failed.
                raise_with_op(node)
        else:
            try:
                for fn, node, freeable in zip(self.thunks, self.nodes,
                        self.post_thunk_clear):
                    fn()
                    for cell in freeable:
                        cell[0] = None
            except:
                raise_with_op(node)
class Stack(VM):
    """
    Finish-to-start evaluation order of thunks.

    This supports lazy evaluation of subtrees and partial
    computations of graphs when only some inputs have changed.
    """

    def __init__(self, nodes, thunks, pre_call_clear,
            storage_map, compute_map, env, allow_gc):
        super(Stack, self).__init__(nodes, thunks, pre_call_clear)

        self.allow_gc = allow_gc
        self.message = ""
        # The apply nodes that produce the graph outputs: the roots of the
        # finish-to-start traversal.
        self.base_apply_stack = [o.owner for o in env.outputs if o.owner]
        self.outputs = env.outputs
        self.storage_map = storage_map
        self.apply_time = {}
        self.outputs_size = {}
        self.compute_map = compute_map
        self.node_idx = node_idx = {}

        # env.orderings() yields extra prerequisites (e.g. imposed by the
        # destroy handler): a node must wait until its prereq nodes'
        # outputs are computed.
        ords = env.orderings()

        for i, node in enumerate(self.nodes):
            node_idx[node] = i
            self.apply_time[node] = 0
            self.outputs_size[node] = []
            node.destroy_dependencies = []
            if node in ords:
                for prereq in ords[node]:
                    node.destroy_dependencies += prereq.outputs

        # dependencies[var] = the variables computed from `var`; used to
        # decide when a stored intermediate can be garbage-collected.
        dependencies = self.dependencies = {}
        for k in storage_map:
            dependencies[k] = []
            if k.owner and k.clients:
                ls = []
                for cl in k.clients:
                    # BUG FIX: use != instead of `is not`; identity
                    # comparison against a string literal only worked by
                    # accident of CPython interning.
                    if cl[0] != 'output':
                        ls += cl[0].outputs
                dependencies[k] += ls

        if config.profile:
            # BUG FIX: atexit was used below without ever being imported
            # in this module.
            import atexit
            # Bytes per element, keyed by the last 3 characters of the
            # dtype name: 'int8'[-3:] == 'nt8', 'float32'[-3:] == 't32',
            # 'complex128'[-3:] == '128', etc.
            self.memory_size_map = {"nt8": 1, "t16": 2, "t32": 4,
                    "t64": 8, "128": 16}
            # NOTE(review): atexit_print_all is not defined on this class;
            # this register() call will raise AttributeError unless a
            # subclass or monkey-patch provides it -- TODO confirm intent.
            atexit.register(self.atexit_print_all)

    def __call__(self):
        storage_map = self.storage_map
        compute_map = self.compute_map
        thunks = self.thunks
        dependencies = self.dependencies

        # Graph inputs are already "computed"; everything else is not.
        for k in self.storage_map:
            compute_map[k][0] = (k.owner is None)

        # apply_stack contains nodes
        apply_stack = list(self.base_apply_stack)
        last_apply_stack_len = -1

        while apply_stack:
            # Make sure something happened last time round.
            # This is just a safety check to make sure the op is written
            # correctly: apply_stack should either decrease in length by one
            # (a thunk successfully applied), or increase in length (added
            # dependencies over and above the original).
            # NB: this doesn't catch cycles (would be too expensive/slow),
            # just stalls.
            apply_stack_len = len(apply_stack)
            assert apply_stack_len != last_apply_stack_len
            last_apply_stack_len = apply_stack_len

            current_apply = apply_stack.pop()

            # Use these for loops + breaks to short circuit evaluation.
            # This is a significant performance point.
            computed_ins = True
            for i in current_apply.inputs:
                if not compute_map[i][0]:
                    computed_ins = False
                    break
            computed_outs = True
            for o in current_apply.outputs:
                if not compute_map[o][0]:
                    computed_outs = False
                    break
            if computed_ins:
                # Destroy-order prerequisites count as inputs too.
                for d in current_apply.destroy_dependencies:
                    if not compute_map[d][0]:
                        computed_ins = False
                        break

            if not thunks[self.node_idx[current_apply]].lazy:
                # Check if all inputs are in place.
                # If so compute thunk and remove it from the apply_stack.
                # If not leave it in, and add to the apply_stack those that
                # will produce you those inputs.
                if computed_ins and not computed_outs:
                    try:
                        t0 = time.time()
                        thunks[self.node_idx[current_apply]]()
                        if config.profile:
                            dt = time.time() - t0
                            self.apply_time[current_apply] += dt
                            ## Computing the memory footprint of the the op
                            # ?? What about inplace .. if the op is inplace
                            # you don't actually ask for more memory!
                            size = []
                            for (idx, o) in enumerate(
                                    thunks[self.node_idx[
                                        current_apply]].outputs):
                                if not hasattr(o[0], 'size'):
                                    size.append(-1)
                                    continue
                                s = o[0].size
                                dtype = str(o[0].dtype)
                                dtype2 = dtype[-3:]
                                # BUG FIX: the map lives on self; the bare
                                # name raised NameError under profiling.
                                # KeyError here: couldn't determine the
                                # dtype memory size.
                                s *= self.memory_size_map[dtype2]
                                size.append(s)
                            self.outputs_size[current_apply] = size
                    except Exception:
                        raise_with_op(current_apply)
                    for o in current_apply.outputs:
                        compute_map[o][0] = 1
                    # Garbage Collection -> check if anybody else uses
                    # this input.
                    if self.allow_gc:
                        for i in current_apply.inputs:
                            if (dependencies[i] and i.owner
                                    and i not in self.outputs):
                                empty_storage_map = True
                                for x in dependencies[i]:
                                    if not compute_map[x][0]:
                                        empty_storage_map = False
                                        break
                                if empty_storage_map:
                                    storage_map[i][0] = None
                elif not computed_ins:
                    # Re-schedule this node after the producers of its
                    # missing inputs.
                    apply_stack.append(current_apply)
                    apply_stack.extend(inp.owner
                            for inp in current_apply.inputs if inp.owner)
                    apply_stack.extend(inp.owner
                            for inp in current_apply.destroy_dependencies
                            if inp.owner)
            elif not computed_outs:
                # Lazy thunk: try and run it to see if it works.
                try:
                    t0 = time.time()
                    requires = thunks[self.node_idx[current_apply]]()
                    dt = time.time() - t0
                    self.apply_time[current_apply] += dt
                except Exception:
                    raise_with_op(current_apply)

                if requires:
                    for r in requires:
                        # We are not done with this op, so we add it back
                        # and see to get the inputs we are missing.
                        apply_stack.append(current_apply)
                        if current_apply.inputs[r].owner:
                            apply_stack.append(
                                    current_apply.inputs[r].owner)
                else:
                    if config.profile:
                        size = []
                        for (idx, o) in enumerate(
                                thunks[self.node_idx[
                                    current_apply]].outputs):
                            if not hasattr(o[0], 'size'):
                                size.append(-1)
                                continue
                            s = o[0].size
                            dtype = str(o[0].dtype)
                            # BUG FIX: the original sliced dtype[-2:] here
                            # (vs [-3:] in the strict branch above), which
                            # can never match the 3-character keys of
                            # memory_size_map; also use self.
                            dtype2 = dtype[-3:]
                            # KeyError here: couldn't determine the dtype
                            # memory size.
                            s *= self.memory_size_map[dtype2]
                            size.append(s)
                        self.outputs_size[current_apply] = size
                    if self.allow_gc:
                        for i in current_apply.inputs:
                            if (dependencies[i] and i.owner
                                    and i not in self.outputs):
                                empty_storage_map = True
                                for x in dependencies[i]:
                                    if not compute_map[x][0]:
                                        empty_storage_map = False
                                        break
                                if empty_storage_map:
                                    storage_map[i][0] = None
# The C implementation of the lazy-evaluation loop is optional: when the
# extension cannot be compiled/imported, only the Python VMs are available.
try:
    import lazylinker_c

    class CVM(lazylinker_c.CLazyLinker, VM):
        # Runs the whole graph in C; the VM base class is mixed in only
        # for its Python-side interface (e.g. update_profile).
        def __init__(self, *args, **kwargs):
            lazylinker_c.CLazyLinker.__init__(self, *args, **kwargs)
            # skip VM.__init__
except ImportError:
    pass
class VM_Linker(link.LocalLinker):
    """
    Class that satisfies the Linker interface by acting as a VM factory.
    """

    def __init__(self, allow_gc=True, use_cloop=False):
        # allow_gc - free intermediate results as soon as they are no
        #     longer needed.
        # use_cloop - run the evaluation loop in C (CVM) when available.
        self.env = None
        self.allow_gc = allow_gc
        self.use_cloop = use_cloop

    # NOTE(review): the mutable default `no_recycling=[]` is shared across
    # calls; it is only read here, but callers should not mutate it.
    def accept(self, env, no_recycling=[]):
        """
        :param env: a PerformLinker can have accepted one Env instance
            at a time.

        :param no_recycling: WRITEME

        :returns: self (TODO: WHY? Who calls this function?)
        """
        if self.env is not None and self.env is not env:
            # This linker is already bound to another Env; delegate to a
            # fresh instance of the same class.
            return type(self)().accept(env, no_recycling)
        self.env = env
        self.no_recycling = no_recycling
        return self

    def make_vm(self, nodes, thunks,
            input_storage, output_storage, storage_map,
            post_thunk_clear,
            computed,
            compute_map):
        # Choose and build the VM implementation: CVM when use_cloop,
        # otherwise Loop/LoopGC for strict graphs or Stack when any thunk
        # is lazy.

        pre_call_clear = [storage_map[v] for v in self.no_recycling]

        if self.use_cloop:
            # The C loop works on flat integer-indexed arrays, so first
            # create a map from nodes to ints and vars to ints.
            nodes_idx = {}
            vars_idx = {}
            for i, node in enumerate(nodes):
                nodes_idx[node] = i
                for v in node.inputs + node.outputs:
                    vars_idx.setdefault(v, len(vars_idx))
            for v in self.env.inputs + self.env.outputs:
                vars_idx.setdefault(v, len(vars_idx))

            nodes_idx_inv = {}
            vars_idx_inv = {}
            for (node, i) in nodes_idx.items():
                nodes_idx_inv[i] = node
            for (var, i) in vars_idx.items():
                vars_idx_inv[i] = var

            # put storage_map and compute_map into a int-based scheme
            n_applies = len(nodes)
            storage_map_list = [storage_map[vars_idx_inv[i]]
                    for i in range(len(vars_idx_inv))]
            compute_map_list = [compute_map[vars_idx_inv[i]]
                    for i in range(len(vars_idx_inv))]
            if nodes:
                assert type(storage_map_list[0]) is list
                assert type(compute_map_list[0]) is list

            # build the pointers to node inputs and offsets
            # base_input_output_list is one flat array; each node's input
            # (resp. output) var indices start at node_input_offset[i]
            # (resp. node_output_offset[i]).
            base_input_output_list = []
            node_n_inputs = []
            node_n_outputs = []
            node_input_offset = []
            node_output_offset = []
            for node in nodes:
                inputs_idx = [vars_idx[v] for v in node.inputs]
                outputs_idx = [vars_idx[v] for v in node.outputs]
                node_n_inputs.append(len(inputs_idx))
                node_n_outputs.append(len(outputs_idx))
                node_input_offset.append(len(base_input_output_list))
                base_input_output_list.extend(inputs_idx)
                node_output_offset.append(len(base_input_output_list))
                base_input_output_list.extend(outputs_idx)

            # build the var owner array
            # var_owner[i] is the index of the node that computes var i,
            # or None for graph inputs/constants.
            var_owner = [None] * len(vars_idx)
            for (var, i) in vars_idx.items():
                if var.owner:
                    var_owner[i] = nodes_idx[var.owner]

            is_lazy_list = [int(th.lazy) for th in thunks]
            output_vars = [vars_idx[v] for v in self.env.outputs]

            # builds the list of prereqs induced by e.g. destroy_handler
            ords = self.env.orderings()
            node_prereqs = []
            node_output_size = []
            for i, node in enumerate(nodes):
                node_output_size.append(0)
                prereq_var_idxs = []
                for prereq_node in ords.get(node, []):
                    prereq_var_idxs.extend([vars_idx[v]
                            for v in prereq_node.outputs])
                prereq_var_idxs = list(set(prereq_var_idxs))
                prereq_var_idxs.sort()  # TODO: why sort?
                node_prereqs.append(prereq_var_idxs)

            # Refcount sanity check: constructing the CVM must not leak a
            # reference to the argument lists.
            c0 = sys.getrefcount(node_n_inputs)
            vm = CVM(
                    nodes,
                    thunks,
                    pre_call_clear,
                    allow_gc=self.allow_gc,
                    call_counts=[0] * len(nodes),
                    call_times=[0.0] * len(nodes),
                    compute_map_list=compute_map_list,
                    base_input_output_list=base_input_output_list,
                    node_n_inputs=node_n_inputs,
                    node_n_outputs=node_n_outputs,
                    node_input_offset=node_input_offset,
                    node_output_offset=node_output_offset,
                    var_owner=var_owner,
                    is_lazy_list=is_lazy_list,
                    output_vars=output_vars,
                    node_prereqs=node_prereqs,
                    node_output_size=node_output_size,
                    )
            assert c0 == sys.getrefcount(node_n_inputs)
        else:
            if all([(not th.lazy) for th in thunks]):
                # there is no conditional in the graph
                if self.allow_gc:
                    vm = LoopGC(
                            nodes,
                            thunks,
                            pre_call_clear,
                            post_thunk_clear)
                else:
                    vm = Loop(
                            nodes,
                            thunks,
                            pre_call_clear)
            else:
                # At least one lazy thunk: only the Stack VM can evaluate
                # conditionals lazily.
                vm = Stack(
                        nodes, thunks, pre_call_clear,
                        storage_map, compute_map,
                        self.env, self.allow_gc)
        return vm

    def make_all(self, profiler=None, input_storage=None,
            output_storage=None):
        # Build storage, thunks and a VM for self.env; returns the tuple
        # (vm, input containers, output containers, thunks, order)
        # expected by the LocalLinker interface.
        env = self.env
        order = list(env.toposort())
        no_recycling = self.no_recycling

        input_storage, output_storage, storage_map = link.map_storage(
                env, order, input_storage, output_storage)
        # compute_map[var] is a one-element list: the flag telling whether
        # var currently holds a computed value (inputs start computed).
        compute_map = {}
        for k in storage_map:
            compute_map[k] = [k.owner is None]

        thunks = [node.op.make_thunk(node, storage_map, compute_map,
                no_recycling)
                for node in order]

        computed, last_user = link.gc_helper(order)
        if self.allow_gc:
            # For each node, collect the storage cells of intermediates
            # whose last consumer is that node, so the VM can free them
            # right after the node runs.
            post_thunk_clear = []
            for node in order:
                clear_after_this_thunk = []
                for input in node.inputs:
                    if ((input in computed)
                            and (input not in env.outputs)
                            and (node == last_user[input])):
                        clear_after_this_thunk.append(storage_map[input])
                post_thunk_clear.append(clear_after_this_thunk)
        else:
            post_thunk_clear = None

        vm = self.make_vm(order, thunks,
                input_storage, output_storage, storage_map,
                post_thunk_clear,
                computed,
                compute_map)

        return (vm,
                [link.Container(input, storage) for input, storage in
                    zip(env.inputs, input_storage)],
                [link.Container(output, storage, True) for output, storage
                    in zip(env.outputs, output_storage)],
                thunks,
                order)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论