Commit cf7d0688
Authored March 28, 2017 by Arnaud Bergeron
Parent: df95d9a9

    Remove tentacles in compile.

Showing 6 changed files with 29 additions and 155 deletions (+29 −155):
theano/__init__.py                        +0   -11
theano/compile/debugmode.py               +15  -20
theano/compile/nanguardmode.py            +0   -75
theano/compile/ops.py                     +1   -1
theano/compile/profiling.py               +13  -26
theano/compile/tests/test_debugmode.py    +0   -22
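The change is mechanical across all six files: every reference to the old theano.sandbox.cuda back-end (CudaNdarray, CudaNdarrayType, the compile_gpu_func helpers, the driver test) is dropped, and only the gpuarray back-end's GpuArrayType / pygpu path is kept. A minimal sketch of the type-dispatch pattern the diffs converge on, using only names that appear in the hunks below; the helper function itself is illustrative and not part of the commit:

```python
# Illustrative only: the isinstance() check that replaces the old
# (TensorType, CudaNdarrayType) checks throughout theano.compile.
from theano.tensor import TensorType
from theano.gpuarray import GpuArrayType   # new back-end's array type

def is_handled_output(r):
    # True for variables whose storage DebugMode / ProfileStats can manage:
    # plain CPU tensors, or gpuarray-backed tensors on the GPU.
    return isinstance(r.type, (TensorType, GpuArrayType))
```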
theano/__init__.py

@@ -126,17 +126,6 @@ else:
     raise ImportError("The nose module is not installed."
                       " It is needed for Theano tests.")
 
-if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'):
-    import theano.sandbox.cuda
-    # We can't test the driver during import of theano.sandbox.cuda as
-    # this cause circular import dependency. So we also test it manually
-    # after the import
-    if theano.sandbox.cuda.cuda_available:
-        import theano.sandbox.cuda.tests.test_driver
-        if config.enable_initial_driver_test:
-            theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1()
-
 if (config.device.startswith('cuda') or
         config.device.startswith('opencl') or
         config.init_gpu_device.startswith('cuda') or
         ...
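The removed block was the only place theano/__init__.py imported theano.sandbox.cuda; the surviving branch just below it handles the gpuarray devices. As a hedged usage sketch (standard Theano flags, not something introduced by this commit), selecting the remaining back-end looks like:

```python
# Hypothetical usage sketch: with the sandbox.cuda block gone, GPU use goes
# through the gpuarray back-end, selected by the 'cuda*'/'opencl*' devices
# that the surviving `if config.device.startswith('cuda') or ...` branch checks.
import os
os.environ["THEANO_FLAGS"] = "device=cuda0,floatX=float32"  # set before importing theano
import theano
print(theano.config.device)  # expected: 'cuda0'
```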
theano/compile/debugmode.py

@@ -1198,10 +1198,11 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
     # To avoid circular imports
     from theano.tensor import TensorType
-    from theano.sandbox.cuda import cuda_available, CudaNdarrayType
-    if cuda_available:
-        from theano.sandbox.cuda import CudaNdarray
-        from theano.sandbox.cuda import dimshuffle as cuda_dimshuffle
+    from theano.gpuarray import GpuArrayType
+    try:
+        import pygpu
+    except ImportError:
+        pass
 
     # TODO: Sparse? Scalar does not really make sense.

@@ -1240,7 +1241,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
         for r in considered_outputs:
             # There is no risk to overwrite inputs, since r does not work
             # inplace.
-            if isinstance(r.type, (TensorType, CudaNdarrayType)):
+            if isinstance(r.type, (TensorType, GpuArrayType)):
                 reuse_outputs[r][...] = np.asarray(def_val).astype(r.type.dtype)

@@ -1250,15 +1251,14 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
         del reuse_outputs
 
     # c_cont_output: use a c-continuous array
-    # (for TensorType and CudaNdarray, else None)
+    # (for TensorType, else None)
     if 'c_contiguous' in prealloc_modes or 'ALL' in prealloc_modes:
         c_cont_outputs = {}
         for r in considered_outputs:
-            if isinstance(r.type, (TensorType, CudaNdarrayType)):
+            if isinstance(r.type, (TensorType, GpuArrayType)):
                 # Build a C-contiguous buffer
                 new_buf = r.type.value_zeros(r_vals[r].shape)
-                # CudaNdarray don't have flags field
-                # assert new_buf.flags["C_CONTIGUOUS"]
+                assert new_buf.flags["C_CONTIGUOUS"]
                 new_buf[...] = np.asarray(def_val).astype(r.type.dtype)
                 c_cont_outputs[r] = new_buf

@@ -1272,18 +1272,14 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
     if 'f_contiguous' in prealloc_modes or 'ALL' in prealloc_modes:
         f_cont_outputs = {}
         for r in considered_outputs:
-            if isinstance(r.type, (TensorType, CudaNdarrayType)):
+            if isinstance(r.type, (TensorType, GpuArrayType)):
                 new_buf = np.zeros(shape=r_vals[r].shape,
                                    dtype=r_vals[r].dtype,
                                    order='F')
                 new_buf[...] = def_val
-                if isinstance(r.type, CudaNdarrayType):
-                    # When the CudaNdarray is built, the underlying memory
-                    # is c-contiguous, so we transpose it before and after.
-                    new_buf = CudaNdarray(new_buf.T)
-                    new_buf = cuda_dimshuffle(new_buf,
-                                              reversed(list(range(new_buf.ndim))))
+                if isinstance(r.type, GpuArrayType):
+                    new_buf = pygpu.array(new_buf)
 
                 f_cont_outputs[r] = new_buf

@@ -1305,7 +1301,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
     max_ndim = 0
     rev_out_broadcastable = []
     for r in considered_outputs:
-        if isinstance(r.type, (TensorType, CudaNdarrayType)):
+        if isinstance(r.type, (TensorType, GpuArrayType)):
             if max_ndim < r.ndim:
                 rev_out_broadcastable += [True] * (r.ndim - max_ndim)
                 max_ndim = r.ndim

@@ -1320,7 +1316,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
     # Initial allocation
     init_strided = {}
     for r in considered_outputs:
-        if isinstance(r.type, (TensorType, CudaNdarrayType)):
+        if isinstance(r.type, (TensorType, GpuArrayType)):
             # Create a buffer twice as large in every dimension,
             # except if broadcastable, or for dimensions above
             # config.DebugMode.check_preallocated_output_ndim

@@ -1399,7 +1395,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
         name = 'wrong_size%s' % str(tuple(shape_diff))
 
         for r in considered_outputs:
-            if isinstance(r.type, (TensorType, CudaNdarrayType)):
+            if isinstance(r.type, (TensorType, GpuArrayType)):
                 r_shape_diff = shape_diff[:r.ndim]
                 out_shape = [max((s + sd), 0)
                              for s, sd in zip(r_vals[r].shape,
                                               ...

@@ -1741,7 +1737,6 @@ class _VariableEquivalenceTracker(object):
 # List of default version of make thunk.
 # This is needed to know if the user overrided it.
-# The GpuOp will be added here when theano.sandbox.cuda is imported.
 default_make_thunk = [get_unbound_function(theano.gof.Op.make_thunk)]
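For the f_contiguous branch, the old code had to transpose and dimshuffle through CudaNdarray because its constructor only produced C-contiguous memory; with the new back-end, the Fortran-ordered NumPy buffer can be handed to pygpu directly. A standalone sketch of that strategy, assuming pygpu has an initialised default context (as Theano sets up); the function name is illustrative, not the DebugMode internal:

```python
import numpy as np

def preallocate_f_contiguous(shape, dtype, def_val, gpu=False):
    # Fortran-ordered host buffer filled with the DebugMode default value.
    buf = np.zeros(shape=shape, dtype=dtype, order='F')
    buf[...] = def_val
    if gpu:
        import pygpu
        # pygpu.array() copies the host buffer to the GPU in one call,
        # replacing the old CudaNdarray(new_buf.T) + dimshuffle round-trip.
        buf = pygpu.array(buf)
    return buf

host_buf = preallocate_f_contiguous((2, 3), 'float32', -1)
assert host_buf.flags['F_CONTIGUOUS']
```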
theano/compile/nanguardmode.py

@@ -8,7 +8,6 @@ import numpy as np
 import theano
 from theano.configparser import config
 import theano.tensor as T
-import theano.sandbox.cuda as cuda
 from theano.compile import Mode
 from .mode import get_mode

@@ -107,16 +106,6 @@ def contains_nan(arr, node=None, var=None):
     """
     if not _is_numeric_value(arr, var):
         return False
-    elif cuda.cuda_available and isinstance(arr, cuda.CudaNdarray):
-        if (node and hasattr(theano.sandbox, 'rng_mrg') and
-                isinstance(node.op,
-                           # It store ints in float container
-                           theano.sandbox.rng_mrg.GPU_mrg_uniform)):
-            return False
-        else:
-            compile_gpu_func(True, False, False)
-            return np.isnan(f_gpumin(arr.reshape(arr.size)))
     elif pygpu_available and isinstance(arr, GpuArray):
         return np.isnan(f_gpua_min(arr.reshape(arr.size)))

@@ -150,70 +139,12 @@ def contains_inf(arr, node=None, var=None):
     """
     if not _is_numeric_value(arr, var):
         return False
-    elif cuda.cuda_available and isinstance(arr, cuda.CudaNdarray):
-        if (node and hasattr(theano.sandbox, 'rng_mrg') and
-                isinstance(node.op,
-                           # It store ints in float container
-                           theano.sandbox.rng_mrg.GPU_mrg_uniform)):
-            return False
-        else:
-            compile_gpu_func(False, True, False)
-            return (np.isinf(f_gpumin(arr.reshape(arr.size))) or
-                    np.isinf(f_gpumax(arr.reshape(arr.size))))
     elif pygpu_available and isinstance(arr, GpuArray):
         return (np.isinf(f_gpua_min(arr.reshape(arr.size))) or
                 np.isinf(f_gpua_max(arr.reshape(arr.size))))
     return np.isinf(np.nanmax(arr)) or np.isinf(np.nanmin(arr))
 
 
-f_gpumin = None
-f_gpumax = None
-f_gpuabsmax = None
-
-
-def compile_gpu_func(nan_is_error, inf_is_error, big_is_error):
-    """ compile utility function used by contains_nan and contains_inf
-    """
-    global f_gpumin, f_gpumax, f_gpuabsmax
-    if not cuda.cuda_available:
-        return
-    guard_input = cuda.fvector('nan_guard')
-    cuda_compile_failed = False
-    if (nan_is_error or inf_is_error) and f_gpumin is None:
-        try:
-            f_gpumin = theano.function(
-                [guard_input], T.min(guard_input),
-                mode='FAST_RUN')
-        except RuntimeError:
-            # This can happen if cuda is available, but the
-            # device is in exclusive mode and used by another
-            # process.
-            cuda_compile_failed = True
-    if inf_is_error and not cuda_compile_failed and f_gpumax is None:
-        try:
-            f_gpumax = theano.function(
-                [guard_input], T.max(guard_input),
-                mode='FAST_RUN')
-        except RuntimeError:
-            # This can happen if cuda is available, but the
-            # device is in exclusive mode and used by another
-            # process.
-            cuda_compile_failed = True
-    if big_is_error and not cuda_compile_failed and f_gpuabsmax is None:
-        try:
-            f_gpuabsmax = theano.function(
-                [guard_input], T.max(T.abs_(guard_input)),
-                mode='FAST_RUN')
-        except RuntimeError:
-            # This can happen if cuda is available, but the
-            # device is in exclusive mode and used by another
-            # process.
-            cuda_compile_failed = True
 
 
 def f_compute(op):
     def result(inp):
         ...

@@ -270,9 +201,6 @@ class NanGuardMode(Mode):
         assert nan_is_error or inf_is_error or big_is_error
 
-        if cuda.cuda_enabled:
-            compile_gpu_func(nan_is_error, inf_is_error, big_is_error)
-
         def do_check_on(value, nd, var=None):
             """
             Checks `value` for NaNs / Infs. If detected, raises an exception

@@ -304,9 +232,6 @@ class NanGuardMode(Mode):
             err = False
             if not _is_numeric_value(value, var):
                 err = False
-            elif cuda.cuda_available and isinstance(value, cuda.CudaNdarray):
-                compile_gpu_func(False, False, True)
-                err = (f_gpuabsmax(value.reshape(value.size)) > 1e10)
             elif pygpu_available and isinstance(value, GpuArray):
                 err = (f_gpua_absmax(value.reshape(value.size)) > 1e10)
             else:
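After this change, contains_nan and contains_inf handle only NumPy arrays and pygpu GpuArrays. The CPU fallback kept in the hunk above reduces to the standalone sketch below; only the contains_inf expression appears verbatim in the diff, the contains_nan body is the usual one-pass reduction and is shown here as an assumption:

```python
import numpy as np

def contains_nan(arr):
    # np.min propagates NaN, so a single reduction detects any NaN.
    return np.isnan(np.min(arr))

def contains_inf(arr):
    # nanmax/nanmin ignore NaNs, so NaNs alone never report as Inf.
    return np.isinf(np.nanmax(arr)) or np.isinf(np.nanmin(arr))

x = np.array([1.0, np.inf, 3.0], dtype='float32')
print(contains_nan(x), contains_inf(x))  # False True
```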
theano/compile/ops.py

@@ -810,7 +810,7 @@ class SpecifyShape(gof.Op):
     We currently don't support specifying partial shape information.
 
-    TODO : test this op with sparse and cuda ndarray. Do C code for them too.
+    TODO : test this op with sparse. Do C code for them too.
 
     """
theano/compile/profiling.py

@@ -262,11 +262,8 @@ class ProfileStats(object):
     def __init__(self, atexit_print=True, flag_time_thunks=None,
                  gpu_checks=True, **kwargs):
-        if (gpu_checks and
-                ((hasattr(theano, 'sandbox') and
-                  hasattr(theano.sandbox, 'cuda') and
-                  theano.sandbox.cuda.cuda_enabled) or
-                 (hasattr(theano, 'gpuarray') and
-                  theano.gpuarray.pygpu_activated)) and
+        if (gpu_checks and
+                (hasattr(theano, 'gpuarray') and
+                 theano.gpuarray.pygpu_activated) and
                 os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1'):
             msg = ("You are running the Theano profiler with CUDA enabled."

@@ -285,9 +282,9 @@ class ProfileStats(object):
                 theano.gpuarray.pygpu_activated and
                 not config.profiling.ignore_first_call):
             warnings.warn(
-                "Theano flag profiling.ignore_first_call is False."
-                " This cause bad profiling result in the new gpu"
-                " back-end, as sometimes we compile at the first call.")
+                "Theano flag profiling.ignore_first_call is False. "
+                "This cause bad profiling result in the gpu "
+                "back-end, as sometimes we compile at the first call.")
 
         self.apply_callcount = {}
         self.output_size = {}

@@ -508,8 +505,8 @@ class ProfileStats(object):
             tot += t
             ftot = tot * 100 / local_time
             # Remove the useless start and end of the class name:
-            # "<class 'theano.sandbox.cuda.blas.GpuDot22'>" ->
-            # "theano.sandbox.cuda.blas.GpuDot22"
+            # "<class 'theano.gpuarray.blas.GpuDot22'>" ->
+            # "theano.gpuarray.blas.GpuDot22"
             class_name = str(a)[8:-2][:maxlen]
             print(format_str % (f, ftot, t, t / nb_call,
                                 impl, nb_call,

@@ -820,7 +817,8 @@ class ProfileStats(object):
         new allocation.
 
         """
-        from theano.sandbox.cuda import CudaNdarrayType
+        from theano.gpuarray import GpuArrayType
+
         # Initial Mem info values [CPU, GPU]
         node_memory_size = [0, 0]
         running_memory_size = [0, 0]

@@ -870,7 +868,7 @@ class ProfileStats(object):
                 # allocated by the node
                 idx2 = 0
                 for out in node.outputs:
-                    if isinstance(out.type, CudaNdarrayType):
+                    if isinstance(out.type, GpuArrayType):
                         cg = 1
                     else:
                         cg = 0

@@ -912,7 +910,7 @@ class ProfileStats(object):
                 for ins in set(node.inputs):
                     assert not (ins in view_of and viewed_by[ins])
                     # we trac the original var, so this shouldn't happen
-                    if isinstance(ins.type, CudaNdarrayType):
+                    if isinstance(ins.type, GpuArrayType):
                         cg = 1
                     else:
                         cg = 0

@@ -1245,16 +1243,6 @@ class ProfileStats(object):
             print("---", file=file)
 
-        if (hasattr(theano, 'sandbox') and
-                hasattr(theano.sandbox, 'cuda') and
-                hasattr(theano.sandbox.cuda, 'cuda_ndarray') and
-                hasattr(theano.sandbox.cuda.cuda_ndarray.cuda_ndarray,
-                        'theano_allocated')):
-            cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray
-            _, gpu_max = cuda_ndarray.theano_allocated()
-            print(" Max Memory allocated on the GPU (for all functions): "
-                  "%dKB" % int(round(gpu_max / 1024.)), file=file)
-
         print("", file=file)
         if len(fct_memory) > 1:
             print(" This list is based on all functions in the profile",

@@ -1457,7 +1445,6 @@ class ProfileStats(object):
             printed_tip = True
 
         # tip 7
-        import theano.sandbox.cuda as cuda
         from theano.tensor.nnet import LogSoftmax
         import theano.tensor.signal.pool as pool
         import theano.gpuarray

@@ -1465,12 +1452,12 @@ class ProfileStats(object):
         for a in self.apply_time:
             node = a
             if (isinstance(node.op, pool.Pool)):
-                if (not cuda.dnn.dnn_available() and
-                        not theano.gpuarray.dnn.dnn_present()):
+                if not theano.gpuarray.dnn.dnn_present():
                     print("Install CuDNN to do pooling faster"
                           "this allows the operation to run on GPU")
                     printed_tip = True
             if (isinstance(node.op, LogSoftmax)):
-                if (not cuda.dnn.dnn_available() and
-                        not theano.gpuarray.dnn.dnn_present()):
+                if not theano.gpuarray.dnn.dnn_present():
                     print("Install CuDNN to do LogSoftmax faster"
                           "this allows the operation to run on GPU")
                     printed_tip = True
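The two isinstance hunks above feed the profiler's [CPU, GPU] memory accounting: any input or output whose type is GpuArrayType is counted in the GPU bucket, everything else in the CPU bucket. A minimal sketch of that classification; the function name is illustrative, and it assumes theano.gpuarray is importable (i.e. pygpu is installed):

```python
from theano.gpuarray import GpuArrayType

def memory_bucket(var):
    # Index into the [CPU, GPU] counters kept by ProfileStats
    # ("Initial Mem info values [CPU, GPU]" in the hunk above).
    return 1 if isinstance(var.type, GpuArrayType) else 0
```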
theano/compile/tests/test_debugmode.py

@@ -713,7 +713,6 @@ class VecAsRowAndCol(gof.Op):
         if (c[0] is None) or (c[0].shape != (lv, 1)):
             c[0] = node.outputs[1].type.value_zeros((lv, 1))
 
-        # Python loop because CudaNdarrays do not support newaxis
         for i in range(lv):
             r[0][0, i] = v[i]
             c[0][i, 0] = v[i]

@@ -794,24 +793,3 @@ class Test_preallocated_output(unittest.TestCase):
         v_val = self.rng.randn(5).astype('float32')
         f(v_val)
-
-    def test_output_broadcast_cuda(self):
-        from theano.sandbox import cuda
-        if not cuda.cuda_available:
-            raise SkipTest("Optional package Cuda disabled")
-        if cuda.use.device_number is None:
-            # We should normally set VecAsRowAndCol as a GPUOp But we
-            # don't want to do this here as this will disable others
-            # tests in this file. So we manually init the GPU if
-            # needed to remove warning.
-            cuda.use("gpu",
-                     force=True,
-                     default_to_move_computation_to_gpu=False,
-                     move_shared_float32_to_gpu=False,
-                     enable_cuda=False)
-        v = cuda.fvector('v')
-        c, r = VecAsRowAndCol()(v)
-        f = theano.function([v], [c, r])
-        v_val = cuda.CudaNdarray(self.rng.randn(5).astype('float32'))
-        f(v_val)