Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
cf7d0688
提交
cf7d0688
authored
3月 28, 2017
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Remove tentacles in compile.
上级
df95d9a9
隐藏空白字符变更
内嵌
并排
正在显示
6 个修改的文件
包含
29 行增加
和
155 行删除
+29
-155
__init__.py
theano/__init__.py
+0
-11
debugmode.py
theano/compile/debugmode.py
+15
-20
nanguardmode.py
theano/compile/nanguardmode.py
+0
-75
ops.py
theano/compile/ops.py
+1
-1
profiling.py
theano/compile/profiling.py
+13
-26
test_debugmode.py
theano/compile/tests/test_debugmode.py
+0
-22
没有找到文件。
theano/__init__.py
浏览文件 @
cf7d0688
...
@@ -126,17 +126,6 @@ else:
...
@@ -126,17 +126,6 @@ else:
raise
ImportError
(
"The nose module is not installed."
raise
ImportError
(
"The nose module is not installed."
" It is needed for Theano tests."
)
" It is needed for Theano tests."
)
if
config
.
device
.
startswith
(
'gpu'
)
or
config
.
init_gpu_device
.
startswith
(
'gpu'
):
import
theano.sandbox.cuda
# We can't test the driver during import of theano.sandbox.cuda as
# this cause circular import dependency. So we also test it manually
# after the import
if
theano
.
sandbox
.
cuda
.
cuda_available
:
import
theano.sandbox.cuda.tests.test_driver
if
config
.
enable_initial_driver_test
:
theano
.
sandbox
.
cuda
.
tests
.
test_driver
.
test_nvidia_driver1
()
if
(
config
.
device
.
startswith
(
'cuda'
)
or
if
(
config
.
device
.
startswith
(
'cuda'
)
or
config
.
device
.
startswith
(
'opencl'
)
or
config
.
device
.
startswith
(
'opencl'
)
or
config
.
init_gpu_device
.
startswith
(
'cuda'
)
or
config
.
init_gpu_device
.
startswith
(
'cuda'
)
or
...
...
theano/compile/debugmode.py
浏览文件 @
cf7d0688
...
@@ -1198,10 +1198,11 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
...
@@ -1198,10 +1198,11 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
# To avoid circular imports
# To avoid circular imports
from
theano.tensor
import
TensorType
from
theano.tensor
import
TensorType
from
theano.sandbox.cuda
import
cuda_available
,
CudaNdarrayType
from
theano.gpuarray
import
GpuArrayType
if
cuda_available
:
try
:
from
theano.sandbox.cuda
import
CudaNdarray
import
pygpu
from
theano.sandbox.cuda
import
dimshuffle
as
cuda_dimshuffle
except
ImportError
:
pass
# TODO: Sparse? Scalar does not really make sense.
# TODO: Sparse? Scalar does not really make sense.
...
@@ -1240,7 +1241,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
...
@@ -1240,7 +1241,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
for
r
in
considered_outputs
:
for
r
in
considered_outputs
:
# There is no risk to overwrite inputs, since r does not work
# There is no risk to overwrite inputs, since r does not work
# inplace.
# inplace.
if
isinstance
(
r
.
type
,
(
TensorType
,
CudaNda
rrayType
)):
if
isinstance
(
r
.
type
,
(
TensorType
,
GpuA
rrayType
)):
reuse_outputs
[
r
][
...
]
=
np
.
asarray
(
reuse_outputs
[
r
][
...
]
=
np
.
asarray
(
def_val
)
.
astype
(
r
.
type
.
dtype
)
def_val
)
.
astype
(
r
.
type
.
dtype
)
...
@@ -1250,15 +1251,14 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
...
@@ -1250,15 +1251,14 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
del
reuse_outputs
del
reuse_outputs
# c_cont_output: use a c-continuous array
# c_cont_output: use a c-continuous array
# (for TensorType
and CudaNdarray
, else None)
# (for TensorType, else None)
if
'c_contiguous'
in
prealloc_modes
or
'ALL'
in
prealloc_modes
:
if
'c_contiguous'
in
prealloc_modes
or
'ALL'
in
prealloc_modes
:
c_cont_outputs
=
{}
c_cont_outputs
=
{}
for
r
in
considered_outputs
:
for
r
in
considered_outputs
:
if
isinstance
(
r
.
type
,
(
TensorType
,
CudaNda
rrayType
)):
if
isinstance
(
r
.
type
,
(
TensorType
,
GpuA
rrayType
)):
# Build a C-contiguous buffer
# Build a C-contiguous buffer
new_buf
=
r
.
type
.
value_zeros
(
r_vals
[
r
]
.
shape
)
new_buf
=
r
.
type
.
value_zeros
(
r_vals
[
r
]
.
shape
)
# CudaNdarray don't have flags field
assert
new_buf
.
flags
[
"C_CONTIGUOUS"
]
# assert new_buf.flags["C_CONTIGUOUS"]
new_buf
[
...
]
=
np
.
asarray
(
def_val
)
.
astype
(
r
.
type
.
dtype
)
new_buf
[
...
]
=
np
.
asarray
(
def_val
)
.
astype
(
r
.
type
.
dtype
)
c_cont_outputs
[
r
]
=
new_buf
c_cont_outputs
[
r
]
=
new_buf
...
@@ -1272,18 +1272,14 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
...
@@ -1272,18 +1272,14 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
if
'f_contiguous'
in
prealloc_modes
or
'ALL'
in
prealloc_modes
:
if
'f_contiguous'
in
prealloc_modes
or
'ALL'
in
prealloc_modes
:
f_cont_outputs
=
{}
f_cont_outputs
=
{}
for
r
in
considered_outputs
:
for
r
in
considered_outputs
:
if
isinstance
(
r
.
type
,
(
TensorType
,
CudaNda
rrayType
)):
if
isinstance
(
r
.
type
,
(
TensorType
,
GpuA
rrayType
)):
new_buf
=
np
.
zeros
(
new_buf
=
np
.
zeros
(
shape
=
r_vals
[
r
]
.
shape
,
shape
=
r_vals
[
r
]
.
shape
,
dtype
=
r_vals
[
r
]
.
dtype
,
dtype
=
r_vals
[
r
]
.
dtype
,
order
=
'F'
)
order
=
'F'
)
new_buf
[
...
]
=
def_val
new_buf
[
...
]
=
def_val
if
isinstance
(
r
.
type
,
CudaNdarrayType
):
if
isinstance
(
r
.
type
,
GpuArrayType
):
# When the CudaNdarray is built, the underlying memory
new_buf
=
pygpu
.
array
(
new_buf
)
# is c-contiguous, so we transpose it before and after.
new_buf
=
CudaNdarray
(
new_buf
.
T
)
new_buf
=
cuda_dimshuffle
(
new_buf
,
reversed
(
list
(
range
(
new_buf
.
ndim
))))
f_cont_outputs
[
r
]
=
new_buf
f_cont_outputs
[
r
]
=
new_buf
...
@@ -1305,7 +1301,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
...
@@ -1305,7 +1301,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
max_ndim
=
0
max_ndim
=
0
rev_out_broadcastable
=
[]
rev_out_broadcastable
=
[]
for
r
in
considered_outputs
:
for
r
in
considered_outputs
:
if
isinstance
(
r
.
type
,
(
TensorType
,
CudaNda
rrayType
)):
if
isinstance
(
r
.
type
,
(
TensorType
,
GpuA
rrayType
)):
if
max_ndim
<
r
.
ndim
:
if
max_ndim
<
r
.
ndim
:
rev_out_broadcastable
+=
[
True
]
*
(
r
.
ndim
-
max_ndim
)
rev_out_broadcastable
+=
[
True
]
*
(
r
.
ndim
-
max_ndim
)
max_ndim
=
r
.
ndim
max_ndim
=
r
.
ndim
...
@@ -1320,7 +1316,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
...
@@ -1320,7 +1316,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
# Initial allocation
# Initial allocation
init_strided
=
{}
init_strided
=
{}
for
r
in
considered_outputs
:
for
r
in
considered_outputs
:
if
isinstance
(
r
.
type
,
(
TensorType
,
CudaNda
rrayType
)):
if
isinstance
(
r
.
type
,
(
TensorType
,
GpuA
rrayType
)):
# Create a buffer twice as large in every dimension,
# Create a buffer twice as large in every dimension,
# except if broadcastable, or for dimensions above
# except if broadcastable, or for dimensions above
# config.DebugMode.check_preallocated_output_ndim
# config.DebugMode.check_preallocated_output_ndim
...
@@ -1399,7 +1395,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
...
@@ -1399,7 +1395,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
name
=
'wrong_size
%
s'
%
str
(
tuple
(
shape_diff
))
name
=
'wrong_size
%
s'
%
str
(
tuple
(
shape_diff
))
for
r
in
considered_outputs
:
for
r
in
considered_outputs
:
if
isinstance
(
r
.
type
,
(
TensorType
,
CudaNda
rrayType
)):
if
isinstance
(
r
.
type
,
(
TensorType
,
GpuA
rrayType
)):
r_shape_diff
=
shape_diff
[:
r
.
ndim
]
r_shape_diff
=
shape_diff
[:
r
.
ndim
]
out_shape
=
[
max
((
s
+
sd
),
0
)
out_shape
=
[
max
((
s
+
sd
),
0
)
for
s
,
sd
in
zip
(
r_vals
[
r
]
.
shape
,
for
s
,
sd
in
zip
(
r_vals
[
r
]
.
shape
,
...
@@ -1741,7 +1737,6 @@ class _VariableEquivalenceTracker(object):
...
@@ -1741,7 +1737,6 @@ class _VariableEquivalenceTracker(object):
# List of default version of make thunk.
# List of default version of make thunk.
# This is needed to know if the user overrided it.
# This is needed to know if the user overrided it.
# The GpuOp will be added here when theano.sandbox.cuda is imported.
default_make_thunk
=
[
get_unbound_function
(
theano
.
gof
.
Op
.
make_thunk
)]
default_make_thunk
=
[
get_unbound_function
(
theano
.
gof
.
Op
.
make_thunk
)]
...
...
theano/compile/nanguardmode.py
浏览文件 @
cf7d0688
...
@@ -8,7 +8,6 @@ import numpy as np
...
@@ -8,7 +8,6 @@ import numpy as np
import
theano
import
theano
from
theano.configparser
import
config
from
theano.configparser
import
config
import
theano.tensor
as
T
import
theano.tensor
as
T
import
theano.sandbox.cuda
as
cuda
from
theano.compile
import
Mode
from
theano.compile
import
Mode
from
.mode
import
get_mode
from
.mode
import
get_mode
...
@@ -107,16 +106,6 @@ def contains_nan(arr, node=None, var=None):
...
@@ -107,16 +106,6 @@ def contains_nan(arr, node=None, var=None):
"""
"""
if
not
_is_numeric_value
(
arr
,
var
):
if
not
_is_numeric_value
(
arr
,
var
):
return
False
return
False
elif
cuda
.
cuda_available
and
isinstance
(
arr
,
cuda
.
CudaNdarray
):
if
(
node
and
hasattr
(
theano
.
sandbox
,
'rng_mrg'
)
and
isinstance
(
node
.
op
,
# It store ints in float container
theano
.
sandbox
.
rng_mrg
.
GPU_mrg_uniform
)):
return
False
else
:
compile_gpu_func
(
True
,
False
,
False
)
return
np
.
isnan
(
f_gpumin
(
arr
.
reshape
(
arr
.
size
)))
elif
pygpu_available
and
isinstance
(
arr
,
GpuArray
):
elif
pygpu_available
and
isinstance
(
arr
,
GpuArray
):
return
np
.
isnan
(
f_gpua_min
(
arr
.
reshape
(
arr
.
size
)))
return
np
.
isnan
(
f_gpua_min
(
arr
.
reshape
(
arr
.
size
)))
...
@@ -150,70 +139,12 @@ def contains_inf(arr, node=None, var=None):
...
@@ -150,70 +139,12 @@ def contains_inf(arr, node=None, var=None):
"""
"""
if
not
_is_numeric_value
(
arr
,
var
):
if
not
_is_numeric_value
(
arr
,
var
):
return
False
return
False
elif
cuda
.
cuda_available
and
isinstance
(
arr
,
cuda
.
CudaNdarray
):
if
(
node
and
hasattr
(
theano
.
sandbox
,
'rng_mrg'
)
and
isinstance
(
node
.
op
,
# It store ints in float container
theano
.
sandbox
.
rng_mrg
.
GPU_mrg_uniform
)):
return
False
else
:
compile_gpu_func
(
False
,
True
,
False
)
return
(
np
.
isinf
(
f_gpumin
(
arr
.
reshape
(
arr
.
size
)))
or
np
.
isinf
(
f_gpumax
(
arr
.
reshape
(
arr
.
size
))))
elif
pygpu_available
and
isinstance
(
arr
,
GpuArray
):
elif
pygpu_available
and
isinstance
(
arr
,
GpuArray
):
return
(
np
.
isinf
(
f_gpua_min
(
arr
.
reshape
(
arr
.
size
)))
or
return
(
np
.
isinf
(
f_gpua_min
(
arr
.
reshape
(
arr
.
size
)))
or
np
.
isinf
(
f_gpua_max
(
arr
.
reshape
(
arr
.
size
))))
np
.
isinf
(
f_gpua_max
(
arr
.
reshape
(
arr
.
size
))))
return
np
.
isinf
(
np
.
nanmax
(
arr
))
or
np
.
isinf
(
np
.
nanmin
(
arr
))
return
np
.
isinf
(
np
.
nanmax
(
arr
))
or
np
.
isinf
(
np
.
nanmin
(
arr
))
f_gpumin
=
None
f_gpumax
=
None
f_gpuabsmax
=
None
def
compile_gpu_func
(
nan_is_error
,
inf_is_error
,
big_is_error
):
""" compile utility function used by contains_nan and contains_inf
"""
global
f_gpumin
,
f_gpumax
,
f_gpuabsmax
if
not
cuda
.
cuda_available
:
return
guard_input
=
cuda
.
fvector
(
'nan_guard'
)
cuda_compile_failed
=
False
if
(
nan_is_error
or
inf_is_error
)
and
f_gpumin
is
None
:
try
:
f_gpumin
=
theano
.
function
(
[
guard_input
],
T
.
min
(
guard_input
),
mode
=
'FAST_RUN'
)
except
RuntimeError
:
# This can happen if cuda is available, but the
# device is in exclusive mode and used by another
# process.
cuda_compile_failed
=
True
if
inf_is_error
and
not
cuda_compile_failed
and
f_gpumax
is
None
:
try
:
f_gpumax
=
theano
.
function
(
[
guard_input
],
T
.
max
(
guard_input
),
mode
=
'FAST_RUN'
)
except
RuntimeError
:
# This can happen if cuda is available, but the
# device is in exclusive mode and used by another
# process.
cuda_compile_failed
=
True
if
big_is_error
and
not
cuda_compile_failed
and
f_gpuabsmax
is
None
:
try
:
f_gpuabsmax
=
theano
.
function
(
[
guard_input
],
T
.
max
(
T
.
abs_
(
guard_input
)),
mode
=
'FAST_RUN'
)
except
RuntimeError
:
# This can happen if cuda is available, but the
# device is in exclusive mode and used by another
# process.
cuda_compile_failed
=
True
def
f_compute
(
op
):
def
f_compute
(
op
):
def
result
(
inp
):
def
result
(
inp
):
...
@@ -270,9 +201,6 @@ class NanGuardMode(Mode):
...
@@ -270,9 +201,6 @@ class NanGuardMode(Mode):
assert
nan_is_error
or
inf_is_error
or
big_is_error
assert
nan_is_error
or
inf_is_error
or
big_is_error
if
cuda
.
cuda_enabled
:
compile_gpu_func
(
nan_is_error
,
inf_is_error
,
big_is_error
)
def
do_check_on
(
value
,
nd
,
var
=
None
):
def
do_check_on
(
value
,
nd
,
var
=
None
):
"""
"""
Checks `value` for NaNs / Infs. If detected, raises an exception
Checks `value` for NaNs / Infs. If detected, raises an exception
...
@@ -304,9 +232,6 @@ class NanGuardMode(Mode):
...
@@ -304,9 +232,6 @@ class NanGuardMode(Mode):
err
=
False
err
=
False
if
not
_is_numeric_value
(
value
,
var
):
if
not
_is_numeric_value
(
value
,
var
):
err
=
False
err
=
False
elif
cuda
.
cuda_available
and
isinstance
(
value
,
cuda
.
CudaNdarray
):
compile_gpu_func
(
False
,
False
,
True
)
err
=
(
f_gpuabsmax
(
value
.
reshape
(
value
.
size
))
>
1e10
)
elif
pygpu_available
and
isinstance
(
value
,
GpuArray
):
elif
pygpu_available
and
isinstance
(
value
,
GpuArray
):
err
=
(
f_gpua_absmax
(
value
.
reshape
(
value
.
size
))
>
1e10
)
err
=
(
f_gpua_absmax
(
value
.
reshape
(
value
.
size
))
>
1e10
)
else
:
else
:
...
...
theano/compile/ops.py
浏览文件 @
cf7d0688
...
@@ -810,7 +810,7 @@ class SpecifyShape(gof.Op):
...
@@ -810,7 +810,7 @@ class SpecifyShape(gof.Op):
We currently don't support specifying partial shape information.
We currently don't support specifying partial shape information.
TODO : test this op with sparse
and cuda ndarray
. Do C code for them too.
TODO : test this op with sparse. Do C code for them too.
"""
"""
...
...
theano/compile/profiling.py
浏览文件 @
cf7d0688
...
@@ -262,11 +262,8 @@ class ProfileStats(object):
...
@@ -262,11 +262,8 @@ class ProfileStats(object):
def
__init__
(
self
,
atexit_print
=
True
,
flag_time_thunks
=
None
,
def
__init__
(
self
,
atexit_print
=
True
,
flag_time_thunks
=
None
,
gpu_checks
=
True
,
**
kwargs
):
gpu_checks
=
True
,
**
kwargs
):
if
(
gpu_checks
and
if
(
gpu_checks
and
((
hasattr
(
theano
,
'sandbox'
)
and
(
hasattr
(
theano
,
'gpuarray'
)
and
hasattr
(
theano
.
sandbox
,
'cuda'
)
and
theano
.
gpuarray
.
pygpu_activated
)
and
theano
.
sandbox
.
cuda
.
cuda_enabled
)
or
(
hasattr
(
theano
,
'gpuarray'
)
and
theano
.
gpuarray
.
pygpu_activated
))
and
os
.
environ
.
get
(
'CUDA_LAUNCH_BLOCKING'
,
'0'
)
!=
'1'
):
os
.
environ
.
get
(
'CUDA_LAUNCH_BLOCKING'
,
'0'
)
!=
'1'
):
msg
=
(
msg
=
(
"You are running the Theano profiler with CUDA enabled."
"You are running the Theano profiler with CUDA enabled."
...
@@ -285,9 +282,9 @@ class ProfileStats(object):
...
@@ -285,9 +282,9 @@ class ProfileStats(object):
theano
.
gpuarray
.
pygpu_activated
and
theano
.
gpuarray
.
pygpu_activated
and
not
config
.
profiling
.
ignore_first_call
):
not
config
.
profiling
.
ignore_first_call
):
warnings
.
warn
(
warnings
.
warn
(
"Theano flag profiling.ignore_first_call is False."
"Theano flag profiling.ignore_first_call is False.
"
"
This cause bad profiling result in the new gpu
"
"
This cause bad profiling result in the gpu
"
"
back-end, as sometimes we compile at the first call."
)
"back-end, as sometimes we compile at the first call."
)
self
.
apply_callcount
=
{}
self
.
apply_callcount
=
{}
self
.
output_size
=
{}
self
.
output_size
=
{}
...
@@ -508,8 +505,8 @@ class ProfileStats(object):
...
@@ -508,8 +505,8 @@ class ProfileStats(object):
tot
+=
t
tot
+=
t
ftot
=
tot
*
100
/
local_time
ftot
=
tot
*
100
/
local_time
# Remove the useless start and end of the class name:
# Remove the useless start and end of the class name:
# "<class 'theano.
sandbox.cuda
.blas.GpuDot22'>" ->
# "<class 'theano.
gpuarray
.blas.GpuDot22'>" ->
# "theano.
sandbox.cuda
.blas.GpuDot22"
# "theano.
gpuarray
.blas.GpuDot22"
class_name
=
str
(
a
)[
8
:
-
2
][:
maxlen
]
class_name
=
str
(
a
)[
8
:
-
2
][:
maxlen
]
print
(
format_str
%
(
f
,
ftot
,
t
,
t
/
nb_call
,
print
(
format_str
%
(
f
,
ftot
,
t
,
t
/
nb_call
,
impl
,
nb_call
,
impl
,
nb_call
,
...
@@ -820,7 +817,8 @@ class ProfileStats(object):
...
@@ -820,7 +817,8 @@ class ProfileStats(object):
new allocation.
new allocation.
"""
"""
from
theano.sandbox.cuda
import
CudaNdarrayType
from
theano.gpuarray
import
GpuArrayType
# Initial Mem info values [CPU, GPU]
# Initial Mem info values [CPU, GPU]
node_memory_size
=
[
0
,
0
]
node_memory_size
=
[
0
,
0
]
running_memory_size
=
[
0
,
0
]
running_memory_size
=
[
0
,
0
]
...
@@ -870,7 +868,7 @@ class ProfileStats(object):
...
@@ -870,7 +868,7 @@ class ProfileStats(object):
# allocated by the node
# allocated by the node
idx2
=
0
idx2
=
0
for
out
in
node
.
outputs
:
for
out
in
node
.
outputs
:
if
isinstance
(
out
.
type
,
CudaNda
rrayType
):
if
isinstance
(
out
.
type
,
GpuA
rrayType
):
cg
=
1
cg
=
1
else
:
else
:
cg
=
0
cg
=
0
...
@@ -912,7 +910,7 @@ class ProfileStats(object):
...
@@ -912,7 +910,7 @@ class ProfileStats(object):
for
ins
in
set
(
node
.
inputs
):
for
ins
in
set
(
node
.
inputs
):
assert
not
(
ins
in
view_of
and
viewed_by
[
ins
])
assert
not
(
ins
in
view_of
and
viewed_by
[
ins
])
# we trac the original var, so this shouldn't happen
# we trac the original var, so this shouldn't happen
if
isinstance
(
ins
.
type
,
CudaNda
rrayType
):
if
isinstance
(
ins
.
type
,
GpuA
rrayType
):
cg
=
1
cg
=
1
else
:
else
:
cg
=
0
cg
=
0
...
@@ -1245,16 +1243,6 @@ class ProfileStats(object):
...
@@ -1245,16 +1243,6 @@ class ProfileStats(object):
print
(
"---"
,
file
=
file
)
print
(
"---"
,
file
=
file
)
if
(
hasattr
(
theano
,
'sandbox'
)
and
hasattr
(
theano
.
sandbox
,
'cuda'
)
and
hasattr
(
theano
.
sandbox
.
cuda
,
'cuda_ndarray'
)
and
hasattr
(
theano
.
sandbox
.
cuda
.
cuda_ndarray
.
cuda_ndarray
,
'theano_allocated'
)):
cuda_ndarray
=
theano
.
sandbox
.
cuda
.
cuda_ndarray
.
cuda_ndarray
_
,
gpu_max
=
cuda_ndarray
.
theano_allocated
()
print
(
" Max Memory allocated on the GPU (for all functions): "
"
%
dKB"
%
int
(
round
(
gpu_max
/
1024.
)),
file
=
file
)
print
(
""
,
file
=
file
)
print
(
""
,
file
=
file
)
if
len
(
fct_memory
)
>
1
:
if
len
(
fct_memory
)
>
1
:
print
(
" This list is based on all functions in the profile"
,
print
(
" This list is based on all functions in the profile"
,
...
@@ -1457,7 +1445,6 @@ class ProfileStats(object):
...
@@ -1457,7 +1445,6 @@ class ProfileStats(object):
printed_tip
=
True
printed_tip
=
True
# tip 7
# tip 7
import
theano.sandbox.cuda
as
cuda
from
theano.tensor.nnet
import
LogSoftmax
from
theano.tensor.nnet
import
LogSoftmax
import
theano.tensor.signal.pool
as
pool
import
theano.tensor.signal.pool
as
pool
import
theano.gpuarray
import
theano.gpuarray
...
@@ -1465,12 +1452,12 @@ class ProfileStats(object):
...
@@ -1465,12 +1452,12 @@ class ProfileStats(object):
for
a
in
self
.
apply_time
:
for
a
in
self
.
apply_time
:
node
=
a
node
=
a
if
(
isinstance
(
node
.
op
,
pool
.
Pool
)):
if
(
isinstance
(
node
.
op
,
pool
.
Pool
)):
if
(
not
cuda
.
dnn
.
dnn_available
()
and
not
theano
.
gpuarray
.
dnn
.
dnn_present
()
):
if
not
theano
.
gpuarray
.
dnn
.
dnn_present
(
):
print
(
"Install CuDNN to do pooling faster"
print
(
"Install CuDNN to do pooling faster"
"this allows the operation to run on GPU"
)
"this allows the operation to run on GPU"
)
printed_tip
=
True
printed_tip
=
True
if
(
isinstance
(
node
.
op
,
LogSoftmax
)):
if
(
isinstance
(
node
.
op
,
LogSoftmax
)):
if
(
not
cuda
.
dnn
.
dnn_available
()
and
not
theano
.
gpuarray
.
dnn
.
dnn_present
()
):
if
not
theano
.
gpuarray
.
dnn
.
dnn_present
(
):
print
(
"Install CuDNN to do LogSoftmax faster"
print
(
"Install CuDNN to do LogSoftmax faster"
"this allows the operation to run on GPU"
)
"this allows the operation to run on GPU"
)
printed_tip
=
True
printed_tip
=
True
...
...
theano/compile/tests/test_debugmode.py
浏览文件 @
cf7d0688
...
@@ -713,7 +713,6 @@ class VecAsRowAndCol(gof.Op):
...
@@ -713,7 +713,6 @@ class VecAsRowAndCol(gof.Op):
if
(
c
[
0
]
is
None
)
or
(
c
[
0
]
.
shape
!=
(
lv
,
1
)):
if
(
c
[
0
]
is
None
)
or
(
c
[
0
]
.
shape
!=
(
lv
,
1
)):
c
[
0
]
=
node
.
outputs
[
1
]
.
type
.
value_zeros
((
lv
,
1
))
c
[
0
]
=
node
.
outputs
[
1
]
.
type
.
value_zeros
((
lv
,
1
))
# Python loop because CudaNdarrays do not support newaxis
for
i
in
range
(
lv
):
for
i
in
range
(
lv
):
r
[
0
][
0
,
i
]
=
v
[
i
]
r
[
0
][
0
,
i
]
=
v
[
i
]
c
[
0
][
i
,
0
]
=
v
[
i
]
c
[
0
][
i
,
0
]
=
v
[
i
]
...
@@ -794,24 +793,3 @@ class Test_preallocated_output(unittest.TestCase):
...
@@ -794,24 +793,3 @@ class Test_preallocated_output(unittest.TestCase):
v_val
=
self
.
rng
.
randn
(
5
)
.
astype
(
'float32'
)
v_val
=
self
.
rng
.
randn
(
5
)
.
astype
(
'float32'
)
f
(
v_val
)
f
(
v_val
)
def
test_output_broadcast_cuda
(
self
):
from
theano.sandbox
import
cuda
if
not
cuda
.
cuda_available
:
raise
SkipTest
(
"Optional package Cuda disabled"
)
if
cuda
.
use
.
device_number
is
None
:
# We should normally set VecAsRowAndCol as a GPUOp But we
# don't want to do this here as this will disable others
# tests in this file. So we manually init the GPU if
# needed to remove warning.
cuda
.
use
(
"gpu"
,
force
=
True
,
default_to_move_computation_to_gpu
=
False
,
move_shared_float32_to_gpu
=
False
,
enable_cuda
=
False
)
v
=
cuda
.
fvector
(
'v'
)
c
,
r
=
VecAsRowAndCol
()(
v
)
f
=
theano
.
function
([
v
],
[
c
,
r
])
v_val
=
cuda
.
CudaNdarray
(
self
.
rng
.
randn
(
5
)
.
astype
(
'float32'
))
f
(
v_val
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论