Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
c6ffa460
提交
c6ffa460
authored
12月 24, 2016
作者:
Pascal Lamblin
提交者:
GitHub
12月 24, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5357 from nouiz/abergeron-dnn_mem
Select the dnn convolution algorithm using actually available memory
上级
59f671e2
ec1ddad3
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
11 个修改的文件
包含
128 行增加
和
126 行删除
+128
-126
jenkins_test2.sh
.jenkins/jenkins_test2.sh
+9
-9
__init__.py
theano/gpuarray/__init__.py
+45
-40
dnn.py
theano/gpuarray/dnn.py
+0
-0
dnn_base.c
theano/gpuarray/dnn_base.c
+0
-4
dnn_fwd.c
theano/gpuarray/dnn_fwd.c
+28
-13
dnn_gi.c
theano/gpuarray/dnn_gi.c
+23
-12
dnn_gw.c
theano/gpuarray/dnn_gw.c
+23
-12
dnn_pool.c
theano/gpuarray/dnn_pool.c
+0
-4
dnn_pool_grad.c
theano/gpuarray/dnn_pool_grad.c
+0
-4
test_dnn.py
theano/gpuarray/tests/test_dnn.py
+0
-7
type.py
theano/gpuarray/type.py
+0
-21
没有找到文件。
.jenkins/jenkins_test2.sh
浏览文件 @
c6ffa460
...
@@ -8,7 +8,7 @@ set -x
...
@@ -8,7 +8,7 @@ set -x
# Anaconda python
# Anaconda python
export
PATH
=
/usr/local/miniconda2/bin:
$PATH
export
PATH
=
/usr/local/miniconda2/bin:
$PATH
# CUDA
# CUDA
export
PATH
=
/usr/local/cuda/bin:
$PATH
export
PATH
=
/usr/local/cuda/bin:
$PATH
export
LD_LIBRARY_PATH
=
/usr/local/cuda/lib64:
$LD_LIBRARY_PATH
export
LD_LIBRARY_PATH
=
/usr/local/cuda/lib64:
$LD_LIBRARY_PATH
export
LIBRARY_PATH
=
/usr/local/cuda/lib64:
$LIBRARY_PATH
export
LIBRARY_PATH
=
/usr/local/cuda/lib64:
$LIBRARY_PATH
...
@@ -38,13 +38,13 @@ echo "===== Testing gpuarray backend"
...
@@ -38,13 +38,13 @@ echo "===== Testing gpuarray backend"
GPUARRAY_CONFIG
=
"Release"
GPUARRAY_CONFIG
=
"Release"
DEVICE
=
cuda0
DEVICE
=
cuda0
LIBDIR
=
~/tmp
/local
LIBDIR
=
${
WORKSPACE
}
/local
# Make fresh clones of libgpuarray (with no history since we don't need it)
# Make fresh clones of libgpuarray (with no history since we don't need it)
rm
-rf
libgpuarray
rm
-rf
libgpuarray
git clone
--depth
1
"https://github.com/Theano/libgpuarray.git"
git clone
--depth
1
"https://github.com/Theano/libgpuarray.git"
# Clean up previous installs (to make sure no old files are left)
# Clean up previous installs (to make sure no old files are left)
rm
-rf
$LIBDIR
rm
-rf
$LIBDIR
mkdir
$LIBDIR
mkdir
$LIBDIR
...
@@ -52,25 +52,25 @@ mkdir $LIBDIR
...
@@ -52,25 +52,25 @@ mkdir $LIBDIR
mkdir
libgpuarray/build
mkdir
libgpuarray/build
(
cd
libgpuarray/build
&&
cmake ..
-DCMAKE_BUILD_TYPE
=
${
GPUARRAY_CONFIG
}
-DCMAKE_INSTALL_PREFIX
=
$LIBDIR
&&
make
)
(
cd
libgpuarray/build
&&
cmake ..
-DCMAKE_BUILD_TYPE
=
${
GPUARRAY_CONFIG
}
-DCMAKE_INSTALL_PREFIX
=
$LIBDIR
&&
make
)
# Finally install
# Finally install
(
cd
libgpuarray/build
&&
make
install
)
(
cd
libgpuarray/build
&&
make
install
)
# Export paths
# Export paths
export
LD_LIBRARY_PATH
=
$LD_LIBRARY_PATH
:
$LIBDIR
/lib64/
export
LIBRARY_PATH
=
$LIBRARY_PATH
:
$LIBDIR
/lib64/
export
CPATH
=
$CPATH
:
$LIBDIR
/include
export
CPATH
=
$CPATH
:
$LIBDIR
/include
export
LIBRARY_PATH
=
$LIBRARY_PATH
:
$LIBDIR
/lib
export
LIBRARY_PATH
=
$LIBRARY_PATH
:
$LIBDIR
/lib
export
LD_LIBRARY_PATH
=
$LD_LIBRARY_PATH
:
$LIBDIR
/lib
export
LD_LIBRARY_PATH
=
$LD_LIBRARY_PATH
:
$LIBDIR
/lib
# Build the pygpu modules
# Build the pygpu modules
(
cd
libgpuarray
&&
python setup.py build_ext
--inplace
-I
$LIBDIR
/include
-L
$LIBDIR
/lib
)
(
cd
libgpuarray
&&
python setup.py build_ext
--inplace
-I
$LIBDIR
/include
-L
$LIBDIR
/lib
)
ls
$LIBDIR
ls
$LIBDIR
mkdir
$LIBDIR
/lib/python
mkdir
$LIBDIR
/lib/python
export
PYTHONPATH
=
${
PYTHONPATH
}
:
$LIBDIR
/lib/python
export
PYTHONPATH
=
${
PYTHONPATH
}
:
$LIBDIR
/lib/python
# Then install
# Then install
(
cd
libgpuarray
&&
python setup.py
install
--home
=
$LIBDIR
)
(
cd
libgpuarray
&&
python setup.py
install
--home
=
$LIBDIR
)
# Testing theano (the gpuarray parts)
python
-c
'import pygpu; print(pygpu.__file__)'
# Testing theano (the gpuarray parts)
THEANO_GPUARRAY_TESTS
=
"theano/gpuarray/tests
\
THEANO_GPUARRAY_TESTS
=
"theano/gpuarray/tests
\
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_serial
\
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_serial
\
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel
\
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel
\
...
...
theano/gpuarray/__init__.py
浏览文件 @
c6ffa460
...
@@ -27,7 +27,7 @@ except ImportError:
...
@@ -27,7 +27,7 @@ except ImportError:
# This is for documentation not to depend on the availability of pygpu
# This is for documentation not to depend on the availability of pygpu
from
.type
import
(
GpuArrayType
,
GpuArrayVariable
,
GpuArrayConstant
,
from
.type
import
(
GpuArrayType
,
GpuArrayVariable
,
GpuArrayConstant
,
GpuArraySharedVariable
,
gpuarray_shared_constructor
,
GpuArraySharedVariable
,
gpuarray_shared_constructor
,
reg_context
,
get_context
,
ContextNotDefined
,
_get_props
)
reg_context
,
get_context
,
ContextNotDefined
)
from
.basic_ops
import
as_gpuarray_variable
from
.basic_ops
import
as_gpuarray_variable
from
.
import
fft
,
dnn
,
opt
,
nerv
,
extra_ops
,
multinomial
,
reduction
from
.
import
fft
,
dnn
,
opt
,
nerv
,
extra_ops
,
multinomial
,
reduction
...
@@ -46,63 +46,66 @@ def init_dev(dev, name=None):
...
@@ -46,63 +46,66 @@ def init_dev(dev, name=None):
if
not
config
.
cxx
:
if
not
config
.
cxx
:
raise
RuntimeError
(
"The new gpu-backend need a c++ compiler."
)
raise
RuntimeError
(
"The new gpu-backend need a c++ compiler."
)
if
(
pygpu
.
version
.
major
,
pygpu
.
version
.
minor
)
<
(
0
,
6
):
if
(
pygpu
.
version
.
major
,
pygpu
.
version
.
minor
)
<
(
0
,
6
):
raise
ValueError
(
"Your installed version of pygpu is too old, please upgrade to 0.6 or later"
)
raise
ValueError
(
"Your installed version of pygpu is too old, please upgrade to 0.6 or later"
)
# This is for the C headers API
if
pygpu
.
gpuarray
.
api_version
()[
0
]
<
0
:
raise
ValueError
(
"Your installed libgpuarray is too old, please update"
)
if
dev
not
in
init_dev
.
devmap
:
if
dev
not
in
init_dev
.
devmap
:
ctx
=
pygpu
.
init
(
dev
,
context
=
pygpu
.
init
(
disable_alloc_cache
=
config
.
gpuarray
.
preallocate
<
0
,
dev
,
single_stream
=
config
.
gpuarray
.
single_stream
,
disable_alloc_cache
=
config
.
gpuarray
.
preallocate
<
0
,
sched
=
config
.
gpuarray
.
sched
)
single_stream
=
config
.
gpuarray
.
single_stream
,
init_dev
.
devmap
[
dev
]
=
ctx
sched
=
config
.
gpuarray
.
sched
)
context
.
dev
=
dev
init_dev
.
devmap
[
dev
]
=
context
reg_context
(
name
,
context
)
if
dev
.
startswith
(
'cuda'
):
avail
=
dnn
.
dnn_available
(
name
)
if
avail
:
context
.
cudnn_handle
=
dnn
.
_make_handle
(
context
)
if
config
.
print_active_device
:
if
avail
:
print
(
"Using cuDNN version
%
d on context
%
s"
%
(
dnn
.
version
(),
name
),
file
=
sys
.
stderr
)
else
:
print
(
"Can not use cuDNN on context
%
s:
%
s"
%
(
name
,
dnn
.
dnn_available
.
msg
),
file
=
sys
.
stderr
)
if
config
.
gpuarray
.
preallocate
<
0
:
if
config
.
gpuarray
.
preallocate
<
0
:
print
(
"Disabling allocation cache on
%
s"
%
(
dev
,))
print
(
"Disabling allocation cache on
%
s"
%
(
dev
,))
elif
config
.
gpuarray
.
preallocate
>
0
:
elif
config
.
gpuarray
.
preallocate
>
0
:
MB
=
(
1024
*
1024
)
MB
=
(
1024
*
1024
)
if
config
.
gpuarray
.
preallocate
<=
1
:
if
config
.
gpuarray
.
preallocate
<=
1
:
gmem
=
min
(
config
.
gpuarray
.
preallocate
,
0.95
)
*
c
tx
.
total_gmem
gmem
=
min
(
config
.
gpuarray
.
preallocate
,
0.95
)
*
c
ontext
.
total_gmem
else
:
else
:
gmem
=
config
.
gpuarray
.
preallocate
*
MB
gmem
=
config
.
gpuarray
.
preallocate
*
MB
if
gmem
>
context
.
free_gmem
-
50
*
MB
:
print
(
"WARNING: Preallocating too much memory can prevent cudnn and cublas from working properly"
)
# This will allocate and immediatly free an object of size gmem
# This will allocate and immediatly free an object of size gmem
# which will reserve that amount of memory on the GPU.
# which will reserve that amount of memory on the GPU.
pygpu
.
empty
((
gmem
,),
dtype
=
'int8'
,
context
=
c
tx
)
pygpu
.
empty
((
gmem
,),
dtype
=
'int8'
,
context
=
c
ontext
)
if
config
.
print_active_device
:
if
config
.
print_active_device
:
print
(
"Preallocating
%
d/
%
d Mb (
%
f) on
%
s"
%
print
(
"Preallocating
%
d/
%
d Mb (
%
f) on
%
s"
%
(
gmem
//
MB
,
ctx
.
total_gmem
//
MB
,
gmem
/
ctx
.
total_gmem
,
dev
),
(
gmem
//
MB
,
context
.
total_gmem
//
MB
,
gmem
/
context
.
total_gmem
,
dev
),
file
=
sys
.
stderr
)
file
=
sys
.
stderr
)
context
=
init_dev
.
devmap
[
dev
]
else
:
context
=
init_dev
.
devmap
[
dev
]
# This will map the context name to the real context object.
# This will map the context name to the real context object.
reg_context
(
name
,
context
)
if
config
.
print_active_device
:
if
config
.
print_active_device
:
try
:
try
:
pcibusid
=
context
.
pcibusid
pcibusid
=
'('
+
context
.
pcibusid
+
')'
except
pygpu
.
gpuarray
.
UnsupportedException
:
except
pygpu
.
gpuarray
.
UnsupportedException
:
pcibusid
=
'(unsupported for device
%
s)'
%
dev
pcibusid
=
''
except
Exception
:
warnings
.
warn
(
'Unable to get PCI Bus ID. Please consider updating libgpuarray and pygpu.'
)
pcibusid
=
'unknown'
print
(
"Mapped name
%
s to device
%
s:
%
s"
%
print
(
"Mapped name
%
s to device
%
s:
%
s
%
s
"
%
(
name
,
dev
,
context
.
devname
),
(
name
,
dev
,
context
.
devname
,
pcibusid
),
file
=
sys
.
stderr
)
file
=
sys
.
stderr
)
print
(
"PCI Bus ID:"
,
pcibusid
,
file
=
sys
.
stderr
)
pygpu_activated
=
True
pygpu_activated
=
True
ctx_props
=
_get_props
(
name
)
ctx_props
[
'dev'
]
=
dev
if
dev
.
startswith
(
'cuda'
):
if
'cudnn_version'
not
in
ctx_props
:
try
:
ctx_props
[
'cudnn_version'
]
=
dnn
.
version
()
# 5200 should not print warning with cudnn 5.1 final.
if
ctx_props
[
'cudnn_version'
]
>=
5200
:
warnings
.
warn
(
"Your cuDNN version is more recent than "
"Theano. If you encounter problems, try "
"updating Theano or downgrading cuDNN to "
"version 5.1."
)
if
config
.
print_active_device
:
print
(
"Using cuDNN version
%
d on context
%
s"
%
(
ctx_props
[
'cudnn_version'
],
name
),
file
=
sys
.
stderr
)
ctx_props
[
'cudnn_handle'
]
=
dnn
.
_make_handle
(
context
)
except
Exception
:
pass
# This maps things like 'cuda0' to the context object on that device.
# This maps things like 'cuda0' to the context object on that device.
init_dev
.
devmap
=
{}
init_dev
.
devmap
=
{}
...
@@ -119,7 +122,8 @@ if pygpu:
...
@@ -119,7 +122,8 @@ if pygpu:
elif
(
config
.
init_gpu_device
.
startswith
(
'cuda'
)
or
elif
(
config
.
init_gpu_device
.
startswith
(
'cuda'
)
or
config
.
init_gpu_device
.
startswith
(
'opencl'
)):
config
.
init_gpu_device
.
startswith
(
'opencl'
)):
if
config
.
device
!=
'cpu'
:
if
config
.
device
!=
'cpu'
:
raise
ValueError
(
'you must set device=cpu to use init_gpu_device.'
)
raise
ValueError
(
'you must set device=cpu to use init_gpu_device.'
)
if
config
.
contexts
!=
''
:
if
config
.
contexts
!=
''
:
print
(
"Using contexts will make init_gpu_device act like device and move all computations by default, which might not be what you want."
)
print
(
"Using contexts will make init_gpu_device act like device and move all computations by default, which might not be what you want."
)
init_dev
(
config
.
init_gpu_device
)
init_dev
(
config
.
init_gpu_device
)
...
@@ -147,4 +151,5 @@ else:
...
@@ -147,4 +151,5 @@ else:
config
.
device
.
startswith
(
'opencl'
)
or
config
.
device
.
startswith
(
'opencl'
)
or
config
.
device
.
startswith
(
'cuda'
)
or
config
.
device
.
startswith
(
'cuda'
)
or
config
.
contexts
!=
''
):
config
.
contexts
!=
''
):
error
(
"pygpu was configured but could not be imported or is too old (version 0.6 or higher required)"
,
exc_info
=
True
)
error
(
"pygpu was configured but could not be imported or is too old (version 0.6 or higher required)"
,
exc_info
=
True
)
theano/gpuarray/dnn.py
浏览文件 @
c6ffa460
差异被折叠。
点击展开。
theano/gpuarray/dnn_base.c
浏览文件 @
c6ffa460
...
@@ -115,11 +115,7 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
...
@@ -115,11 +115,7 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
if
(
nd
<
3
)
if
(
nd
<
3
)
nd
=
3
;
nd
=
3
;
#if CUDNN_VERSION >= 5000
err
=
cudnnSetFilterNdDescriptor
(
desc
,
dt
,
CUDNN_TENSOR_NCHW
,
nd
,
dims
);
err
=
cudnnSetFilterNdDescriptor
(
desc
,
dt
,
CUDNN_TENSOR_NCHW
,
nd
,
dims
);
#else
err
=
cudnnSetFilterNdDescriptor
(
desc
,
dt
,
nd
,
dims
);
#endif
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
...
theano/gpuarray/dnn_fwd.c
浏览文件 @
c6ffa460
...
@@ -98,12 +98,37 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -98,12 +98,37 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
#endif
#endif
if
(
!
reuse_algo
)
{
if
(
!
reuse_algo
)
{
size_t
free
;
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_LARGEST_MEMBLOCK
,
&
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
return
1
;
}
// Guess 4Mb if the info is not available
if
(
free
==
0
)
free
=
4
*
1024
*
1024
;
#ifdef CHOOSE_TIME
#ifdef CHOOSE_TIME
int
count
;
int
count
;
cudnnConvolutionFwdAlgoPerf_t
choice
;
cudnnConvolutionFwdAlgoPerf_t
choice
;
err
=
cudnnFindConvolutionForwardAlgorithm
(
gpudata
*
tmpmem
;
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
1
,
&
count
,
&
choice
);
tmpmem
=
gpudata_alloc
(
c
->
ctx
,
free
,
NULL
,
0
,
NULL
);
if
(
tmpmem
==
NULL
)
{
PyErr_SetString
(
PyExc_MemoryError
,
"Could not allocate working GPU memory"
);
return
-
1
;
}
// We don't sync the buffer as we don't care about the values.
err
=
cudnnFindConvolutionForwardAlgorithmEx
(
_handle
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
),
1
,
&
count
,
&
choice
,
*
(
void
**
)
tmpmem
,
free
);
gpudata_release
(
tmpmem
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -114,16 +139,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -114,16 +139,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
}
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
;
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
return
1
;
}
err
=
cudnnGetConvolutionForwardAlgorithm
(
err
=
cudnnGetConvolutionForwardAlgorithm
(
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
output
),
...
...
theano/gpuarray/dnn_gi.c
浏览文件 @
c6ffa460
...
@@ -140,13 +140,34 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -140,13 +140,34 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
#endif
#endif
if
(
!
reuse_algo
)
{
if
(
!
reuse_algo
)
{
size_t
free
;
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_LARGEST_MEMBLOCK
,
&
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
return
1
;
}
// Guess 4Mb if the info is not available
if
(
free
==
0
)
free
=
4
*
1024
*
1024
;
#ifdef CHOOSE_TIME
#ifdef CHOOSE_TIME
int
count
;
int
count
;
cudnnConvolutionBwdDataAlgoPerf_t
choice
;
cudnnConvolutionBwdDataAlgoPerf_t
choice
;
gpudata
*
tmpmem
;
err
=
cudnnFindConvolutionBackwardDataAlgorithm
(
tmpmem
=
gpudata_alloc
(
c
->
ctx
,
free
,
NULL
,
0
,
NULL
);
if
(
tmpmem
==
NULL
)
{
PyErr_SetString
(
PyExc_MemoryError
,
"Could not allocate working GPU memory"
);
return
-
1
;
}
err
=
cudnnFindConvolutionBackwardDataAlgorithmEx
(
_handle
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
desc
,
_handle
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
input
),
1
,
&
count
,
&
choice
);
APPLY_SPECIFIC
(
input
),
1
,
&
count
,
&
choice
,
*
(
void
**
)
tmpmem
,
free
);
gpudata_release
(
tmpmem
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error selecting convolution algo: %s"
,
PyErr_Format
(
PyExc_RuntimeError
,
"error selecting convolution algo: %s"
,
...
@@ -157,16 +178,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -157,16 +178,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
;
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
return
1
;
}
err
=
cudnnGetConvolutionBackwardDataAlgorithm
(
err
=
cudnnGetConvolutionBackwardDataAlgorithm
(
_handle
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
_handle
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
input
),
desc
,
APPLY_SPECIFIC
(
input
),
...
...
theano/gpuarray/dnn_gw.c
浏览文件 @
c6ffa460
...
@@ -140,13 +140,34 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -140,13 +140,34 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
#endif
#endif
if
(
!
reuse_algo
)
{
if
(
!
reuse_algo
)
{
size_t
free
;
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_LARGEST_MEMBLOCK
,
&
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
return
1
;
}
// Guess 4Mb if the info is not available
if
(
free
==
0
)
free
=
4
*
1024
*
1024
;
#ifdef CHOOSE_TIME
#ifdef CHOOSE_TIME
int
count
;
int
count
;
cudnnConvolutionBwdFilterAlgoPerf_t
choice
;
cudnnConvolutionBwdFilterAlgoPerf_t
choice
;
gpudata
*
tmpmem
;
err
=
cudnnFindConvolutionBackwardFilterAlgorithm
(
tmpmem
=
gpudata_alloc
(
c
->
ctx
,
free
,
NULL
,
0
,
NULL
);
if
(
tmpmem
==
NULL
)
{
PyErr_SetString
(
PyExc_MemoryError
,
"Could not allocate working GPU memory"
);
return
-
1
;
}
err
=
cudnnFindConvolutionBackwardFilterAlgorithmEx
(
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
desc
,
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
kerns
),
1
,
&
count
,
&
choice
);
APPLY_SPECIFIC
(
kerns
),
1
,
&
count
,
&
choice
,
*
(
void
**
)
tmpmem
,
free
);
gpudata_release
(
tmpmem
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -158,16 +179,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -158,16 +179,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
;
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
return
1
;
}
err
=
cudnnGetConvolutionBackwardFilterAlgorithm
(
err
=
cudnnGetConvolutionBackwardFilterAlgorithm
(
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
kerns
),
...
...
theano/gpuarray/dnn_pool.c
浏览文件 @
c6ffa460
...
@@ -71,11 +71,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
...
@@ -71,11 +71,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
}
}
#if CUDNN_VERSION >= 5000
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
CUDNN_PROPAGATE_NAN
,
ndims
,
w
,
p
,
s
);
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
CUDNN_PROPAGATE_NAN
,
ndims
,
w
,
p
,
s
);
#else
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
#endif
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
...
...
theano/gpuarray/dnn_pool_grad.c
浏览文件 @
c6ffa460
...
@@ -111,11 +111,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
...
@@ -111,11 +111,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
}
}
#if CUDNN_VERSION >= 5000
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
CUDNN_PROPAGATE_NAN
,
ndims
,
w
,
p
,
s
);
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
CUDNN_PROPAGATE_NAN
,
ndims
,
w
,
p
,
s
);
#else
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
#endif
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
...
...
theano/gpuarray/tests/test_dnn.py
浏览文件 @
c6ffa460
...
@@ -604,9 +604,6 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -604,9 +604,6 @@ class TestDnnInferShapes(utt.InferShapeTester):
[
conv_modes
[
0
]])),
[
conv_modes
[
0
]])),
testcase_func_name
=
utt
.
custom_name_func
)
testcase_func_name
=
utt
.
custom_name_func
)
def
test_conv
(
self
,
algo
,
border_mode
,
conv_mode
):
def
test_conv
(
self
,
algo
,
border_mode
,
conv_mode
):
if
algo
==
'winograd'
and
dnn
.
version
(
raises
=
False
)
<
5000
:
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
self
.
_test_conv
(
T
.
tensor4
(
'img'
),
self
.
_test_conv
(
T
.
tensor4
(
'img'
),
T
.
tensor4
(
'kerns'
),
T
.
tensor4
(
'kerns'
),
T
.
tensor4
(
'out'
),
T
.
tensor4
(
'out'
),
...
@@ -1361,8 +1358,6 @@ class test_SoftMax(test_nnet.test_SoftMax):
...
@@ -1361,8 +1358,6 @@ class test_SoftMax(test_nnet.test_SoftMax):
def
test_dnn_batchnorm_train
():
def
test_dnn_batchnorm_train
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
if
dnn
.
version
(
raises
=
False
)
<
5000
:
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
utt
.
seed_rng
()
utt
.
seed_rng
()
for
mode
in
(
'per-activation'
,
'spatial'
):
for
mode
in
(
'per-activation'
,
'spatial'
):
...
@@ -1416,8 +1411,6 @@ def test_dnn_batchnorm_train():
...
@@ -1416,8 +1411,6 @@ def test_dnn_batchnorm_train():
def
test_batchnorm_inference
():
def
test_batchnorm_inference
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
if
dnn
.
version
(
raises
=
False
)
<
5000
:
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
utt
.
seed_rng
()
utt
.
seed_rng
()
for
mode
in
(
'per-activation'
,
'spatial'
):
for
mode
in
(
'per-activation'
,
'spatial'
):
...
...
theano/gpuarray/type.py
浏览文件 @
c6ffa460
...
@@ -68,7 +68,6 @@ def reg_context(name, ctx):
...
@@ -68,7 +68,6 @@ def reg_context(name, ctx):
if
not
isinstance
(
ctx
,
gpuarray
.
GpuContext
):
if
not
isinstance
(
ctx
,
gpuarray
.
GpuContext
):
raise
TypeError
(
"context is not GpuContext"
)
raise
TypeError
(
"context is not GpuContext"
)
_context_reg
[
name
]
=
ctx
_context_reg
[
name
]
=
ctx
_props_map
[
ctx
]
=
dict
()
def
get_context
(
name
):
def
get_context
(
name
):
...
@@ -97,26 +96,6 @@ def list_contexts():
...
@@ -97,26 +96,6 @@ def list_contexts():
"""
"""
return
_context_reg
.
keys
()
return
_context_reg
.
keys
()
# Mappings of properties to contexts. Please never use this if you
# can avoid it.
# This is basically a way to store "global" variables that depend on
# the context.
_props_map
=
{}
def
_get_props
(
name
):
ctx
=
get_context
(
name
)
return
_props_map
[
ctx
]
def
get_prop
(
name
,
k
):
return
_get_props
(
name
)[
k
]
def
set_prop
(
name
,
k
,
v
):
_get_props
(
name
)[
k
]
=
v
# Private method
# Private method
def
_name_for_ctx
(
ctx
):
def
_name_for_ctx
(
ctx
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论