Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
50968f7a
提交
50968f7a
authored
9月 16, 2014
作者:
Frédéric Bastien
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2112 from nouiz/abergeron-dnn
Abergeron dnn
上级
fa1d9c4a
5b796440
隐藏空白字符变更
内嵌
并排
正在显示
9 个修改的文件
包含
356 行增加
和
48 行删除
+356
-48
cop.txt
doc/extending/cop.txt
+11
-1
conv.txt
doc/library/tensor/nnet/conv.txt
+7
-0
cc.py
theano/gof/cc.py
+26
-12
op.py
theano/gof/op.py
+6
-1
test_op.py
theano/gof/tests/test_op.py
+1
-1
__init__.py
theano/sandbox/cuda/__init__.py
+7
-6
cudnn_helper.h
theano/sandbox/cuda/cudnn_helper.h
+31
-0
dnn.py
theano/sandbox/cuda/dnn.py
+235
-0
test_conv_cuda_ndarray.py
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+32
-27
没有找到文件。
doc/extending/cop.txt
浏览文件 @
50968f7a
...
...
@@ -99,7 +99,7 @@ There are less methods to define for an Op than for a Type:
module is initialized, before anything else is executed and is
specialized for a particular apply of an :ref:`op`.
.. method:: c_init_code_struct(node, struct_id)
.. method:: c_init_code_struct(node, struct_id
, sub
)
Allows you to specify code that will be inserted in the struct
constructor of the Op. This is for code which should be
...
...
@@ -108,6 +108,16 @@ There are less methods to define for an Op than for a Type:
`struct_id` is an integer guaranteed to be unique inside the
struct.
`sub` is a dictionary of extra parameters to the
c_init_code_struct method. It contains the following
values:
``sub['fail']``
A string of code that you should execute (after ensuring
that a python exception is set) if your C code needs to
raise an exception.
.. method:: c_support_code()
Allows you to specify helper functions/structs that the
...
...
doc/library/tensor/nnet/conv.txt
浏览文件 @
50968f7a
...
...
@@ -94,6 +94,13 @@ TODO: Give examples on how to use these things! They are pretty complicated.
f = theano.function(..., mode=mode)
- :func:`GpuDnnConv <theano.sandbox.cuda.dnn.GpuDnnConv>` GPU-only
convolution using NVIDIA's cuDNN library. To enable it (and
other cudnn-accelerated ops), set
``THEANO_FLAGS=optimizer_including=cudnn`` in your environment.
This requires that you have cuDNN installed and available. It
also requires a GPU with compute capability 3.0 or more.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`
3D Convolution applying multi-channel 3D filters to batches of
multi-channel 3D images.
...
...
theano/gof/cc.py
浏览文件 @
50968f7a
...
...
@@ -143,6 +143,18 @@ def failure_code(sub):
goto __label_
%(id)
i;}'''
%
sub
def
failure_code_init
(
sub
):
"Code for failure in the struct init."
return
'''{
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
return
%(id)
d;
}'''
%
sub
def
code_gen
(
blocks
):
"""WRITEME From a list of L{CodeBlock} instances, returns a string
that executes them all in sequence. eg for C{(decl1, task1,
...
...
@@ -205,8 +217,7 @@ def struct_gen(args, struct_builders, blocks, sub):
# be executed if any step in the constructor fails and the
# latter only at destruction time.
struct_decl
+=
block
.
declare
struct_init_head
=
struct_init_head
+
(
"
\n
{
\n
%
s"
%
block
.
behavior
)
struct_init_tail
=
(
"
%
s
\n
}
\n
"
%
block
.
cleanup
)
+
struct_init_tail
struct_init_head
=
struct_init_head
+
(
"
\n
%
s"
%
block
.
behavior
)
struct_cleanup
+=
block
.
cleanup
behavior
=
code_gen
(
blocks
)
...
...
@@ -258,6 +269,7 @@ def struct_gen(args, struct_builders, blocks, sub):
# TODO: add some error checking to make sure storage_<x> are
# 1-element lists and __ERROR is a 3-elements list.
struct_code
=
"""
namespace {
struct
%(name)
s {
...
...
@@ -274,13 +286,9 @@ def struct_gen(args, struct_builders, blocks, sub):
int init(PyObject* __ERROR,
%(args_decl)
s) {
%(storage_incref)
s
%(storage_set)
s
int
%(failure_var)
s = 0;
%(struct_init_head)
s
this->__ERROR = __ERROR;
return 0;
%(struct_init_tail)
s
%(storage_decref)
s
%(do_return)
s
}
void cleanup(void) {
%(struct_cleanup)
s
...
...
@@ -333,7 +341,7 @@ def get_c_init(r, name, sub):
def
get_c_extract
(
r
,
name
,
sub
):
"""Wrapper around c_extract that initializes py_name from storage."""
if
any
([
getattr
(
c
.
op
,
'check_input'
,
config
.
check_input
)
for
(
c
,
_
)
in
if
any
([
getattr
(
c
.
op
,
'check_input'
,
config
.
check_input
)
for
(
c
,
_
)
in
r
.
clients
]):
c_extract
=
r
.
type
.
c_extract
(
name
,
sub
,
True
)
...
...
@@ -419,7 +427,7 @@ def struct_variable_codeblocks(variable, policies, id, symbol_table, sub):
sub
=
dict
(
sub
)
# sub['name'] = name
sub
[
'id'
]
=
id
sub
[
'fail'
]
=
failure_code
(
sub
)
sub
[
'fail'
]
=
failure_code
_init
(
sub
)
sub
[
'py_ptr'
]
=
"py_
%
s"
%
name
sub
[
'stor_ptr'
]
=
"storage_
%
s"
%
name
# struct_declare, struct_behavior, struct_cleanup, sub)
...
...
@@ -530,9 +538,8 @@ class CLinker(link.Linker):
failure_var
=
"__failure"
id
=
1
sub
=
dict
(
failure_var
=
failure_var
)
for
variable
in
self
.
variables
:
sub
=
dict
(
failure_var
=
failure_var
)
# it might be possible to inline constant variables as C literals
# policy = [[what to declare in the struct,
...
...
@@ -634,6 +641,10 @@ class CLinker(link.Linker):
sub
[
'struct_id'
]
=
id
+
1
sub
[
'fail'
]
=
failure_code
(
sub
)
sub_struct
=
dict
()
sub_struct
[
'id'
]
=
id
+
1
sub_struct
[
'fail'
]
=
failure_code_init
(
sub
)
struct_support
=
""
struct_init
=
""
struct_cleanup
=
""
...
...
@@ -661,7 +672,7 @@ class CLinker(link.Linker):
" didn't return a string for c_init_code_apply"
)
try
:
struct_init
=
op
.
c_init_code_struct
(
node
,
id
+
1
)
struct_init
=
op
.
c_init_code_struct
(
node
,
id
+
1
,
sub_struct
)
assert
isinstance
(
struct_init
,
basestring
),
(
str
(
node
.
op
)
+
" didn't return a string for c_init_code_struct"
)
...
...
@@ -1418,7 +1429,10 @@ class CLinker(link.Linker):
print
>>
code
,
' return NULL;'
print
>>
code
,
' }'
print
>>
code
,
'
%(struct_name)
s* struct_ptr = new
%(struct_name)
s();'
%
locals
()
print
>>
code
,
' struct_ptr->init('
,
','
.
join
(
'PyTuple_GET_ITEM(argtuple,
%
i)'
%
n
for
n
in
xrange
(
n_args
)),
');'
print
>>
code
,
' if (struct_ptr->init('
,
','
.
join
(
'PyTuple_GET_ITEM(argtuple,
%
i)'
%
n
for
n
in
xrange
(
n_args
)),
') != 0) {'
print
>>
code
,
' delete struct_ptr;'
print
>>
code
,
' return NULL;'
print
>>
code
,
' }'
if
PY3
:
print
>>
code
,
"""
\
PyObject* thunk = PyCapsule_New((void*)(&{struct_name}_executor), NULL, {struct_name}_destructor);
...
...
theano/gof/op.py
浏览文件 @
50968f7a
...
...
@@ -319,7 +319,7 @@ class CLinkerOp(CLinkerObject):
raise
utils
.
MethodNotDefined
(
"c_init_code_apply"
,
type
(
self
),
self
.
__class__
.
__name__
)
def
c_init_code_struct
(
self
,
node
,
struct_id
):
def
c_init_code_struct
(
self
,
node
,
struct_id
,
sub
):
"""
Optional: return a code string specific to the apply
to be inserted in the struct initialization code.
...
...
@@ -331,6 +331,11 @@ class CLinkerOp(CLinkerObject):
sub parameter named struct_id that will
contain this name.
:param sub: a dictionary of values to substitute in the code.
Most notably it contains a 'fail' entry that you
should place in your code after setting a python
exception to indicate an error.
:Exceptions:
- `MethodNotDefined`: the subclass does not override this method
"""
...
...
theano/gof/tests/test_op.py
浏览文件 @
50968f7a
...
...
@@ -89,7 +89,7 @@ class StructOp(Op):
def
c_support_code_struct
(
self
,
node
,
struct_id
):
return
"npy_uint64 counter
%
d;"
%
(
struct_id
,)
def
c_init_code_struct
(
self
,
node
,
struct_id
):
def
c_init_code_struct
(
self
,
node
,
struct_id
,
sub
):
return
"counter
%
d = 0;"
%
(
struct_id
,)
def
c_code
(
self
,
node
,
name
,
input_names
,
outputs_names
,
sub
):
...
...
theano/sandbox/cuda/__init__.py
浏览文件 @
50968f7a
...
...
@@ -155,7 +155,8 @@ if compile_cuda_ndarray and cuda_available:
'cuda_ndarray'
,
code
,
location
=
cuda_ndarray_loc
,
include_dirs
=
[
cuda_path
],
libs
=
[
config
.
cublas
.
lib
],
include_dirs
=
[
cuda_path
],
libs
=
[
config
.
cublas
.
lib
],
preargs
=
[
'-O3'
]
+
compiler
.
compile_args
())
from
cuda_ndarray.cuda_ndarray
import
*
except
Exception
,
e
:
...
...
@@ -262,8 +263,8 @@ if cuda_available:
shared_constructor
=
float32_shared_constructor
import
basic_ops
from
basic_ops
import
(
from
.
import
basic_ops
from
.
basic_ops
import
(
GpuFromHost
,
HostFromGpu
,
GpuElemwise
,
GpuDimShuffle
,
GpuCAReduce
,
GpuReshape
,
GpuContiguous
,
GpuSubtensor
,
GpuIncSubtensor
,
...
...
@@ -273,11 +274,11 @@ if cuda_available:
ftensor3
,
ftensor4
,
scalar
,
vector
,
matrix
,
row
,
col
,
tensor3
,
tensor4
)
from
basic_ops
import
(
host_from_gpu
,
gpu_from_host
,
from
.
basic_ops
import
(
host_from_gpu
,
gpu_from_host
,
as_cuda_array
,
as_cuda_ndarray_variable
)
import
opt
import
cuda_ndarray
from
rng_curand
import
CURAND_RandomStreams
from
.
import
opt
,
dnn
from
.rng_curand
import
CURAND_RandomStreams
def
use
(
device
,
...
...
theano/sandbox/cuda/cudnn_helper.h
0 → 100644
浏览文件 @
50968f7a
#ifndef CUDNN_HELPER_H
#define CUDNN_HELPER_H
#include <cudnn.h>
static
inline
const
char
*
cudnnGetErrorString
(
cudnnStatus_t
err
)
{
switch
(
err
)
{
case
CUDNN_STATUS_SUCCESS
:
return
"The operation completed successfully."
;
case
CUDNN_STATUS_NOT_INITIALIZED
:
return
"The handle was not initialized(Is your driver recent enought?)."
;
case
CUDNN_STATUS_ALLOC_FAILED
:
return
"Ressource allocation failed inside the library."
;
case
CUDNN_STATUS_BAD_PARAM
:
return
"An incorrect value was passed in."
;
case
CUDNN_STATUS_ARCH_MISMATCH
:
return
"The current GPU does not support the required features (only cc 3.0+ are supported)."
;
case
CUDNN_STATUS_MAPPING_ERROR
:
return
"An access to GPU memory space failed (probably due to a failure to bind texture)."
;
case
CUDNN_STATUS_EXECUTION_FAILED
:
return
"A kernel failed to execute."
;
case
CUDNN_STATUS_INTERNAL_ERROR
:
return
"An internal cuDNN operation failed."
;
case
CUDNN_STATUS_NOT_SUPPORTED
:
return
"The combination of parameters is not currently supported."
;
default:
return
"Unknown error code."
;
}
}
#endif
theano/sandbox/cuda/dnn.py
0 → 100644
浏览文件 @
50968f7a
import
copy
import
os
import
theano
from
theano
import
Apply
from
theano
import
tensor
from
theano.compat.six
import
StringIO
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda
import
GpuOp
from
theano.sandbox.cuda.basic_ops
import
(
as_cuda_ndarray_variable
,
gpu_contiguous
)
from
theano.sandbox.cuda.blas
import
GpuConv
class
GpuDnnConv
(
GpuOp
):
__props__
=
(
'border_mode'
,)
def
__init__
(
self
,
border_mode
):
self
.
border_mode
=
border_mode
def
make_node
(
self
,
img
,
kern
):
if
img
.
type
.
ndim
!=
4
:
raise
TypeError
(
'img must be 4D tensor'
)
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
broadcastable
=
(
img
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
0
],
False
,
False
)
return
Apply
(
self
,
[
img
,
kern
],
[
CudaNdarrayType
(
broadcastable
)()])
def
c_headers
(
self
):
return
[
'cudnn.h'
,
'cudnn_helper.h'
]
def
c_header_dirs
(
self
):
return
[
os
.
path
.
dirname
(
__file__
)]
def
c_libraries
(
self
):
return
[
'cudnn'
]
def
c_support_code_struct
(
self
,
node
,
struct_id
):
return
"""
cudnnHandle_t handle
%(id)
d;
cudnnTensor4dDescriptor_t input
%(id)
d;
cudnnTensor4dDescriptor_t output
%(id)
d;
cudnnFilterDescriptor_t kerns
%(id)
d;
cudnnConvolutionDescriptor_t op
%(id)
d;
"""
%
dict
(
id
=
struct_id
)
def
c_init_code_struct
(
self
,
node
,
struct_id
,
sub
):
return
"""
handle
%(id)
d = NULL;
input
%(id)
d = NULL;
output
%(id)
d = NULL;
kerns
%(id)
d = NULL;
op
%(id)
d = NULL;
cudnnStatus_t err
%(id)
d;
if ((err
%(id)
d = cudnnCreate(&handle
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not create cudnn handle:
%%
s",
cudnnGetErrorString(err
%(id)
d));
%(fail)
s
}
if ((err
%(id)
d = cudnnCreateTensor4dDescriptor(&input
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
"(inp):
%%
s", cudnnGetErrorString(err
%(id)
d));
%(fail)
s
}
if ((err
%(id)
d = cudnnCreateTensor4dDescriptor(&output
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
"(out):
%%
s", cudnnGetErrorString(err
%(id)
d));
%(fail)
s
}
if ((err
%(id)
d = cudnnCreateFilterDescriptor(&kerns
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate filter descriptor:
%%
s",
cudnnGetErrorString(err
%(id)
d));
%(fail)
s
}
if ((err
%(id)
d = cudnnCreateConvolutionDescriptor(&op
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate convolution "
"descriptor:
%%
s", cudnnGetErrorString(err
%(id)
d));
%(fail)
s
}
"""
%
dict
(
id
=
struct_id
,
fail
=
sub
[
'fail'
])
def
c_cleanup_code_struct
(
self
,
node
,
struct_id
):
return
"""
cudnnDestroyTensor4dDescriptor(input
%(id)
d);
cudnnDestroyTensor4dDescriptor(output
%(id)
d);
cudnnDestroyFilterDescriptor(kerns
%(id)
d);
cudnnDestroyConvolutionDescriptor(op
%(id)
d);
cudnnDestroy(handle
%(id)
d);
"""
%
dict
(
id
=
struct_id
)
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
img
,
kern
=
inputs
out
,
=
outputs
if
self
.
border_mode
==
"valid"
:
bmode
=
1
else
:
assert
self
.
border_mode
==
"full"
bmode
=
0
return
"""
cudnnStatus_t err
%(name)
s;
int pad_w
%(name)
s;
int pad_h
%(name)
s;
if (!CudaNdarray_is_c_contiguous(
%(img)
s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
%(fail)
s
}
if (!CudaNdarray_is_c_contiguous(
%(kerns)
s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous filters are supported.");
%(fail)
s
}
err
%(name)
s = cudnnSetTensor4dDescriptorEx(
input
%(id)
d, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(
%(img)
s)[0],
CudaNdarray_HOST_DIMS(
%(img)
s)[1],
CudaNdarray_HOST_DIMS(
%(img)
s)[2],
CudaNdarray_HOST_DIMS(
%(img)
s)[3],
CudaNdarray_HOST_STRIDES(
%(img)
s)[0],
CudaNdarray_HOST_STRIDES(
%(img)
s)[1],
CudaNdarray_HOST_STRIDES(
%(img)
s)[2],
CudaNdarray_HOST_STRIDES(
%(img)
s)[3]
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set tensor4d descriptor:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
err
%(name)
s = cudnnSetFilterDescriptor(
kerns
%(id)
d, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(
%(kerns)
s)[0],
CudaNdarray_HOST_DIMS(
%(kerns)
s)[1],
CudaNdarray_HOST_DIMS(
%(kerns)
s)[2],
CudaNdarray_HOST_DIMS(
%(kerns)
s)[3]
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set filter descriptor:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
if (
%(bmode)
d == 1) {
pad_h
%(name)
s = 0;
pad_w
%(name)
s = 0;
} else if (
%(bmode)
d == 0) {
pad_h
%(name)
s = CudaNdarray_HOST_DIMS(
%(kerns)
s)[2] - 1;
pad_w
%(name)
s = CudaNdarray_HOST_DIMS(
%(kerns)
s)[3] - 1;
} else {
PyErr_SetString(PyExc_ValueError, "bad border mode");
%(fail)
s
}
err
%(name)
s = cudnnSetConvolutionDescriptor(
op
%(id)
d, input
%(id)
d, kerns
%(id)
d,
pad_h
%(name)
s,
pad_w
%(name)
s,
1, 1, 1, 1,
CUDNN_CONVOLUTION
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
{
int out_dims[4];
err
%(name)
s = cudnnGetOutputTensor4dDim(
op
%(id)
d, CUDNN_CONVOLUTION_FWD,
&out_dims[0], &out_dims[1],
&out_dims[2], &out_dims[3]
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
if (CudaNdarray_prep_output(&
%(out)
s, 4, out_dims) != 0) {
%(fail)
s
}
}
err
%(name)
s = cudnnSetTensor4dDescriptorEx(
output
%(id)
d, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(
%(out)
s)[0],
CudaNdarray_HOST_DIMS(
%(out)
s)[1],
CudaNdarray_HOST_DIMS(
%(out)
s)[2],
CudaNdarray_HOST_DIMS(
%(out)
s)[3],
CudaNdarray_HOST_STRIDES(
%(out)
s)[0],
CudaNdarray_HOST_STRIDES(
%(out)
s)[1],
CudaNdarray_HOST_STRIDES(
%(out)
s)[2],
CudaNdarray_HOST_STRIDES(
%(out)
s)[3]
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set out descriptor:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
err
%(name)
s = cudnnConvolutionForward(
handle
%(id)
d,
input
%(id)
d, CudaNdarray_DEV_DATA(
%(img)
s),
kerns
%(id)
d, CudaNdarray_DEV_DATA(
%(kerns)
s),
op
%(id)
d,
output
%(id)
d, CudaNdarray_DEV_DATA(
%(out)
s),
CUDNN_RESULT_NO_ACCUMULATE
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error doing operation:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
"""
%
dict
(
img
=
img
,
kerns
=
kern
,
out
=
out
,
bmode
=
bmode
,
fail
=
sub
[
'fail'
],
id
=
sub
[
'struct_id'
],
name
=
name
)
def
c_code_cache_version
(
self
):
return
(
4
,)
from
theano.sandbox.cuda.opt
import
(
local_optimizer
,
gpu_contiguous
,
gpu_optimizer
)
@local_optimizer
([
GpuConv
])
def
local_conv_dnn
(
node
):
if
isinstance
(
node
.
op
,
GpuConv
):
if
(
node
.
op
.
subsample
!=
(
1
,
1
)
or
node
.
op
.
border_mode
not
in
[
'full'
,
'valid'
]):
return
img
,
kern
=
node
.
inputs
border_mode
=
node
.
op
.
border_mode
return
[
GpuDnnConv
(
border_mode
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))]
gpu_optimizer
.
register
(
"conv_cudnn"
,
local_conv_dnn
,
'cudnn'
)
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
浏览文件 @
50968f7a
...
...
@@ -26,6 +26,8 @@ from theano.sandbox import cuda
if
cuda
.
cuda_available
==
False
:
raise
SkipTest
(
'Optional package cuda disabled'
)
from
theano.sandbox.cuda.dnn
import
GpuDnnConv
# needed as the gpu conv doesn't have a perform implementation.
if
theano
.
config
.
mode
==
'FAST_COMPILE'
:
theano_mode
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
including
(
'gpu'
)
...
...
@@ -615,14 +617,13 @@ def test_valid_9_10():
print_
=
print_
,
ones
=
ones
,
rtol
=
1.1e-5
)
def
test_valid
(
conv_gemm
=
False
):
def
_test_valid
(
cls
,
mode
=
None
,
extra_shapes
=
[],
version
=
[
-
1
]
):
seed_rng
()
shapes
=
get_valid_shapes
()
#shapes=shapes[400:426]
# I put -1 in case we forget to add version in the test to.
# I put -2 to test the reference version.
version
=
[
-
2
,
-
1
,
6
]
verbose
=
0
random
=
True
...
...
@@ -631,28 +632,31 @@ def test_valid(conv_gemm=False):
if
ones
:
random
=
False
if
conv_gemm
:
# Test the GpuCorrMM version
mode
=
theano_mode
.
including
(
"conv_gemm"
)
cls
=
cuda
.
blas
.
BaseGpuCorrMM
# dummy version; not used by GpuCorrMM so one version is enough
version
=
[
-
1
]
# Add tests with strided inputs by still square images and filters.
shapes
+=
get_shapes2
(
scales_img
=
(
2
,
2
),
img_stride
=
(
2
,
2
))
shapes
+=
get_shapes2
(
scales_kern
=
(
2
,
2
),
kern_stride
=
(
2
,
2
))
else
:
mode
=
theano_mode
cls
=
None
shapes
+=
extra_shapes
exec_conv
(
version
,
shapes
,
verbose
,
random
,
'valid'
,
print_
=
print_
,
ones
=
ones
,
rtol
=
1.1e-5
,
theano_mode
=
mode
,
cls
=
cls
)
def
test_valid
():
_test_valid
(
None
,
version
=
[
-
2
,
-
1
,
6
])
def
test_gemm_valid
():
test_valid
(
conv_gemm
=
True
)
extra_shapes
=
get_shapes2
(
scales_img
=
(
2
,
2
),
img_stride
=
(
2
,
2
))
extra_shapes
+=
get_shapes2
(
scales_kern
=
(
2
,
2
),
kern_stride
=
(
2
,
2
))
_test_valid
(
cuda
.
blas
.
BaseGpuCorrMM
,
mode
=
theano_mode
.
including
(
"conv_gemm"
),
extra_shapes
=
extra_shapes
)
def
test_dnn_valid
():
_test_valid
(
GpuDnnConv
,
mode
=
theano_mode
.
including
(
"cudnn"
))
def
test_full
(
conv_gemm
=
False
):
def
_test_full
(
cls
,
mode
=
None
,
version
=
[
-
1
],
extra_shapes
=
[]):
seed_rng
()
shapes
=
get_basic_shapes
()
shapes
+=
get_shapes2
()
...
...
@@ -707,25 +711,26 @@ def test_full(conv_gemm=False):
]
# shapes=shapes[:277]
version
=
[
-
2
,
-
1
,
0
,
1
,
2
,
3
,
4
,
5
]
verbose
=
0
random
=
True
if
conv_gemm
:
# Test the GpuCorrMM version
mode
=
theano_mode
.
including
(
"conv_gemm"
)
cls
=
cuda
.
blas
.
BaseGpuCorrMM
# dummy version; not used by GpuCorrMM so one version is enough
version
=
[
-
1
]
else
:
mode
=
theano_mode
cls
=
None
shapes
+=
extra_shapes
exec_conv
(
version
,
shapes
,
verbose
,
random
,
'full'
,
theano_mode
=
mode
,
cls
=
cls
)
def
test_full
():
_test_full
(
None
,
version
=
[
-
2
,
-
1
,
0
,
1
,
2
,
3
,
4
,
5
])
def
test_gemm_full
():
test_full
(
conv_gemm
=
True
)
_test_full
(
cuda
.
blas
.
BaseGpuCorrMM
,
mode
=
theano_mode
.
including
(
"conv_gemm"
))
def
test_dnn_full
():
_test_full
(
GpuDnnConv
,
mode
=
theano_mode
.
including
(
"cudnn"
))
def
test_subsample
(
conv_gemm
=
False
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论