提交 50968f7a authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2112 from nouiz/abergeron-dnn

Abergeron dnn
...@@ -99,7 +99,7 @@ There are less methods to define for an Op than for a Type: ...@@ -99,7 +99,7 @@ There are less methods to define for an Op than for a Type:
module is initialized, before anything else is executed and is module is initialized, before anything else is executed and is
specialized for a particular apply of an :ref:`op`. specialized for a particular apply of an :ref:`op`.
.. method:: c_init_code_struct(node, struct_id) .. method:: c_init_code_struct(node, struct_id, sub)
Allows you to specify code that will be inserted in the struct Allows you to specify code that will be inserted in the struct
constructor of the Op. This is for code which should be constructor of the Op. This is for code which should be
...@@ -108,6 +108,16 @@ There are less methods to define for an Op than for a Type: ...@@ -108,6 +108,16 @@ There are less methods to define for an Op than for a Type:
`struct_id` is an integer guaranteed to be unique inside the `struct_id` is an integer guaranteed to be unique inside the
struct. struct.
`sub` is a dictionary of extra parameters to the
c_init_code_struct method. It contains the following
values:
``sub['fail']``
A string of code that you should execute (after ensuring
that a python exception is set) if your C code needs to
raise an exception.
.. method:: c_support_code() .. method:: c_support_code()
Allows you to specify helper functions/structs that the Allows you to specify helper functions/structs that the
......
...@@ -94,6 +94,13 @@ TODO: Give examples on how to use these things! They are pretty complicated. ...@@ -94,6 +94,13 @@ TODO: Give examples on how to use these things! They are pretty complicated.
f = theano.function(..., mode=mode) f = theano.function(..., mode=mode)
- :func:`GpuDnnConv <theano.sandbox.cuda.dnn.GpuDnnConv>` GPU-only
convolution using NVIDIA's cuDNN library. To enable it (and
other cudnn-accelerated ops), set
``THEANO_FLAGS=optimizer_including=cudnn`` in your environment.
This requires that you have cuDNN installed and available. It
also requires a GPU with compute capability 3.0 or more.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>` - :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`
3D Convolution applying multi-channel 3D filters to batches of 3D Convolution applying multi-channel 3D filters to batches of
multi-channel 3D images. multi-channel 3D images.
......
...@@ -143,6 +143,18 @@ def failure_code(sub): ...@@ -143,6 +143,18 @@ def failure_code(sub):
goto __label_%(id)i;}''' % sub goto __label_%(id)i;}''' % sub
def failure_code_init(sub):
    """Return the C snippet executed when the struct constructor fails.

    If the Op's C code did not already set a Python exception, a generic
    RuntimeError is raised; the snippet then returns the failure id so
    that init() reports the error to its caller.
    """
    template = '''{
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_RuntimeError,
                "Unexpected error in an Op's C code. "
                "No Python exception was set.");
        }
        return %(id)d;
}'''
    return template % sub
def code_gen(blocks): def code_gen(blocks):
"""WRITEME From a list of L{CodeBlock} instances, returns a string """WRITEME From a list of L{CodeBlock} instances, returns a string
that executes them all in sequence. eg for C{(decl1, task1, that executes them all in sequence. eg for C{(decl1, task1,
...@@ -205,8 +217,7 @@ def struct_gen(args, struct_builders, blocks, sub): ...@@ -205,8 +217,7 @@ def struct_gen(args, struct_builders, blocks, sub):
# be executed if any step in the constructor fails and the # be executed if any step in the constructor fails and the
# latter only at destruction time. # latter only at destruction time.
struct_decl += block.declare struct_decl += block.declare
struct_init_head = struct_init_head + ("\n{\n%s" % block.behavior) struct_init_head = struct_init_head + ("\n%s" % block.behavior)
struct_init_tail = ("%s\n}\n" % block.cleanup) + struct_init_tail
struct_cleanup += block.cleanup struct_cleanup += block.cleanup
behavior = code_gen(blocks) behavior = code_gen(blocks)
...@@ -258,6 +269,7 @@ def struct_gen(args, struct_builders, blocks, sub): ...@@ -258,6 +269,7 @@ def struct_gen(args, struct_builders, blocks, sub):
# TODO: add some error checking to make sure storage_<x> are # TODO: add some error checking to make sure storage_<x> are
# 1-element lists and __ERROR is a 3-elements list. # 1-element lists and __ERROR is a 3-elements list.
struct_code = """ struct_code = """
namespace { namespace {
struct %(name)s { struct %(name)s {
...@@ -274,13 +286,9 @@ def struct_gen(args, struct_builders, blocks, sub): ...@@ -274,13 +286,9 @@ def struct_gen(args, struct_builders, blocks, sub):
int init(PyObject* __ERROR, %(args_decl)s) { int init(PyObject* __ERROR, %(args_decl)s) {
%(storage_incref)s %(storage_incref)s
%(storage_set)s %(storage_set)s
int %(failure_var)s = 0;
%(struct_init_head)s %(struct_init_head)s
this->__ERROR = __ERROR; this->__ERROR = __ERROR;
return 0; return 0;
%(struct_init_tail)s
%(storage_decref)s
%(do_return)s
} }
void cleanup(void) { void cleanup(void) {
%(struct_cleanup)s %(struct_cleanup)s
...@@ -333,7 +341,7 @@ def get_c_init(r, name, sub): ...@@ -333,7 +341,7 @@ def get_c_init(r, name, sub):
def get_c_extract(r, name, sub): def get_c_extract(r, name, sub):
"""Wrapper around c_extract that initializes py_name from storage.""" """Wrapper around c_extract that initializes py_name from storage."""
if any([getattr(c.op, 'check_input', config.check_input) for (c, _) in if any([getattr(c.op, 'check_input', config.check_input) for (c, _) in
r.clients]): r.clients]):
c_extract = r.type.c_extract(name, sub, True) c_extract = r.type.c_extract(name, sub, True)
...@@ -419,7 +427,7 @@ def struct_variable_codeblocks(variable, policies, id, symbol_table, sub): ...@@ -419,7 +427,7 @@ def struct_variable_codeblocks(variable, policies, id, symbol_table, sub):
sub = dict(sub) sub = dict(sub)
# sub['name'] = name # sub['name'] = name
sub['id'] = id sub['id'] = id
sub['fail'] = failure_code(sub) sub['fail'] = failure_code_init(sub)
sub['py_ptr'] = "py_%s" % name sub['py_ptr'] = "py_%s" % name
sub['stor_ptr'] = "storage_%s" % name sub['stor_ptr'] = "storage_%s" % name
# struct_declare, struct_behavior, struct_cleanup, sub) # struct_declare, struct_behavior, struct_cleanup, sub)
...@@ -530,9 +538,8 @@ class CLinker(link.Linker): ...@@ -530,9 +538,8 @@ class CLinker(link.Linker):
failure_var = "__failure" failure_var = "__failure"
id = 1 id = 1
sub = dict(failure_var=failure_var)
for variable in self.variables: for variable in self.variables:
sub = dict(failure_var=failure_var)
# it might be possible to inline constant variables as C literals # it might be possible to inline constant variables as C literals
# policy = [[what to declare in the struct, # policy = [[what to declare in the struct,
...@@ -634,6 +641,10 @@ class CLinker(link.Linker): ...@@ -634,6 +641,10 @@ class CLinker(link.Linker):
sub['struct_id'] = id + 1 sub['struct_id'] = id + 1
sub['fail'] = failure_code(sub) sub['fail'] = failure_code(sub)
sub_struct = dict()
sub_struct['id'] = id + 1
sub_struct['fail'] = failure_code_init(sub)
struct_support = "" struct_support = ""
struct_init = "" struct_init = ""
struct_cleanup = "" struct_cleanup = ""
...@@ -661,7 +672,7 @@ class CLinker(link.Linker): ...@@ -661,7 +672,7 @@ class CLinker(link.Linker):
" didn't return a string for c_init_code_apply") " didn't return a string for c_init_code_apply")
try: try:
struct_init = op.c_init_code_struct(node, id + 1) struct_init = op.c_init_code_struct(node, id + 1, sub_struct)
assert isinstance(struct_init, basestring), ( assert isinstance(struct_init, basestring), (
str(node.op) + str(node.op) +
" didn't return a string for c_init_code_struct") " didn't return a string for c_init_code_struct")
...@@ -1418,7 +1429,10 @@ class CLinker(link.Linker): ...@@ -1418,7 +1429,10 @@ class CLinker(link.Linker):
print >> code, ' return NULL;' print >> code, ' return NULL;'
print >> code, ' }' print >> code, ' }'
print >> code, ' %(struct_name)s* struct_ptr = new %(struct_name)s();' % locals() print >> code, ' %(struct_name)s* struct_ptr = new %(struct_name)s();' % locals()
print >> code, ' struct_ptr->init(', ','.join('PyTuple_GET_ITEM(argtuple, %i)' % n for n in xrange(n_args)), ');' print >> code, ' if (struct_ptr->init(', ','.join('PyTuple_GET_ITEM(argtuple, %i)' % n for n in xrange(n_args)), ') != 0) {'
print >> code, ' delete struct_ptr;'
print >> code, ' return NULL;'
print >> code, ' }'
if PY3: if PY3:
print >> code, """\ print >> code, """\
PyObject* thunk = PyCapsule_New((void*)(&{struct_name}_executor), NULL, {struct_name}_destructor); PyObject* thunk = PyCapsule_New((void*)(&{struct_name}_executor), NULL, {struct_name}_destructor);
......
...@@ -319,7 +319,7 @@ class CLinkerOp(CLinkerObject): ...@@ -319,7 +319,7 @@ class CLinkerOp(CLinkerObject):
raise utils.MethodNotDefined("c_init_code_apply", type(self), raise utils.MethodNotDefined("c_init_code_apply", type(self),
self.__class__.__name__) self.__class__.__name__)
def c_init_code_struct(self, node, struct_id): def c_init_code_struct(self, node, struct_id, sub):
""" """
Optional: return a code string specific to the apply Optional: return a code string specific to the apply
to be inserted in the struct initialization code. to be inserted in the struct initialization code.
...@@ -331,6 +331,11 @@ class CLinkerOp(CLinkerObject): ...@@ -331,6 +331,11 @@ class CLinkerOp(CLinkerObject):
sub parameter named struct_id that will sub parameter named struct_id that will
contain this name. contain this name.
:param sub: a dictionary of values to substitute in the code.
Most notably it contains a 'fail' entry that you
should place in your code after setting a python
exception to indicate an error.
:Exceptions: :Exceptions:
- `MethodNotDefined`: the subclass does not override this method - `MethodNotDefined`: the subclass does not override this method
""" """
......
...@@ -89,7 +89,7 @@ class StructOp(Op): ...@@ -89,7 +89,7 @@ class StructOp(Op):
def c_support_code_struct(self, node, struct_id): def c_support_code_struct(self, node, struct_id):
return "npy_uint64 counter%d;" % (struct_id,) return "npy_uint64 counter%d;" % (struct_id,)
def c_init_code_struct(self, node, struct_id): def c_init_code_struct(self, node, struct_id, sub):
return "counter%d = 0;" % (struct_id,) return "counter%d = 0;" % (struct_id,)
def c_code(self, node, name, input_names, outputs_names, sub): def c_code(self, node, name, input_names, outputs_names, sub):
......
...@@ -155,7 +155,8 @@ if compile_cuda_ndarray and cuda_available: ...@@ -155,7 +155,8 @@ if compile_cuda_ndarray and cuda_available:
'cuda_ndarray', 'cuda_ndarray',
code, code,
location=cuda_ndarray_loc, location=cuda_ndarray_loc,
include_dirs=[cuda_path], libs=[config.cublas.lib], include_dirs=[cuda_path],
libs=[config.cublas.lib],
preargs=['-O3'] + compiler.compile_args()) preargs=['-O3'] + compiler.compile_args())
from cuda_ndarray.cuda_ndarray import * from cuda_ndarray.cuda_ndarray import *
except Exception, e: except Exception, e:
...@@ -262,8 +263,8 @@ if cuda_available: ...@@ -262,8 +263,8 @@ if cuda_available:
shared_constructor = float32_shared_constructor shared_constructor = float32_shared_constructor
import basic_ops from . import basic_ops
from basic_ops import ( from .basic_ops import (
GpuFromHost, HostFromGpu, GpuElemwise, GpuFromHost, HostFromGpu, GpuElemwise,
GpuDimShuffle, GpuCAReduce, GpuReshape, GpuContiguous, GpuDimShuffle, GpuCAReduce, GpuReshape, GpuContiguous,
GpuSubtensor, GpuIncSubtensor, GpuSubtensor, GpuIncSubtensor,
...@@ -273,11 +274,11 @@ if cuda_available: ...@@ -273,11 +274,11 @@ if cuda_available:
ftensor3, ftensor4, ftensor3, ftensor4,
scalar, vector, matrix, row, col, scalar, vector, matrix, row, col,
tensor3, tensor4) tensor3, tensor4)
from basic_ops import (host_from_gpu, gpu_from_host, from .basic_ops import (host_from_gpu, gpu_from_host,
as_cuda_array, as_cuda_ndarray_variable) as_cuda_array, as_cuda_ndarray_variable)
import opt
import cuda_ndarray import cuda_ndarray
from rng_curand import CURAND_RandomStreams from . import opt, dnn
from .rng_curand import CURAND_RandomStreams
def use(device, def use(device,
......
#ifndef CUDNN_HELPER_H
#define CUDNN_HELPER_H
#include <cudnn.h>
/* Map a cudnnStatus_t error code to a human-readable message for Python
 * error reporting.
 * NOTE(review): newer cuDNN releases ship their own cudnnGetErrorString;
 * if this header is ever built against such a version the name will
 * clash -- confirm the targeted cuDNN release before upgrading. */
static inline const char *cudnnGetErrorString(cudnnStatus_t err) {
  switch (err) {
  case CUDNN_STATUS_SUCCESS:
    return "The operation completed successfully.";
  case CUDNN_STATUS_NOT_INITIALIZED:
    /* typo fixes: "initialized(Is ... enought?)" -> proper spacing/spelling */
    return "The handle was not initialized (is your driver recent enough?).";
  case CUDNN_STATUS_ALLOC_FAILED:
    /* typo fix: "Ressource" -> "Resource" */
    return "Resource allocation failed inside the library.";
  case CUDNN_STATUS_BAD_PARAM:
    return "An incorrect value was passed in.";
  case CUDNN_STATUS_ARCH_MISMATCH:
    return "The current GPU does not support the required features (only cc 3.0+ are supported).";
  case CUDNN_STATUS_MAPPING_ERROR:
    return "An access to GPU memory space failed (probably due to a failure to bind texture).";
  case CUDNN_STATUS_EXECUTION_FAILED:
    return "A kernel failed to execute.";
  case CUDNN_STATUS_INTERNAL_ERROR:
    return "An internal cuDNN operation failed.";
  case CUDNN_STATUS_NOT_SUPPORTED:
    return "The combination of parameters is not currently supported.";
  default:
    return "Unknown error code.";
  }
}
#endif
import copy
import os
import theano
from theano import Apply
from theano import tensor
from theano.compat.six import StringIO
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
gpu_contiguous)
from theano.sandbox.cuda.blas import GpuConv
class GpuDnnConv(GpuOp):
    """GPU-only 2D convolution implemented with NVIDIA's cuDNN library.

    Convolves a batch of images with a bank of filters in 'valid' or
    'full' border mode.  Both inputs must be c-contiguous float32
    CudaNdarrays (enforced in c_code).  Subsampling is not supported
    here; the optimizer only rewrites GpuConv nodes with subsample
    (1, 1) into this Op.
    """
    __props__ = ('border_mode',)

    def __init__(self, border_mode):
        # Either 'valid' or 'full'; c_code asserts this when generating code.
        self.border_mode = border_mode

    def make_node(self, img, kern):
        """Build the Apply node; img and kern must both be 4D tensors.

        Output broadcastability: batch dim from img, channel dim from
        the number of filters in kern; spatial dims are unknown.
        """
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        broadcastable = (img.type.broadcastable[0],
                         kern.type.broadcastable[0],
                         False, False)
        return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])

    def c_headers(self):
        # cudnn_helper.h provides cudnnGetErrorString used in the C code below.
        return ['cudnn.h', 'cudnn_helper.h']

    def c_header_dirs(self):
        # cudnn_helper.h is shipped next to this module.
        return [os.path.dirname(__file__)]

    def c_libraries(self):
        return ['cudnn']

    def c_support_code_struct(self, node, struct_id):
        # Per-apply cuDNN handle and descriptors, name-mangled with
        # struct_id so several applies can share one compiled struct.
        return """
        cudnnHandle_t handle%(id)d;
        cudnnTensor4dDescriptor_t input%(id)d;
        cudnnTensor4dDescriptor_t output%(id)d;
        cudnnFilterDescriptor_t kerns%(id)d;
        cudnnConvolutionDescriptor_t op%(id)d;
        """ % dict(id=struct_id)

    def c_init_code_struct(self, node, struct_id, sub):
        # Create the handle and descriptors once at struct construction.
        # sub['fail'] aborts the constructor with a Python exception set.
        return """
        handle%(id)d = NULL;
        input%(id)d = NULL;
        output%(id)d = NULL;
        kerns%(id)d = NULL;
        op%(id)d = NULL;
        cudnnStatus_t err%(id)d;
        if ((err%(id)d = cudnnCreate(&handle%(id)d)) != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "could not create cudnn handle: %%s",
        cudnnGetErrorString(err%(id)d));
        %(fail)s
        }
        if ((err%(id)d = cudnnCreateTensor4dDescriptor(&input%(id)d)) != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
        "(inp): %%s", cudnnGetErrorString(err%(id)d));
        %(fail)s
        }
        if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output%(id)d)) != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
        "(out): %%s", cudnnGetErrorString(err%(id)d));
        %(fail)s
        }
        if ((err%(id)d = cudnnCreateFilterDescriptor(&kerns%(id)d)) != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_MemoryError, "could not allocate filter descriptor: %%s",
        cudnnGetErrorString(err%(id)d));
        %(fail)s
        }
        if ((err%(id)d = cudnnCreateConvolutionDescriptor(&op%(id)d)) != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_MemoryError, "could not allocate convolution "
        "descriptor: %%s", cudnnGetErrorString(err%(id)d));
        %(fail)s
        }
        """ % dict(id=struct_id, fail=sub['fail'])

    def c_cleanup_code_struct(self, node, struct_id):
        # Mirror of c_init_code_struct: release descriptors then the handle.
        return """
        cudnnDestroyTensor4dDescriptor(input%(id)d);
        cudnnDestroyTensor4dDescriptor(output%(id)d);
        cudnnDestroyFilterDescriptor(kerns%(id)d);
        cudnnDestroyConvolutionDescriptor(op%(id)d);
        cudnnDestroy(handle%(id)d);
        """ % dict(id=struct_id)

    def c_code(self, node, name, inputs, outputs, sub):
        """Generate the per-call C code.

        Fills the descriptors from the runtime shapes/strides, allocates
        the output, then runs cudnnConvolutionForward.  'full' mode is
        implemented as a padded convolution (pad = kernel - 1).
        """
        img, kern = inputs
        out, = outputs
        # bmode: 1 -> valid (no padding), 0 -> full (pad by kern - 1).
        if self.border_mode == "valid":
            bmode = 1
        else:
            assert self.border_mode == "full"
            bmode = 0
        return """
        cudnnStatus_t err%(name)s;
        int pad_w%(name)s;
        int pad_h%(name)s;
        if (!CudaNdarray_is_c_contiguous(%(img)s)) {
        PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
        %(fail)s
        }
        if (!CudaNdarray_is_c_contiguous(%(kerns)s)) {
        PyErr_SetString(PyExc_ValueError, "Only contiguous filters are supported.");
        %(fail)s
        }
        err%(name)s = cudnnSetTensor4dDescriptorEx(
        input%(id)d, CUDNN_DATA_FLOAT,
        CudaNdarray_HOST_DIMS(%(img)s)[0],
        CudaNdarray_HOST_DIMS(%(img)s)[1],
        CudaNdarray_HOST_DIMS(%(img)s)[2],
        CudaNdarray_HOST_DIMS(%(img)s)[3],
        CudaNdarray_HOST_STRIDES(%(img)s)[0],
        CudaNdarray_HOST_STRIDES(%(img)s)[1],
        CudaNdarray_HOST_STRIDES(%(img)s)[2],
        CudaNdarray_HOST_STRIDES(%(img)s)[3]
        );
        if (err%(name)s != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "could not set tensor4d descriptor: %%s",
        cudnnGetErrorString(err%(name)s));
        %(fail)s
        }
        err%(name)s = cudnnSetFilterDescriptor(
        kerns%(id)d, CUDNN_DATA_FLOAT,
        CudaNdarray_HOST_DIMS(%(kerns)s)[0],
        CudaNdarray_HOST_DIMS(%(kerns)s)[1],
        CudaNdarray_HOST_DIMS(%(kerns)s)[2],
        CudaNdarray_HOST_DIMS(%(kerns)s)[3]
        );
        if (err%(name)s != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "could not set filter descriptor: %%s",
        cudnnGetErrorString(err%(name)s));
        %(fail)s
        }
        if (%(bmode)d == 1) {
        pad_h%(name)s = 0;
        pad_w%(name)s = 0;
        } else if (%(bmode)d == 0) {
        pad_h%(name)s = CudaNdarray_HOST_DIMS(%(kerns)s)[2] - 1;
        pad_w%(name)s = CudaNdarray_HOST_DIMS(%(kerns)s)[3] - 1;
        } else {
        PyErr_SetString(PyExc_ValueError, "bad border mode");
        %(fail)s
        }
        err%(name)s = cudnnSetConvolutionDescriptor(
        op%(id)d, input%(id)d, kerns%(id)d,
        pad_h%(name)s,
        pad_w%(name)s,
        1, 1, 1, 1,
        CUDNN_CONVOLUTION
        );
        if (err%(name)s != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
        cudnnGetErrorString(err%(name)s));
        %(fail)s
        }
        {
        int out_dims[4];
        err%(name)s = cudnnGetOutputTensor4dDim(
        op%(id)d, CUDNN_CONVOLUTION_FWD,
        &out_dims[0], &out_dims[1],
        &out_dims[2], &out_dims[3]
        );
        if (err%(name)s != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
        cudnnGetErrorString(err%(name)s));
        %(fail)s
        }
        if (CudaNdarray_prep_output(&%(out)s, 4, out_dims) != 0) {
        %(fail)s
        }
        }
        err%(name)s = cudnnSetTensor4dDescriptorEx(
        output%(id)d, CUDNN_DATA_FLOAT,
        CudaNdarray_HOST_DIMS(%(out)s)[0],
        CudaNdarray_HOST_DIMS(%(out)s)[1],
        CudaNdarray_HOST_DIMS(%(out)s)[2],
        CudaNdarray_HOST_DIMS(%(out)s)[3],
        CudaNdarray_HOST_STRIDES(%(out)s)[0],
        CudaNdarray_HOST_STRIDES(%(out)s)[1],
        CudaNdarray_HOST_STRIDES(%(out)s)[2],
        CudaNdarray_HOST_STRIDES(%(out)s)[3]
        );
        if (err%(name)s != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "could not set out descriptor: %%s",
        cudnnGetErrorString(err%(name)s));
        %(fail)s
        }
        err%(name)s = cudnnConvolutionForward(
        handle%(id)d,
        input%(id)d, CudaNdarray_DEV_DATA(%(img)s),
        kerns%(id)d, CudaNdarray_DEV_DATA(%(kerns)s),
        op%(id)d,
        output%(id)d, CudaNdarray_DEV_DATA(%(out)s),
        CUDNN_RESULT_NO_ACCUMULATE
        );
        if (err%(name)s != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "error doing operation: %%s",
        cudnnGetErrorString(err%(name)s));
        %(fail)s
        }
        """ % dict(img=img, kerns=kern, out=out, bmode=bmode,
                   fail=sub['fail'], id=sub['struct_id'], name=name)

    def c_code_cache_version(self):
        # Bump when the generated C code changes.
        return (4,)
from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
gpu_optimizer)
@local_optimizer([GpuConv])
def local_conv_dnn(node):
    """Rewrite a GpuConv node into a GpuDnnConv when cuDNN can handle it.

    Only unsubsampled convolutions in 'full' or 'valid' mode qualify;
    anything else is left untouched (returns None).
    """
    if not isinstance(node.op, GpuConv):
        return
    if node.op.subsample != (1, 1):
        return
    if node.op.border_mode not in ['full', 'valid']:
        return
    img, kern = node.inputs
    # cuDNN requires contiguous inputs; gpu_contiguous enforces that.
    replacement = GpuDnnConv(node.op.border_mode)(gpu_contiguous(img),
                                                  gpu_contiguous(kern))
    return [replacement]
# Register under the 'cudnn' tag so the rewrite only runs when enabled
# via optimizer_including=cudnn (see the docs above).
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
...@@ -26,6 +26,8 @@ from theano.sandbox import cuda ...@@ -26,6 +26,8 @@ from theano.sandbox import cuda
if cuda.cuda_available == False: if cuda.cuda_available == False:
raise SkipTest('Optional package cuda disabled') raise SkipTest('Optional package cuda disabled')
from theano.sandbox.cuda.dnn import GpuDnnConv
#needed as the gpu conv don't have a perform implementation. #needed as the gpu conv don't have a perform implementation.
if theano.config.mode == 'FAST_COMPILE': if theano.config.mode == 'FAST_COMPILE':
theano_mode = theano.compile.mode.get_mode('FAST_RUN').including('gpu') theano_mode = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
...@@ -615,14 +617,13 @@ def test_valid_9_10(): ...@@ -615,14 +617,13 @@ def test_valid_9_10():
print_=print_, ones=ones, rtol=1.1e-5) print_=print_, ones=ones, rtol=1.1e-5)
def test_valid(conv_gemm=False): def _test_valid(cls, mode=None, extra_shapes=[], version=[-1]):
seed_rng() seed_rng()
shapes = get_valid_shapes() shapes = get_valid_shapes()
#shapes=shapes[400:426] #shapes=shapes[400:426]
# I put -1 in case we forget to add version in the test to. # I put -1 in case we forget to add version in the test to.
# I put -2 to test the reference version. # I put -2 to test the reference version.
version = [-2, -1, 6]
verbose = 0 verbose = 0
random = True random = True
...@@ -631,28 +632,31 @@ def test_valid(conv_gemm=False): ...@@ -631,28 +632,31 @@ def test_valid(conv_gemm=False):
if ones: if ones:
random = False random = False
if conv_gemm: shapes += extra_shapes
# Test the GpuCorrMM version
mode = theano_mode.including("conv_gemm")
cls = cuda.blas.BaseGpuCorrMM
# dummy version; not used by GpuCorrMM so one version is enough
version = [-1]
# Add tests with strided inputs by still square images and filters.
shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
shapes += get_shapes2(scales_kern=(2, 2), kern_stride=(2, 2))
else:
mode = theano_mode
cls = None
exec_conv(version, shapes, verbose, random, 'valid', exec_conv(version, shapes, verbose, random, 'valid',
print_=print_, ones=ones, rtol=1.1e-5, print_=print_, ones=ones, rtol=1.1e-5,
theano_mode=mode, cls=cls) theano_mode=mode, cls=cls)
def test_valid():
_test_valid(None, version=[-2, -1, 6])
def test_gemm_valid(): def test_gemm_valid():
test_valid(conv_gemm=True) extra_shapes = get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
extra_shapes += get_shapes2(scales_kern=(2, 2), kern_stride=(2, 2))
_test_valid(cuda.blas.BaseGpuCorrMM,
mode=theano_mode.including("conv_gemm"),
extra_shapes=extra_shapes)
def test_dnn_valid():
_test_valid(GpuDnnConv, mode=theano_mode.including("cudnn"))
def test_full(conv_gemm=False):
def _test_full(cls, mode=None, version=[-1], extra_shapes=[]):
seed_rng() seed_rng()
shapes = get_basic_shapes() shapes = get_basic_shapes()
shapes += get_shapes2() shapes += get_shapes2()
...@@ -707,25 +711,26 @@ def test_full(conv_gemm=False): ...@@ -707,25 +711,26 @@ def test_full(conv_gemm=False):
] ]
# shapes=shapes[:277] # shapes=shapes[:277]
version = [-2, -1, 0, 1, 2, 3, 4, 5]
verbose = 0 verbose = 0
random = True random = True
if conv_gemm: shapes += extra_shapes
# Test the GpuCorrMM version
mode = theano_mode.including("conv_gemm")
cls = cuda.blas.BaseGpuCorrMM
# dummy version; not used by GpuCorrMM so one version is enough
version = [-1]
else:
mode = theano_mode
cls = None
exec_conv(version, shapes, verbose, random, 'full', exec_conv(version, shapes, verbose, random, 'full',
theano_mode=mode, cls=cls) theano_mode=mode, cls=cls)
def test_full():
_test_full(None, version=[-2, -1, 0, 1, 2, 3, 4, 5])
def test_gemm_full(): def test_gemm_full():
test_full(conv_gemm=True) _test_full(cuda.blas.BaseGpuCorrMM,
mode=theano_mode.including("conv_gemm"))
def test_dnn_full():
_test_full(GpuDnnConv, mode=theano_mode.including("cudnn"))
def test_subsample(conv_gemm=False): def test_subsample(conv_gemm=False):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论