提交 4c513ba6，作者：Frédéric Bastien

Merge pull request #2259 from jia-kai/master

padding support for dnn_conv
......@@ -211,6 +211,8 @@ if cuda_available:
except EnvironmentError, e:
cuda_available = False
cuda_initialization_error_message = " ".join(e.args)
else:
cuda_initialization_error_message = 'cuda unavilable'
class GpuOp(theano.gof.Op):
......
import copy
import os
import logging
_logger = logging.getLogger(__name__)
import theano
from theano import Apply
......@@ -504,39 +506,61 @@ gpu_ger_inplace = GpuGer(inplace=True)
class BaseGpuCorrMM(GpuOp):
"""Base class for `GpuCorrMM`, `GpuCorrMM_gradWeights` and
`GpuCorrMM_gradInputs`. Cannot be used directly."""
`GpuCorrMM_gradInputs`. Cannot be used directly.
def __init__(self, border_mode="valid",
subsample=(1, 1),
pad=(0, 0)):
:param border_mode: one of 'valid', 'full', 'half'; additionally, the
padding size could be directly specified by an integer or a pair of
integers
:param subsample: perform subsampling of the output (default: (1, 1))
:param pad: *deprecated*, now you should always use border_mode
"""
def __init__(self, border_mode="valid", subsample=(1, 1), pad=(0, 0)):
    """Validate and normalise the convolution settings.

    :param border_mode: one of 'valid', 'full', 'half'; alternatively the
        padding size given as a non-negative integer (used for both axes)
        or a pair of non-negative integers (pad_h, pad_w)
    :param subsample: pair of subsampling factors (default: (1, 1))
    :param pad: *deprecated*; when different from (0, 0) a warning is
        logged and its value is used as the border mode, which is only
        accepted while border_mode is left at 'valid'
    :raises ValueError: if pad is combined with a non-'valid' border_mode,
        if border_mode is not one of the accepted forms, or if subsample
        does not have exactly two elements
    """
    if pad != (0, 0):
        # The two literals are concatenated, so the first one has to end
        # with a space to keep "in" and "border_mode" apart in the log.
        _logger.warning(
            'do not use pad for BaseGpuCorrMM; please set padding in '
            'border_mode, see the docstring for more details')
        if border_mode != "valid":
            raise ValueError("border_mode must be 'valid'")
        border_mode = pad
    if isinstance(border_mode, int):
        border_mode = (border_mode, border_mode)
    if isinstance(border_mode, tuple):
        # Unpacking enforces exactly two entries; int() normalises them.
        pad_h, pad_w = map(int, border_mode)
        border_mode = (pad_h, pad_w)
    if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
            border_mode in ('valid', 'full', 'half')):
        raise ValueError(
            'invalid border_mode {}, which must be either '
            '"valid", "full", "half", an integer or a pair of'
            ' integers'.format(border_mode))
    self.border_mode = border_mode
    if len(subsample) != 2:
        raise ValueError("subsample must have two elements")
    self.subsample = subsample
    # `pad` is intentionally not stored: the class exposes it as a
    # getter-only property computed from border_mode.
@property
def pad(self):
    """Padding implied by ``border_mode``.

    Returns ``(0, 0)`` when the border mode is 'valid'; otherwise the
    border mode itself is returned unchanged.
    """
    return self.border_mode if self.border_mode != 'valid' else (0, 0)
def __eq__(self, other):
    """Two ops are equal when they have the same type, border_mode and
    subsample.

    ``pad`` is not compared separately because it is computed from
    ``border_mode``.
    """
    return (type(self) == type(other)
            and self.border_mode == other.border_mode
            and self.subsample == other.subsample)
def __hash__(self):
    """Hash over the same fields that ``__eq__`` compares: the type,
    border_mode and subsample."""
    return (hash(type(self))
            ^ hash(self.border_mode)
            ^ hash(self.subsample))
def __str__(self):
    """Render the op as ``ClassName{border_mode, subsample}``."""
    return '%s{%s, %s}' % (
        self.__class__.__name__,
        self.border_mode,
        str(self.subsample))
def flops(self, inp, outp):
""" Useful with the hack in profilemode to print the MFlops"""
......@@ -558,7 +582,7 @@ class BaseGpuCorrMM(GpuOp):
def c_code_cache_version(self):
    """Return the version tuple for this op's generated C code."""
    # raise this whenever modifying any of the support_code_files
    return (0, 24)
def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of
......@@ -591,27 +615,28 @@ class BaseGpuCorrMM(GpuOp):
:param sub: Dictionary of substitutions useable to help generating the
C code.
:param height: If self.subsample[0] != 1, a variable giving the height
of the filters for direction="backprop weights" or the height of the
input images for direction="backprop inputs".
If self.pad == 'half', a variable giving the height of the filters
for direction="backprop weights".
Ignored otherwise.
of the filters for direction="backprop weights" or the height of
the input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the height of the
filters for direction="backprop weights". Ignored otherwise.
:param width: If self.subsample[1] != 1, a variable giving the width
of the filters for direction="backprop weights" or the width of the
input images for direction="backprop inputs".
If self.pad == 'half', a variable giving the width of the filters
for direction="backprop weights".
Ignored otherwise.
If self.border_mode == 'half', a variable giving the width of the
filters for direction="backprop weights". Ignored otherwise.
"""
if self.border_mode != "valid":
raise ValueError("mode must be 'valid'")
dH, dW = self.subsample
if self.pad == "half":
if self.border_mode == "half":
padH = padW = -1
elif self.pad == "full":
elif self.border_mode == "full":
padH = padW = -2
elif isinstance(self.border_mode, tuple):
padH, padW = self.border_mode
else:
padH, padW = self.pad
assert self.border_mode == "valid"
padH = padW = 0
if direction == "forward":
direction = 0
out = top
......@@ -841,9 +866,9 @@ class GpuCorrMM(BaseGpuCorrMM):
bottom, weights = inp
top, = grads
top = gpu_contiguous(top)
d_bottom = GpuCorrMM_gradInputs(self.border_mode, self.subsample, self.pad)(
d_bottom = GpuCorrMM_gradInputs(self.border_mode, self.subsample)(
weights, top, bottom.shape[-2:])
d_weights = GpuCorrMM_gradWeights(self.border_mode, self.subsample, self.pad)(
d_weights = GpuCorrMM_gradWeights(self.border_mode, self.subsample)(
bottom, top, weights.shape[-2:])
return d_bottom, d_weights
......
......@@ -122,9 +122,7 @@ class GpuDnnConvDesc(GpuOp):
"""This Op builds a convolution descriptor for use in the other
convolution operations.
:param border_mode: 'valid' or 'full'
:param subsample: The subsample, tuple like (dx, dy)
:param conv_mode: 'conv' or 'cross'
see the doc of :func:`dnn_conv` for a description of the parameters
"""
__props__ = ('border_mode', 'subsample', 'conv_mode')
......@@ -142,7 +140,17 @@ class GpuDnnConvDesc(GpuOp):
return NVCC_compiler
def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv'):
assert border_mode in ('valid', 'full')
if isinstance(border_mode, int):
border_mode = (border_mode, border_mode)
if isinstance(border_mode, tuple):
pad_h, pad_w = map(int, border_mode)
border_mode = (pad_h, pad_w)
if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
border_mode in ('valid', 'full')):
raise ValueError(
'invalid border_mode {}, which must be either '
'"valid", "full", an integer or a pair of'
' integers'.format(border_mode))
self.border_mode = border_mode
assert len(subsample) == 2
self.subsample = subsample
......@@ -162,6 +170,13 @@ class GpuDnnConvDesc(GpuOp):
img_shape, kern_shape = inputs
desc, = outputs
if isinstance(self.border_mode, tuple):
pad_h_spec, pad_w_spec = map(int, self.border_mode)
assert pad_h_spec >= 0 and pad_w_spec >= 0
bmode = 2
else:
pad_h_spec = pad_w_spec = 0
if self.border_mode == "valid":
bmode = 1
else:
......@@ -185,7 +200,10 @@ class GpuDnnConvDesc(GpuOp):
%(fail)s
}
if (%(bmode)d == 1) {
if (%(bmode)d == 2) {
pad_h%(name)s = %(pad_h_spec)d;
pad_w%(name)s = %(pad_w_spec)d;
} else if (%(bmode)d == 1) {
pad_h%(name)s = 0;
pad_w%(name)s = 0;
} else if (%(bmode)d == 0) {
......@@ -218,10 +236,11 @@ class GpuDnnConvDesc(GpuOp):
}
""" % dict(name=name, img_shape=img_shape, kern_shape=kern_shape, desc=desc,
bmode=bmode, conv_flag=conv_flag, fail=sub['fail'],
subsx=self.subsample[0], subsy=self.subsample[1])
subsx=self.subsample[0], subsy=self.subsample[1],
pad_h_spec=pad_h_spec, pad_w_spec=pad_w_spec)
def c_code_cache_version(self):
    """Return the version tuple for this op's generated C code."""
    # Bumped alongside the C template change that adds the explicit
    # pad_h_spec / pad_w_spec branch.
    return (2,)
class GpuDnnConvBase(DnnBase):
......@@ -459,7 +478,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
:param img: images to do the convolution over
:param kerns: convolution filters
:param border_mode: one of 'valid', 'full' (default: 'valid')
:param border_mode: one of 'valid', 'full'; additionally, the padding size
could be directly specified by an integer or a pair of integers
:param subsample: perform subsampling of the output (default: (1, 1))
:param conv_mode: perform convolution (kernels flipped) or cross-correlation. One of 'conv', 'cross'. (default: 'conv')
......
......@@ -9,6 +9,7 @@ import traceback
import numpy
from nose.plugins.skip import SkipTest
from nose.tools import assert_raises
imported_scipy_convolve2d = False
try:
from scipy.signal import convolve2d
......@@ -72,16 +73,21 @@ def py_conv_valid_numpy(img, kern):
out[b, k, rr, cc] = innerprod
return out
def py_conv_pad_img(img, pad_h, pad_w):
    """Return a zero-padded copy of ``img``.

    Only axes 2 and 3 are enlarged: ``pad_h`` zeros are added on both
    sides of axis 2 and ``pad_w`` zeros on both sides of axis 3.  The
    result has the same dtype as ``img``.

    :param img: array with at least the four axes indexed here
    :param pad_h: non-negative padding for axis 2
    :param pad_w: non-negative padding for axis 3
    """
    assert pad_h >= 0 and pad_w >= 0
    shape = img.shape
    n_rows, n_cols = shape[2], shape[3]
    out = numpy.zeros(
        (shape[0], shape[1], 2 * pad_h + n_rows, 2 * pad_w + n_cols),
        dtype=img.dtype)
    # Drop the original data into the centre of the zero canvas.
    row_span = slice(pad_h, pad_h + n_rows)
    col_span = slice(pad_w, pad_w + n_cols)
    out[:, :, row_span, col_span] = img
    return out
def py_conv_full_numpy(img, kern):
    """Reference 'full' convolution built on top of the 'valid' one.

    The image is zero-padded by ``kern.shape[2] - 1`` along axis 2 and
    ``kern.shape[3] - 1`` along axis 3, then passed to
    ``py_conv_valid_numpy`` together with ``kern``.
    """
    # Pad the img with zeros all around, and then run it through
    # py_conv_valid_numpy.  The padding is delegated to py_conv_pad_img
    # rather than also being built by hand and then thrown away.
    padded_img = py_conv_pad_img(img, kern.shape[2] - 1, kern.shape[3] - 1)
    return py_conv_valid_numpy(padded_img, kern)
......@@ -90,6 +96,12 @@ def py_conv(img, kern, mode, subsample):
use a scipy or numpy implementation depending is scipy is available.
The scipy version is faster.
"""
if isinstance(mode, int):
mode = (mode, mode)
if isinstance(mode, tuple):
pad_h, pad_w = map(int, mode)
img = py_conv_pad_img(img, pad_h, pad_w)
mode = 'valid'
if imported_scipy_convolve2d:
return py_conv_scipy(img, kern, mode, subsample)
elif mode == 'valid':
......@@ -820,6 +832,63 @@ class TestConv2DGPU(unittest.TestCase):
finally:
theano_mode = theano_mode_orig
class TestConvWithPadding(object):
    """Exercise conv ops that accept arbitrary padding through border_mode.

    The class deliberately does not derive from unittest.TestCase: the
    generator-style test below relies on ``yield``, which would not work
    in a TestCase subclass.
    """

    # Filled in by setup_class with every conv implementation to test.
    conv_ops = []

    @staticmethod
    def gemm_conv_op(img, kern, border_mode):
        """Apply GpuCorrMM to ``img`` with ``kern`` reversed along its
        last two axes."""
        flipped_kern = theano.sandbox.cuda.basic_ops.gpu_contiguous(
            kern[:, :, ::-1, ::-1])
        corr_op = theano.sandbox.cuda.blas.GpuCorrMM(border_mode=border_mode)
        return corr_op(img, flipped_kern)

    @classmethod
    def setup_class(cls):
        """Register the GEMM-based op and, if available, the cuDNN one."""
        cls.conv_ops.append(cls.gemm_conv_op)
        if cuda.dnn.dnn_available():
            cls.conv_ops.append(cuda.dnn.dnn_conv)

    def test_invalid_arg(self):
        """Every op must reject negative paddings and unknown mode names
        with a ValueError."""
        img = theano._asarray(numpy.empty((1, 1, 1, 1)), dtype='float32')
        kern = theano._asarray(numpy.empty((1, 1, 1, 1)), dtype='float32')
        bad_modes = ((-1, 0), (0, -1), 'not border')
        for conv_op in self.conv_ops:
            for bad_mode in bad_modes:
                assert_raises(ValueError, conv_op, img, kern,
                              border_mode=bad_mode)

    def _run_onecase(self, img_shape, kern_shape, padding, op):
        """Compare ``op`` against the CPU reference ``py_conv`` for one
        random image/kernel pair and one padding specification."""
        # Draw the image before the kernel to keep the random stream
        # consumption order fixed.
        npy_img = numpy.random.rand(*img_shape).astype('float32')
        npy_kern = numpy.random.rand(*kern_shape).astype('float32')
        img = theano._asarray(npy_img, dtype='float32')
        kern = theano.shared(npy_kern)
        expected = py_conv(npy_img, npy_kern, padding, (1, 1))
        X = tensor.ftensor4()
        Y = op(X, kern, border_mode=padding)
        func = theano.function([X], Y, mode=theano_mode)
        actual = numpy.asarray(func(img))
        assert_allclose(expected, actual, rtol=1e-5, atol=1e-5)

    def test_numeric_value(self):
        """Yield one numeric check per (shape/padding case, op) pair."""
        cases = [
            ((5, 10, 4, 4), (12, 10, 4, 4), (2, 1)),
            ((5, 10, 8, 8), (12, 10, 4, 4), 3),
            ((5, 10, 6, 8), (12, 10, 3, 4), 'full'),
            ((5, 10, 9, 6), (12, 10, 9, 4), 'valid'),
        ]
        for img_shape, kern_shape, padding in cases:
            for conv_op in self.conv_ops:
                yield (self._run_onecase,
                       img_shape, kern_shape, padding, conv_op)
def gemm_directly(bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsx, subsy,
direction):
......@@ -879,8 +948,7 @@ def test_gemm_directly():
def gemm_op(mode, subsample):
    """Build a GpuCorrMM op for the given border mode and subsampling.

    ``mode`` is passed straight through as the op's border_mode instead
    of being translated into the deprecated ``pad`` argument.
    """
    return theano.sandbox.cuda.blas.GpuCorrMM(mode, subsample)
def dnn_op(mode, subsample):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论