Commit 2451318a authored by Nicolas Ballas, committed by Pascal Lamblin

new interface, work in progress

Parent 662ea98e
"""
FIXME
"""
__docformat__ = "restructuredtext en"
import copy
import logging

import numpy

import theano
from theano.gof import Apply, Op, local_optimizer
from theano.tensor import (as_tensor_variable, blas, get_scalar_constant_value,
                           patternbroadcast, NotScalarConstantError)
from theano.sandbox.cuda import gpu_optimizer, register_opt
from theano.sandbox.cuda.basic_ops import (
    GpuFromHost, HostFromGpu, gpu_contiguous, gpu_from_host, host_from_gpu
)
from theano.sandbox.cuda.blas import (GpuCorrMM, GpuCorrMM_gradWeights,
                                      GpuCorrMM_gradInputs)
from theano.sandbox.cuda.dnn import dnn_available, dnn_conv
imported_scipy_signal = False
try:
# TODO: move these back out to global scope when they no longer
# cause an atexit error
from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
from scipy.signal.sigtools import _convolve2d
imported_scipy_signal = True
except ImportError:
pass
_logger = logging.getLogger("theano.tensor.nnet.conv")
def conv2d(img,
           filters,
           input_shape=None,
           filter_shape=None,
           batch_size=None,
           border_mode='valid',
           subsample=(1, 1),
           filter_flip=False):
    """
    Build the symbolic graph for convolving a mini-batch of a stack of 2D
    inputs with a set of 2D filters, as used in Convolutional Neural
    Networks (CNN).

    :type img: symbolic 4D tensor
    :param img: mini-batch of feature map stacks, of shape
        (batch size, input channels, input rows, input columns).
        See the optional parameter ``input_shape``.
    :type filters: symbolic 4D tensor
    :param filters: set of filters used in CNN layer of shape
        (output channels, input channels, filter rows, filter columns).
        See the optional parameter ``filter_shape``.
    :type input_shape: None, tuple/list of len 4 of int or Constant variable
    :param input_shape: The shape of the input parameter.
        Optional, possibly used to choose an optimal implementation.
        You can give ``None`` for any element of the list to specify that
        this element is not known at compile time.
    :type filter_shape: None, tuple/list of len 4 of int or Constant variable
    :param filter_shape: The shape of the filters parameter.
        Optional, possibly used to choose an optimal implementation.
        You can give ``None`` for any element of the list to specify that
        this element is not known at compile time.
    :type batch_size: None or int
    :param batch_size: The mini-batch size, if known at compile time.
    :type border_mode: str, int or tuple of two int
    :param border_mode: Either of the following:
        * ``'valid'``: apply filter wherever it completely overlaps with the
          input. Generates output of shape: input shape - filter shape + 1
        * ``'full'``: apply filter wherever it partly overlaps with the
          input. Generates output of shape: input shape + filter shape - 1
        * ``'half'``: pad input with a symmetric border of
          ``filter rows // 2`` rows and ``filter columns // 2`` columns,
          then perform a valid convolution. For filters with an odd number
          of rows and columns, this leads to the output shape being equal
          to the input shape.
        * ``int``: pad input with a symmetric border of zeros of the given
          width, then perform a valid convolution.
        * ``(int1, int2)``: pad input with a symmetric border of ``int1``
          rows and ``int2`` columns, then perform a valid convolution.
    :type subsample: tuple of len 2
    :param subsample: factor by which to subsample the output.
        Also called strides elsewhere.
    :type filter_flip: bool
    :param filter_flip: If ``True``, flip the filter rows and columns before
        sliding them over the input, so the operation is a true convolution.
        If ``False`` (the default here), the filters are not flipped and the
        operation is a cross-correlation.
    :rtype: symbolic 4D tensor
    :return: set of feature maps generated by convolutional layer. Tensor is
        of shape (batch size, output channels, output rows, output columns)
    """
    if filter_flip:
        # Flipping the kernels turns the underlying correlation into a true
        # convolution.
        # NOTE(review): assumes the Conv2d op's backend implementations
        # cross-correlate -- confirm against the actual lowerings.
        filters = filters[:, :, ::-1, ::-1]
    # Forward the user-supplied shape hints and geometry to the op.  (The
    # original referenced an undefined ``image_shape`` and hard-coded
    # border_mode='valid', subsample=(1, 1), ignoring the arguments.)
    conv_op = Conv2d(imshp=input_shape, kshp=filter_shape, bsize=batch_size,
                     border_mode=border_mode, subsample=tuple(subsample))
    return conv_op(img, filters)
class BaseConv2d(Op):
    """Base class for the Conv2d interface Ops.

    Holds the static geometry information (shape hints, border mode,
    subsampling) shared by the forward op and its two gradient ops.
    Instances compare and hash on ``border_mode`` and ``subsample`` only
    (see ``__props__``); the shape attributes are optimization hints.
    """
    check_broadcast = False
    __props__ = ('border_mode', 'subsample')

    def __init__(self,
                 imshp=None, kshp=None, bsize=None,
                 border_mode="valid", subsample=(1, 1)):
        """
        :param imshp: optional shape hint for the input images.
        :param kshp: optional shape hint for the kernels.
        :param bsize: optional mini-batch size hint.
        :param border_mode: 'valid', 'full', 'half', a non-negative int, or
            a pair of non-negative ints giving explicit zero padding.
        :param subsample: pair of output subsampling factors (strides).
        """
        if isinstance(border_mode, int):
            border_mode = (border_mode, border_mode)
        if isinstance(border_mode, tuple):
            pad_h, pad_w = map(int, border_mode)
            border_mode = (pad_h, pad_w)
        if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
                border_mode in ('valid', 'full', 'half')):
            raise ValueError(
                'invalid border_mode {}, which must be either '
                '"valid", "full", "half", an integer or a pair of'
                ' integers'.format(border_mode))
        self.imshp = imshp
        # The original read ``self.kshp = kshp,`` -- the stray trailing
        # comma wrapped the shape in a 1-tuple and broke every later
        # ``self.kshp[...]`` access.
        self.kshp = kshp
        self.bsize = bsize
        self.border_mode = border_mode
        if len(subsample) != 2:
            raise ValueError("subsample must have two elements")
        # Stored as a tuple so the op stays hashable through __props__.
        self.subsample = tuple(subsample)
        # Flipped to True by the GPU graph optimizers below when this op's
        # node is moved to the GPU.
        self.on_gpu = False

    @property
    def pad(self):
        """Padding as encoded in ``border_mode``; (0, 0) for 'valid'."""
        if self.border_mode != 'valid':
            return self.border_mode
        return (0, 0)

    def __str__(self):
        return '%s{%s, %s}' % (
            self.__class__.__name__,
            self.border_mode,
            str(self.subsample))

    def flops(self, inp, outp):
        """Useful with the hack in profilemode to print the MFlops.

        ``inp`` holds the (input, filter) shapes, ``outp`` the output
        shape; if the output shape is correct, this gives the correct
        flops for any direction, sampling, padding, and border mode.
        """
        inputs, filters = inp
        outputs, = outp
        assert inputs[1] == filters[1]
        # nb mul and add by output pixel
        flops = filters[2] * filters[3] * 2
        # nb flops by output image
        flops *= outputs[2] * outputs[3]
        # nb patch multiplied
        flops *= inputs[1] * filters[0] * inputs[0]
        return flops
class Conv2d(BaseConv2d):
    """Forward 2D convolution.

    This op is a symbolic placeholder: it carries the convolution geometry
    and is expected to be replaced by a backend-specific implementation
    (cuDNN, CorrMM, ...) during graph optimization.  ``perform`` therefore
    always raises.
    """

    def __init__(self,
                 imshp=None,
                 kshp=None,
                 bsize=None,
                 border_mode="valid",
                 subsample=(1, 1),
                 pad=(0, 0)):
        # BaseConv2d.__init__ takes no separate ``pad`` argument: padding is
        # encoded in ``border_mode`` (see BaseConv2d.pad).  The original
        # forwarded ``pad`` to super(), which raised a TypeError; fold a
        # non-default pad into border_mode instead.
        if tuple(pad) != (0, 0):
            if border_mode != "valid":
                raise ValueError("'pad' can only be combined with "
                                 "border_mode='valid'")
            border_mode = tuple(map(int, pad))
        super(Conv2d, self).__init__(imshp, kshp, bsize,
                                     border_mode, subsample)

    def make_node(self, img, kern):
        """Build the Apply node computing the convolution of img by kern."""
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        broadcastable = [img.type.broadcastable[0],
                         kern.type.broadcastable[0],
                         False, False]
        # The original called ``broadcastable()`` -- a list is not callable.
        # Build the output variable from the input's type so the output
        # lives in the same space (CPU/GPU) and dtype as the input.
        output = img.type.clone(broadcastable=broadcastable)()
        return Apply(self, [img, kern], [output])

    def perform(self, node, inp, out_):
        # Placeholder op: must have been replaced by an optimizer before
        # execution (see module-level optimizers below).
        raise NotImplementedError('Conv2d theano optimization failed')

    def grad(self, inp, grads):
        bottom, weights = inp
        top, = grads
        d_bottom = Conv2d_gradInputs(self.imshp, self.kshp, self.bsize,
                                     self.border_mode, self.subsample)(
                                         weights, top, bottom.shape[-2:])
        d_weights = Conv2d_gradWeights(self.imshp, self.kshp, self.bsize,
                                       self.border_mode, self.subsample)(
                                           bottom, top, weights.shape[-2:])
        return d_bottom, d_weights
class Conv2d_gradWeights(BaseConv2d):
    """Gradient wrt. filters for `Conv2d`.

    :note: You will not want to use this directly, but rely on
           Theano's automatic differentiation or graph optimization to
           use it as needed.
    """

    def __init__(self,
                 imshp=None,
                 kshp=None,
                 bsize=None,
                 border_mode="valid",
                 subsample=(1, 1),
                 pad=(0, 0)):
        # BaseConv2d.__init__ takes no separate ``pad`` argument: padding is
        # encoded in ``border_mode`` (see BaseConv2d.pad), so fold a
        # non-default pad into border_mode instead of forwarding it.
        if tuple(pad) != (0, 0):
            if border_mode != "valid":
                raise ValueError("'pad' can only be combined with "
                                 "border_mode='valid'")
            border_mode = tuple(map(int, pad))
        super(Conv2d_gradWeights, self).__init__(imshp, kshp, bsize,
                                                 border_mode, subsample)

    def make_node(self, img, topgrad, shape=None):
        """Build the node computing d(cost)/d(filters).

        ``shape`` is the symbolic (rows, cols) of the filters; it is
        required whenever it cannot be inferred from the other shapes.
        """
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')
        if self.subsample != (1, 1) or self.border_mode == "half":
            # With subsampling or 'half' padding the kernel shape is not
            # recoverable from img/topgrad shapes alone.
            if shape is None:
                raise ValueError('shape must be given if subsample != (1, 1)'
                                 ' or border_mode == "half"')
            height_width = [shape[0], shape[1]]
        else:
            height_width = []
        broadcastable = [topgrad.type.broadcastable[1],
                         img.type.broadcastable[1],
                         False, False]
        # The original called ``broadcastable()`` -- a list is not callable.
        # Build the output variable from the input's type instead.
        output = img.type.clone(broadcastable=broadcastable)()
        return Apply(self, [img, topgrad] + height_width, [output])

    def perform(self, node, inp, out_):
        # Placeholder op: must have been replaced by an optimizer before
        # execution.
        raise NotImplementedError(
            'Conv2d_gradWeights theano optimization failed')

    def grad(self, inp, grads):
        bottom, top = inp[:2]
        weights, = grads
        d_bottom = Conv2d_gradInputs(self.imshp, self.kshp, self.bsize,
                                     self.border_mode, self.subsample)(
                                         weights, top, bottom.shape[-2:])
        d_top = Conv2d(self.imshp, self.kshp, self.bsize,
                       self.border_mode, self.subsample)(bottom, weights)
        # The optional height/width inputs are shape hints only, so they
        # are disconnected from the cost.
        d_height_width = ((theano.gradient.DisconnectedType()(),) * 2
                          if len(inp) == 4 else ())
        return (d_bottom, d_top) + d_height_width

    def connection_pattern(self, node):
        if node.nin == 2:
            return [[1], [1]]
        else:
            return [[1], [1], [0], [0]]  # no connection to height, width
class Conv2d_gradInputs(BaseConv2d):
    """Gradient wrt. inputs for `Conv2d`.

    :note: You will not want to use this directly, but rely on
           Theano's automatic differentiation or graph optimization to
           use it as needed.
    """
    # NOTE: this class must derive from BaseConv2d, not Conv2d (as the
    # original did): deriving from the forward op makes every
    # ``isinstance(node.op, Conv2d)`` dispatch in the optimizers below
    # misclassify gradient nodes as forward convolutions.

    def __init__(self,
                 imshp=None,
                 kshp=None,
                 bsize=None,
                 border_mode="valid",
                 subsample=(1, 1),
                 pad=(0, 0)):
        # BaseConv2d.__init__ takes no separate ``pad`` argument: padding is
        # encoded in ``border_mode`` (see BaseConv2d.pad), so fold a
        # non-default pad into border_mode instead of forwarding it.
        if tuple(pad) != (0, 0):
            if border_mode != "valid":
                raise ValueError("'pad' can only be combined with "
                                 "border_mode='valid'")
            border_mode = tuple(map(int, pad))
        super(Conv2d_gradInputs, self).__init__(imshp, kshp, bsize,
                                                border_mode, subsample)

    def make_node(self, kern, topgrad, shape=None):
        """Build the node computing d(cost)/d(inputs).

        ``shape`` is the symbolic (rows, cols) of the input images; it is
        required whenever subsampling makes it ambiguous.
        """
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')
        if self.subsample != (1, 1) and shape is None:
            raise ValueError('shape must be given if subsample != (1, 1)')
        height_width = [shape[0], shape[1]] if self.subsample != (1, 1) else []
        broadcastable = [topgrad.type.broadcastable[0],
                         kern.type.broadcastable[1],
                         False, False]
        # The original called ``broadcastable()`` -- a list is not callable.
        # Build the output variable from the input's type instead.
        output = kern.type.clone(broadcastable=broadcastable)()
        return Apply(self, [kern, topgrad] + height_width, [output])

    def perform(self, node, inp, out_):
        # Placeholder op: must have been replaced by an optimizer before
        # execution.  (Original message wrongly said 'Conv2d_gradWeight'.)
        raise NotImplementedError(
            'Conv2d_gradInputs theano optimization failed')

    def grad(self, inp, grads):
        weights, top = inp[:2]
        bottom, = grads
        d_weights = Conv2d_gradWeights(self.imshp, self.kshp, self.bsize,
                                       self.border_mode, self.subsample)(
                                           bottom, top, weights.shape[-2:])
        # The original referenced a nonexistent ``self.filter_shape``; the
        # attribute is ``self.kshp``.
        d_top = Conv2d(self.imshp, self.kshp, self.bsize,
                       self.border_mode, self.subsample)(bottom, weights)
        # The optional height/width inputs are shape hints only.
        d_height_width = ((theano.gradient.DisconnectedType()(),) * 2
                          if len(inp) == 4 else ())
        return (d_weights, d_top) + d_height_width

    def connection_pattern(self, node):
        if node.nin == 2:
            return [[1], [1]]
        else:
            return [[1], [1], [0], [0]]  # no connection to height, width
### to Gpu optimization
@local_optimizer([gpu_from_host, Conv2d, Conv2d_gradWeights, Conv2d_gradInputs])
def local_conv2d_gpu_conv(node):
    """
    gpu_from_host(Conv) -> (gpu)_Conv(gpu_from_host)
    Conv(host_from_gpu) -> host_from_gpu((gpu)_Conv)

    Marks the moved op with ``on_gpu = True`` so the GPU-specific
    optimizers below can pick it up.  (The original took a spurious
    second ``convop`` parameter, which ``local_optimizer`` never passes.)
    """
    if isinstance(node.op, GpuFromHost):
        # gpu_from_host(conv) -> gpu_conv(gpu_from_host)
        host_input = node.inputs[0]
        if host_input.owner and isinstance(host_input.owner.op,
                                           (Conv2d,
                                            Conv2d_gradWeights,
                                            Conv2d_gradInputs)):
            # Copy the op before flagging it: mutating the shared instance
            # would silently mark every other node using it as on-GPU too.
            gpu_conv = copy.copy(host_input.owner.op)
            gpu_conv.on_gpu = True
            inputs = host_input.owner.inputs
            img, kern = inputs[:2]
            # Grad ops may carry an extra (height, width) shape pair.
            extra = inputs[2:]
            if extra:
                out = gpu_conv(gpu_from_host(img), gpu_from_host(kern),
                               extra)
            else:
                out = gpu_conv(gpu_from_host(img), gpu_from_host(kern))
            # ``out`` is already on the GPU (its type is cloned from the
            # gpu_from_host inputs), so no extra transfer is needed.
            out = patternbroadcast(out, node.outputs[0].broadcastable)
            # out.values_eq_approx = values_eq_approx_high_tol
            return [out]
    if isinstance(node.op, (Conv2d, Conv2d_gradWeights, Conv2d_gradInputs)):
        # conv(host_from_gpu) -> host_from_gpu(gpu_conv)
        inputs = node.inputs
        img, kern = inputs[:2]
        img_on_gpu = (img.owner and isinstance(img.owner.op, HostFromGpu))
        kern_on_gpu = (kern.owner and isinstance(kern.owner.op, HostFromGpu))
        if img_on_gpu or kern_on_gpu:
            gpu_conv = copy.copy(node.op)
            gpu_conv.on_gpu = True
            extra = inputs[2:]
            if extra:
                out = gpu_conv(gpu_from_host(img), gpu_from_host(kern),
                               extra)
            else:
                out = gpu_conv(gpu_from_host(img), gpu_from_host(kern))
            # The replaced node produced a host variable, so transfer the
            # GPU result back before substituting it.
            out = patternbroadcast(host_from_gpu(out),
                                   node.outputs[0].broadcastable)
            # out.values_eq_approx = values_eq_approx_high_tol
            return [out]

# We register the optimizer that moves convolutions to the GPU.
register_opt()(local_conv2d_gpu_conv)
#### GPU DNN optimization
@local_optimizer([Conv2d, Conv2d_gradWeights, Conv2d_gradInputs])
def local_conv2d_dnn(node):
    """Replace GPU-flagged Conv2d-family ops with cuDNN convolutions."""
    if not dnn_available():
        return
    # The original read a bare ``border_mode`` name that was never bound;
    # the op's attribute was intended.
    if node.op.border_mode not in ['full', 'valid']:
        return
    if isinstance(node.op, Conv2d) and node.op.on_gpu:
        img, kern = node.inputs
        rval = dnn_conv(img, kern,
                        border_mode=node.op.border_mode,
                        subsample=node.op.subsample,
                        direction_hint='forward')
        return [rval]
    if isinstance(node.op, Conv2d_gradWeights) and node.op.on_gpu:
        # Gradient nodes may carry extra (height, width) shape inputs,
        # so unpack only the first two.
        img, kern = node.inputs[:2]
        rval = dnn_conv(img, kern,
                        border_mode=node.op.border_mode,
                        subsample=node.op.subsample,
                        direction_hint='bprop weights')
        return [rval]
    if isinstance(node.op, Conv2d_gradInputs) and node.op.on_gpu:
        img, kern = node.inputs[:2]
        rval = dnn_conv(img, kern,
                        border_mode=node.op.border_mode,
                        subsample=node.op.subsample,
                        direction_hint='bprop inputs')
        return [rval]

register_opt()(local_conv2d_dnn)
#### GPU CorrMM optimization
@local_optimizer([Conv2d])
def local_conv2d_gemm(node):
    """Replace a GPU-flagged forward Conv2d with a GpuCorrMM-based graph.

    (The original tested ``node.on_gpu`` -- an Apply has no such
    attribute; the flag lives on the op -- and called
    ``tensor.patternbroadcast`` with no ``tensor`` name in scope.)
    """
    if (isinstance(node.op, Conv2d) and
            node.op.on_gpu and
            node.op.border_mode in ['full', 'valid']):
        img, kern = node.inputs
        border_mode = node.op.border_mode
        subsample = node.op.subsample
        if (border_mode == 'valid') or (subsample != (1, 1)):
            # need to flip the kernel for valid convolution
            kern = kern[:, :, ::-1, ::-1]
            # By default use GpuCorrMM
            rval = GpuCorrMM(border_mode, subsample)(
                gpu_contiguous(img), gpu_contiguous(kern))
            # call GpuCorrMM_gradWeights if good
            # (the latter is faster if batchsize * kernelHeight * kernelWidth
            # is larger than inputChannels * outputHeight * outputWidth.
            # GpuConv does not always store information on the batchsize and
            # channels, though, so we only use what information we have.)
            if ((subsample == (1, 1)) and
                    (node.op.imshp is not None) and
                    (None not in node.op.imshp[-2:]) and
                    (node.op.kshp is not None) and
                    (None not in node.op.kshp)):
                # we know the kernel and output size
                prod1 = node.op.kshp[0] * node.op.kshp[1]
                prod2 = ((node.op.imshp[-2] - node.op.kshp[0] + 1) *
                         (node.op.imshp[-1] - node.op.kshp[1] + 1))
                if ((node.op.bsize is not None) and
                        (len(node.op.imshp) == 3) and
                        (node.op.imshp[0] is not None)):
                    # we also know batchsize and input channels
                    prod1 *= node.op.bsize
                    prod2 *= node.op.imshp[0]
                # compare to decide
                if prod1 > prod2:
                    # (we need to wrap the result in as_cuda_ndarray_variable,
                    # because we are not allowed to replace a CudaNdarray with
                    # a DimShuffle instance in a graph optimization)
                    rval = theano.sandbox.cuda.as_cuda_ndarray_variable(
                        GpuCorrMM_gradWeights(border_mode, subsample)(
                            gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
                            gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
                        ).dimshuffle(1, 0, 2, 3))
        elif border_mode == 'full':
            # need to dimshuffle the kernel for full convolution
            kern = kern.dimshuffle(1, 0, 2, 3)
            # call GpuCorrMM_gradInputs
            rval = GpuCorrMM_gradInputs('valid', subsample)(
                gpu_contiguous(kern), gpu_contiguous(img))
        if node.outputs[0].broadcastable != rval.broadcastable:
            # With given shape information, the replacement may return a
            # different broadcast pattern than the original op.  This is
            # forbidden, so we fix it.
            rval = patternbroadcast(
                rval, node.outputs[0].type.broadcastable)
        return [rval]
@local_optimizer([Conv2d_gradWeights])
def local_conv2d_gradweight_gemm(node):
    """Replace a GPU-flagged Conv2d_gradWeights with GpuCorrMM_gradWeights.

    (The original tested for the forward op ``Conv2d``, read the
    nonexistent ``node.on_gpu``, referenced undefined ``img``/``kern``,
    and misspelled the op class as ``GpuCorrMM_gradWeight``.)
    """
    if isinstance(node.op, Conv2d_gradWeights) and node.op.on_gpu:
        img, topgrad = node.inputs[:2]
        args = [gpu_contiguous(img), gpu_contiguous(topgrad)]
        if node.inputs[2:]:
            # Forward the optional symbolic (height, width) kernel-shape
            # pair as the ``shape`` argument.
            args.append(node.inputs[2:])
        rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
                                     subsample=node.op.subsample)(*args)
        return [rval]
@local_optimizer([Conv2d_gradInputs])
def local_conv2d_gradinputs_gemm(node):
    """Replace a GPU-flagged Conv2d_gradInputs with GpuCorrMM_gradInputs.

    (The original tested for the forward op ``Conv2d``, read the
    nonexistent ``node.on_gpu`` and referenced undefined ``img``/``kern``.)
    """
    if isinstance(node.op, Conv2d_gradInputs) and node.op.on_gpu:
        kern, topgrad = node.inputs[:2]
        args = [gpu_contiguous(kern), gpu_contiguous(topgrad)]
        if node.inputs[2:]:
            # Forward the optional symbolic (height, width) input-shape
            # pair as the ``shape`` argument.
            args.append(node.inputs[2:])
        rval = GpuCorrMM_gradInputs(border_mode=node.op.border_mode,
                                    subsample=node.op.subsample)(*args)
        return [rval]
# First we register the optimizer that moves convolutions to the GPU.
### Cpu Optmization
@local_optimizer([Conv2d_gradWeights])
def local_conv2d_cpu(node):
    # NOTE(review): work-in-progress stub.  Despite its name this body is a
    # verbatim copy of the GPU grad-weights optimizer, not a CPU lowering.
    # As written it is dead/broken code: it is tracked for
    # Conv2d_gradWeights yet tests for Conv2d; ``node.on_gpu`` does not
    # exist on an Apply (``node.op.on_gpu`` was presumably intended); and
    # ``GpuCorrMM_gradWeight``, ``img`` and ``kern`` are all undefined
    # names.  A real CPU implementation is still needed before this can be
    # registered.
    if isinstance(node.op, Conv2d) and node.on_gpu:
        rval = GpuCorrMM_gradWeight(border_mode=node.op.border_mode,
                                    subsample=node.op.subsample)(
            gpu_contiguous(img), gpu_contiguous(kern))
        return [rval]
@local_optimizer([Conv2d_gradWeights])
def local_conv2d_gradweight_cpu(node):
    # NOTE(review): work-in-progress stub copied from the GPU grad-weights
    # optimizer.  It checks the wrong op class (Conv2d instead of
    # Conv2d_gradWeights), reads the nonexistent ``node.on_gpu``
    # (``node.op.on_gpu`` was presumably intended), and uses the undefined
    # names ``GpuCorrMM_gradWeight``, ``img`` and ``kern``.  Needs a real
    # CPU lowering before registration.
    if isinstance(node.op, Conv2d) and node.on_gpu:
        rval = GpuCorrMM_gradWeight(border_mode=node.op.border_mode,
                                    subsample=node.op.subsample)(
            gpu_contiguous(img), gpu_contiguous(kern))
        return [rval]
@local_optimizer([Conv2d_gradInputs])
def local_conv2d_gradinputs_cpu(node):
    # NOTE(review): work-in-progress stub copied from the GPU optimizers.
    # It checks the wrong op class (Conv2d instead of Conv2d_gradInputs),
    # reads the nonexistent ``node.on_gpu`` (``node.op.on_gpu`` was
    # presumably intended), builds a GPU op rather than a CPU one, and
    # references the undefined names ``img`` and ``kern``.  Needs a real
    # CPU lowering before registration.
    if isinstance(node.op, Conv2d) and node.on_gpu:
        rval = GpuCorrMM_gradInputs(border_mode=node.op.border_mode,
                                    subsample=node.op.subsample)(
            gpu_contiguous(img), gpu_contiguous(kern))
        return [rval]
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment