Commit 6ca377fb authored by Frederic

Merge remote-tracking branch 'TheanoConv3d2d/master'

import theano
from theano.gradient import DisconnectedType
from theano.gof import Op, Apply
from theano import tensor
import theano.sandbox.cuda as cuda
def get_diagonal_subtensor_view(x, i0, i1):
    """Return a view of `x` whose axis `i1` steps along a diagonal of the
    (i0, i1) plane.

    The view drops the first ``x.shape[i1] - 1`` entries along axis `i0`,
    then subtracts the `i0` stride from the `i1` stride, so advancing one
    step along `i1` also moves one step backwards along `i0`.  Requires
    ``x.shape[i0] >= x.shape[i1]``.  No data is copied.
    """
    if x.shape[i0] < x.shape[i1]:
        raise NotImplementedError('is this allowed?')
    index = [slice(None)] * x.ndim
    index[i0] = slice(x.shape[i1] - 1, None, None)
    view = x[tuple(index)]
    new_strides = list(view.strides)
    new_strides[i1] -= new_strides[i0]
    view.strides = new_strides
    return view
class DiagonalSubtensor(Op):
    """Extract the diagonal subtensor view of the input (see
    get_diagonal_subtensor_view).

    Works on both CPU and GPU arrays.
    """

    def __str__(self):
        suffix = "{inplace}" if self.inplace else ""
        return self.__class__.__name__ + suffix

    def __init__(self, inplace=False):
        self.inplace = inplace
        if inplace:
            # output 0 is a view of input 0
            self.view_map = {0: [0]}

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return self.inplace == other.inplace

    def __hash__(self):
        return hash((type(self), self.inplace))

    def make_node(self, x, i0, i1):
        _i0 = tensor.as_tensor_variable(i0)
        _i1 = tensor.as_tensor_variable(i1)
        return Apply(self, [x, _i0, _i1], [x.type()])

    def perform(self, node, inputs, output_storage):
        view = get_diagonal_subtensor_view(*inputs)
        # non-inplace mode must not expose a view of the input
        output_storage[0][0] = view if self.inplace else view.copy()

    def grad(self, inputs, g_outputs):
        # Scatter the output gradient back into a zero tensor shaped
        # like the input; the index inputs are not differentiable.
        zeros = tensor.zeros_like(inputs[0])
        gx = inc_diagonal_subtensor(zeros, inputs[1], inputs[2], g_outputs[0])
        return [gx, DisconnectedType()(), DisconnectedType()()]

    def connection_pattern(self, node):
        # only input 0 (the tensor) is connected to the output
        return [[True], [False], [False]]
# Shared non-inplace instance; used by conv3d and by IncDiagonalSubtensor.grad.
diagonal_subtensor = DiagonalSubtensor(False)
class IncDiagonalSubtensor(Op):
    """Add `amt` into the diagonal subtensor view of `x` and return the
    result (the counterpart of DiagonalSubtensor, used for its gradient).
    """

    def __str__(self):
        suffix = "{inplace}" if self.inplace else ""
        return self.__class__.__name__ + suffix

    def __init__(self, inplace=False):
        self.inplace = inplace
        if inplace:
            # input 0 is overwritten to produce output 0
            self.destroy_map = {0: [0]}

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return self.inplace == other.inplace

    def __hash__(self):
        return hash((type(self), self.inplace))

    def make_node(self, x, i0, i1, amt):
        _i0 = tensor.as_tensor_variable(i0)
        _i1 = tensor.as_tensor_variable(i1)
        return Apply(self, [x, _i0, _i1, _amt := amt] if False else [x, _i0, _i1, amt], [x.type()])

    def perform(self, node, inputs, output_storage):
        x, i0, i1, amt = inputs
        if not self.inplace:
            x = x.copy()
        # writing through the view updates x itself
        view = get_diagonal_subtensor_view(x, i0, i1)
        view += amt
        output_storage[0][0] = x

    def grad(self, inputs, g_outputs):
        x, i0, i1, amt = inputs
        gy = g_outputs[0]
        # x passes the gradient straight through; amt receives the
        # diagonal slice of it.  Index inputs are not differentiable.
        return [gy, DisconnectedType()(), DisconnectedType()(),
                diagonal_subtensor(gy, i0, i1)]

    def connection_pattern(self, node):
        # inputs 0 (x) and 3 (amt) are connected; the indices are not
        return [[True], [False], [False], [True]]
# Shared non-inplace instance; used by DiagonalSubtensor.grad.
inc_diagonal_subtensor = IncDiagonalSubtensor(False)
def conv3d(signals, filters,
           signals_shape=None, filters_shape=None,
           border_mode='valid', subsample=(1, 1, 1), **kwargs):
    """Convolve spatio-temporal filters with a movie.

    signals - timeseries of images whose pixels have color channels.
              shape: [Ns, Ts, C, Hs, Ws]
    filters - spatio-temporal filters
              shape: [Nf, Tf, C, Hf, Wf]
    signals_shape, filters_shape - literal shape tuples; required for now
              (symbolic shapes are not yet supported).
    border_mode - tuple of string mode names (or just a mode name, which
              means a homogenous tuple) for the (time, height, width)
              dimensions.  A mode name can be one of 'full', 'valid', and
              'same'.  Currently only 'valid' is implemented for the time
              dimension, and height/width must use the same mode.
    subsample - accepted for interface compatibility but currently unused;
              only (1, 1, 1) is effectively supported.

    Returns a 5d tensor; for all-'valid' modes its shape is
    [Ns, Ts - Tf + 1, Nf, Hs - Hf + 1, Ws - Wf + 1].
    """
    if isinstance(border_mode, str):
        border_mode = (border_mode, border_mode, border_mode)

    # TODO: support variables in the shape
    if signals_shape is None or filters_shape is None:
        raise NotImplementedError('need shapes for now')
    _signals_shape_5d = signals.shape if signals_shape is None else signals_shape
    _filters_shape_5d = filters.shape if filters_shape is None else filters_shape

    # Collapse the (batch, time) dimensions so a single 2d convolution
    # computes every (signal frame, filter frame) pair at once.
    _signals_shape_4d = (
        _signals_shape_5d[0] * _signals_shape_5d[1],
        _signals_shape_5d[2],
        _signals_shape_5d[3],
        _signals_shape_5d[4],
    )
    _filters_shape_4d = (
        _filters_shape_5d[0] * _filters_shape_5d[1],
        _filters_shape_5d[2],
        _filters_shape_5d[3],
        _filters_shape_5d[4],
    )

    if border_mode[1] != border_mode[2]:
        raise NotImplementedError('height and width bordermodes must match')
    out_4d = tensor.nnet.conv2d(
        signals.reshape(_signals_shape_4d),
        filters.reshape(_filters_shape_4d),
        image_shape=_signals_shape_4d,
        filter_shape=_filters_shape_4d,
        border_mode=border_mode[1])  # ignoring border_mode[2]

    # Reshape the output to expose the frame pairs again:
    # shape = Ns, Ts, Nf, Tf, H-out, W-out
    if border_mode[1] == 'valid':
        out_tmp = out_4d.reshape((
            _signals_shape_5d[0],  # Ns
            _signals_shape_5d[1],  # Ts
            _filters_shape_5d[0],  # Nf
            _filters_shape_5d[1],  # Tf
            _signals_shape_5d[3] - _filters_shape_5d[3] + 1,
            _signals_shape_5d[4] - _filters_shape_5d[4] + 1,
        ))
    elif border_mode[1] == 'full':
        out_tmp = out_4d.reshape((
            _signals_shape_5d[0],  # Ns
            _signals_shape_5d[1],  # Ts
            _filters_shape_5d[0],  # Nf
            _filters_shape_5d[1],  # Tf
            _signals_shape_5d[3] + _filters_shape_5d[3] - 1,
            _signals_shape_5d[4] + _filters_shape_5d[4] - 1,
        ))
    elif border_mode[1] == 'same':
        raise NotImplementedError()
    else:
        raise ValueError('invalid border mode', border_mode[1])

    # Now sum out along Tf to get the output, but the sum has to run on a
    # diagonal through the (Ts, Tf) submatrix so that each output frame
    # only accumulates the filter frames that overlap it in time.
    if border_mode[0] == 'valid':
        out_5d = diagonal_subtensor(out_tmp, 1, 3).sum(axis=3)
    elif border_mode[0] in ('full', 'same'):
        raise NotImplementedError('sequence border mode', border_mode[0])
    else:
        # Bug fix: report border_mode[0] (the time mode being tested in
        # this branch), not border_mode[1].
        raise ValueError('invalid border mode', border_mode[0])
    return out_5d
def make_gpu_optimizer(op, to_gpu):
    """Register a local optimizer that moves `op` to the GPU.

    `op` must work on both CPU and GPU inputs and be constructible by
    calling ``op()`` with no arguments (so good defaults are needed).
    `to_gpu` lists the indices of the inputs that are transferred.
    """
    @theano.gof.local_optimizer([])
    def local_to_gpu(node):
        """
        op(host_from_gpu()) -> host_from_gpu(op)
        gpu_from_host(op) -> op(gpu_from_host)
        """
        if isinstance(node.op, op):
            # If any of the GPU-bound inputs already lives on the GPU,
            # move the whole op to the GPU.
            if any(node.inputs[i].owner and
                   isinstance(node.inputs[i].owner.op, cuda.HostFromGpu)
                   for i in to_gpu):
                new_inputs = list(node.inputs)
                for i in to_gpu:
                    new_inputs[i] = cuda.gpu_from_host(new_inputs[i])
                return [cuda.host_from_gpu(op()(*new_inputs))]
        if node.op == cuda.gpu_from_host:
            # The op's result is being sent to the GPU anyway: push the
            # transfer below the op instead.
            host_input = node.inputs[0]
            if host_input.owner and isinstance(host_input.owner.op, op):
            # (rewrite applies only when the host value comes from `op`)
                producer = host_input.owner
                new_inputs = list(producer.inputs)
                for i in to_gpu:
                    new_inputs[i] = cuda.gpu_from_host(new_inputs[i])
                return [op()(*new_inputs)]
        return False
    local_to_gpu.__name__ = "local_to_gpu_" + op.__name__
    cuda.opt.register_opt()(local_to_gpu)
# Register GPU-transfer optimizations: DiagonalSubtensor moves input 0 (the
# tensor) to the GPU; IncDiagonalSubtensor moves inputs 0 and 3 (x and amt).
make_gpu_optimizer(DiagonalSubtensor, [0])
make_gpu_optimizer(IncDiagonalSubtensor, [0, 3])
import time
import numpy
from scipy import ndimage
import theano
from theano.sandbox import cuda
from conv3d2d import *
# Build the compilation modes used by the tests: one including the 'gpu'
# optimizations and one excluding them.  When the configured mode is
# FAST_COMPILE, fall back to FAST_RUN — presumably because FAST_COMPILE
# would skip the optimizations under test (NOTE(review): confirm).
if theano.config.mode == 'FAST_COMPILE':
    mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
    mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu')
else:
    mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
    mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def test_get_diagonal_subtensor_view():
    """Check the diagonal view against hand-computed 2d and 3d results."""
    # 2d case
    mat = numpy.arange(20).reshape(5, 4)
    view01 = get_diagonal_subtensor_view(mat, 0, 1)
    assert numpy.all(view01 == [[12, 9, 6, 3], [16, 13, 10, 7]])

    # 3d case: views over each pair of axes
    vol = numpy.arange(24).reshape(4, 3, 2)
    view01 = get_diagonal_subtensor_view(vol, 0, 1)
    view02 = get_diagonal_subtensor_view(vol, 0, 2)
    view12 = get_diagonal_subtensor_view(vol, 1, 2)
    assert numpy.all(view01 == [
        [[12, 13], [8, 9], [4, 5]],
        [[18, 19], [14, 15], [10, 11]]])
    assert numpy.all(view02 == [
        [[6, 1], [8, 3], [10, 5]],
        [[12, 7], [14, 9], [16, 11]],
        [[18, 13], [20, 15], [22, 17]],
    ])
    # The diagonal view of each leading matrix must equal the matching
    # slice of the diagonal view of the whole 3d tensor.
    for leading, sliced in zip(vol, view12):
        assert numpy.all(sliced == get_diagonal_subtensor_view(leading, 0, 1))
def test_get_diagonal_subtensor_view_gpu():
    """GPU variant of test_get_diagonal_subtensor_view: the 2d input is a
    CudaNdarray and results are pulled back with numpy.asarray."""
    x = numpy.arange(20, dtype='float32').reshape(5, 4)
    x = cuda.CudaNdarray(x)
    xv01 = get_diagonal_subtensor_view(x, 0, 1)
    # test that it works in 2d
    assert numpy.all(numpy.asarray(xv01) ==
                     [[12, 9, 6, 3], [16, 13, 10, 7]])
    # NOTE(review): unlike the 2d case above, this 3d input is NOT wrapped
    # in cuda.CudaNdarray (and is not float32), so everything below
    # re-exercises the CPU path rather than the GPU one — confirm whether
    # the conversion was omitted intentionally.
    x = numpy.arange(24).reshape(4, 3, 2)
    xv01 = get_diagonal_subtensor_view(x, 0, 1)
    xv02 = get_diagonal_subtensor_view(x, 0, 2)
    xv12 = get_diagonal_subtensor_view(x, 1, 2)
    assert numpy.all(numpy.asarray(xv01) == [
        [[12, 13], [8, 9], [4, 5]],
        [[18, 19], [14, 15], [10, 11]]])
    assert numpy.all(numpy.asarray(xv02) == [
        [[6, 1], [8, 3], [10, 5]],
        [[12, 7], [14, 9], [16, 11]],
        [[18, 13], [20, 15], [22, 17]],
    ])
    # diagonal views of each leading matrix is the same
    # as the slices out of the diagonal view of the entire 3d tensor
    for xi, xvi in zip(x, numpy.asarray(xv12)):
        assert numpy.all(numpy.asarray(xvi) ==
                         numpy.asarray(get_diagonal_subtensor_view(xi, 0, 1)))
def pyconv3d(signals, filters):
    """Reference (scipy.ndimage) implementation of the 3d convolution.

    signals - array of shape [Ns, Ts, C, Hs, Ws]
    filters - array of shape [Nf, Tf, C, Hf, Wf]

    Returns an array of shape [Ns, Ts-Tf+1, Nf, Hs-Hf+1, Ws-Wf+1],
    summed over the color channels C.

    NOTE(review): the ``[k:-k]`` slicing below only works for odd filter
    sizes >= 3; an even size would give an empty ``[0:-0]`` slice.
    """
    Ns, Ts, C, Hs, Ws = signals.shape
    Nf, Tf, C, Hf, Wf = filters.shape

    # Half-sizes: offsets of the 'valid' region inside ndimage's
    # same-size output.
    Tf2 = Tf // 2
    Hf2 = Hf // 2
    Wf2 = Wf // 2

    rval = numpy.zeros((Ns, Ts - Tf + 1, Nf, Hs - Hf + 1, Ws - Wf + 1))
    # `range` instead of the Python-2-only `xrange` (same iteration
    # behavior on both Python 2 and 3).
    for ns in range(Ns):
        for nf in range(Nf):
            for c in range(C):
                s_i = signals[ns, :, c, :, :]
                f_i = filters[nf, :, c, :, :]
                r_i = rval[ns, :, nf, :, :]
                # NOTE(review): cval=1 pads the borders with ones, which is
                # unusual for a convolution reference — confirm intent.
                o_i = ndimage.convolve(s_i, f_i, mode='constant', cval=1)
                # keep only the 'valid' region; += writes through the view
                r_i += o_i[Tf2:-Tf2, Hf2:-Hf2, Wf2:-Wf2]
    # Bug fix: the original accumulated into rval but never returned it.
    return rval
def test_conv3d():
    """Time conv3d on CPU (and on GPU when CUDA is available).

    NOTE(review): only wall-clock times are printed; the Theano outputs
    are never compared numerically against the pyconv3d reference.
    """
    # Problem sizes: signals [Ns, Ts, C, Hs, Ws], filters [Nf, Tf, C, Hf, Wf].
    Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
    Nf, Tf, C, Hf, Wf = 32, 5 , 3, 5 , 5
    signals = numpy.arange(Ns*Ts*C*Hs*Ws).reshape(Ns, Ts, C, Hs, Ws).astype('float32')
    filters = numpy.arange(Nf*Tf*C*Hf*Wf).reshape(Nf, Tf, C, Hf, Wf).astype('float32')
    # Time the pure-python/scipy reference once.
    t0 = time.time()
    pyconv3d(signals, filters)
    print time.time() - t0
    # Run the Theano version without the GPU, and with it when available.
    modes = [(mode_without_gpu, theano.tensor._shared)]
    if cuda.cuda_available:
        modes.append((mode_with_gpu, cuda.shared_constructor))
    for mode, shared in modes:
        s_signals = shared(signals)
        s_filters = shared(filters)
        # s_output only forces the computation to run.  NOTE(review): its
        # initial value has the signals' shape, not the conv output's
        # shape — presumably accepted because the variable's type does not
        # fix the shape; confirm.
        s_output = shared(signals*0)
        out = conv3d(s_signals, s_filters,
                     signals_shape=signals.shape,
                     filters_shape=filters.shape)
        newconv3d = theano.function([], [],
                                    updates={s_output: out},
                                    mode=mode)
        t0 = time.time()
        newconv3d()
        print time.time() - t0
        # Time the gradient too, writing it back into the shared variables.
        gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
        gnewconv3d = theano.function([], [],
                                     updates=[(s_filters, gfilters),
                                              (s_signals, gsignals)],
                                     mode=mode,
                                     name='grad')
        t0 = time.time()
        gnewconv3d()
        print 'grad', time.time() - t0
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment