Commit 6ca377fb authored by Frederic

Merge remote-tracking branch 'TheanoConv3d2d/master'

import theano
from theano.gradient import DisconnectedType
from theano.gof import Op, Apply
from theano import tensor
import theano.sandbox.cuda as cuda
def get_diagonal_subtensor_view(x, i0, i1):
    """Return a view of `x` whose axis `i1` steps along a diagonal of the
    (i0, i1) plane.

    The view drops the first ``x.shape[i1] - 1`` entries along axis `i0`,
    then subtracts the `i0` stride from the `i1` stride, so advancing one
    step along `i1` also moves one step backwards along `i0`.  Requires
    ``x.shape[i0] >= x.shape[i1]``.  No data is copied.
    """
    if x.shape[i0] < x.shape[i1]:
        raise NotImplementedError('is this allowed?')
    index = [slice(None)] * x.ndim
    index[i0] = slice(x.shape[i1] - 1, None, None)
    view = x[tuple(index)]
    new_strides = list(view.strides)
    new_strides[i1] -= new_strides[i0]
    view.strides = new_strides
    return view
class DiagonalSubtensor(Op):
    """Extract the diagonal subtensor view of the input (see
    get_diagonal_subtensor_view).

    Works on both CPU and GPU arrays.
    """

    def __str__(self):
        suffix = "{inplace}" if self.inplace else ""
        return self.__class__.__name__ + suffix

    def __init__(self, inplace=False):
        self.inplace = inplace
        if inplace:
            # output 0 is a view of input 0
            self.view_map = {0: [0]}

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return self.inplace == other.inplace

    def __hash__(self):
        return hash((type(self), self.inplace))

    def make_node(self, x, i0, i1):
        _i0 = tensor.as_tensor_variable(i0)
        _i1 = tensor.as_tensor_variable(i1)
        return Apply(self, [x, _i0, _i1], [x.type()])

    def perform(self, node, inputs, output_storage):
        view = get_diagonal_subtensor_view(*inputs)
        # non-inplace mode must not expose a view of the input
        output_storage[0][0] = view if self.inplace else view.copy()

    def grad(self, inputs, g_outputs):
        # Scatter the output gradient back into a zero tensor shaped
        # like the input; the index inputs are not differentiable.
        zeros = tensor.zeros_like(inputs[0])
        gx = inc_diagonal_subtensor(zeros, inputs[1], inputs[2], g_outputs[0])
        return [gx, DisconnectedType()(), DisconnectedType()()]

    def connection_pattern(self, node):
        # only input 0 (the tensor) is connected to the output
        return [[True], [False], [False]]
# Shared non-inplace instance; used by conv3d and by IncDiagonalSubtensor.grad.
diagonal_subtensor = DiagonalSubtensor(False)
class IncDiagonalSubtensor(Op):
    """Add `amt` into the diagonal subtensor view of `x` and return the
    result (the counterpart of DiagonalSubtensor, used for its gradient).
    """

    def __str__(self):
        suffix = "{inplace}" if self.inplace else ""
        return self.__class__.__name__ + suffix

    def __init__(self, inplace=False):
        self.inplace = inplace
        if inplace:
            # input 0 is overwritten to produce output 0
            self.destroy_map = {0: [0]}

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return self.inplace == other.inplace

    def __hash__(self):
        return hash((type(self), self.inplace))

    def make_node(self, x, i0, i1, amt):
        _i0 = tensor.as_tensor_variable(i0)
        _i1 = tensor.as_tensor_variable(i1)
        return Apply(self, [x, _i0, _i1, _amt := amt] if False else [x, _i0, _i1, amt], [x.type()])

    def perform(self, node, inputs, output_storage):
        x, i0, i1, amt = inputs
        if not self.inplace:
            x = x.copy()
        # writing through the view updates x itself
        view = get_diagonal_subtensor_view(x, i0, i1)
        view += amt
        output_storage[0][0] = x

    def grad(self, inputs, g_outputs):
        x, i0, i1, amt = inputs
        gy = g_outputs[0]
        # x passes the gradient straight through; amt receives the
        # diagonal slice of it.  Index inputs are not differentiable.
        return [gy, DisconnectedType()(), DisconnectedType()(),
                diagonal_subtensor(gy, i0, i1)]

    def connection_pattern(self, node):
        # inputs 0 (x) and 3 (amt) are connected; the indices are not
        return [[True], [False], [False], [True]]
# Shared non-inplace instance; used by DiagonalSubtensor.grad.
inc_diagonal_subtensor = IncDiagonalSubtensor(False)
def conv3d(signals, filters,
           signals_shape=None, filters_shape=None,
           border_mode='valid', subsample=(1, 1, 1), **kwargs):
    """Convolve spatio-temporal filters with a movie.

    signals - timeseries of images whose pixels have color channels.
              shape: [Ns, Ts, C, Hs, Ws]
    filters - spatio-temporal filters
              shape: [Nf, Tf, C, Hf, Wf]
    signals_shape, filters_shape - literal shape tuples; required for now
              (symbolic shapes are not yet supported).
    border_mode - tuple of string mode names (or just a mode name, which
              means a homogenous tuple) for the (time, height, width)
              dimensions.  A mode name can be one of 'full', 'valid', and
              'same'.  Currently only 'valid' is implemented for the time
              dimension, and height/width must use the same mode.
    subsample - accepted for interface compatibility but currently unused;
              only (1, 1, 1) is effectively supported.

    Returns a 5d tensor; for all-'valid' modes its shape is
    [Ns, Ts - Tf + 1, Nf, Hs - Hf + 1, Ws - Wf + 1].
    """
    if isinstance(border_mode, str):
        border_mode = (border_mode, border_mode, border_mode)

    # TODO: support variables in the shape
    if signals_shape is None or filters_shape is None:
        raise NotImplementedError('need shapes for now')
    _signals_shape_5d = signals.shape if signals_shape is None else signals_shape
    _filters_shape_5d = filters.shape if filters_shape is None else filters_shape

    # Collapse the (batch, time) dimensions so a single 2d convolution
    # computes every (signal frame, filter frame) pair at once.
    _signals_shape_4d = (
        _signals_shape_5d[0] * _signals_shape_5d[1],
        _signals_shape_5d[2],
        _signals_shape_5d[3],
        _signals_shape_5d[4],
    )
    _filters_shape_4d = (
        _filters_shape_5d[0] * _filters_shape_5d[1],
        _filters_shape_5d[2],
        _filters_shape_5d[3],
        _filters_shape_5d[4],
    )

    if border_mode[1] != border_mode[2]:
        raise NotImplementedError('height and width bordermodes must match')
    out_4d = tensor.nnet.conv2d(
        signals.reshape(_signals_shape_4d),
        filters.reshape(_filters_shape_4d),
        image_shape=_signals_shape_4d,
        filter_shape=_filters_shape_4d,
        border_mode=border_mode[1])  # ignoring border_mode[2]

    # Reshape the output to expose the frame pairs again:
    # shape = Ns, Ts, Nf, Tf, H-out, W-out
    if border_mode[1] == 'valid':
        out_tmp = out_4d.reshape((
            _signals_shape_5d[0],  # Ns
            _signals_shape_5d[1],  # Ts
            _filters_shape_5d[0],  # Nf
            _filters_shape_5d[1],  # Tf
            _signals_shape_5d[3] - _filters_shape_5d[3] + 1,
            _signals_shape_5d[4] - _filters_shape_5d[4] + 1,
        ))
    elif border_mode[1] == 'full':
        out_tmp = out_4d.reshape((
            _signals_shape_5d[0],  # Ns
            _signals_shape_5d[1],  # Ts
            _filters_shape_5d[0],  # Nf
            _filters_shape_5d[1],  # Tf
            _signals_shape_5d[3] + _filters_shape_5d[3] - 1,
            _signals_shape_5d[4] + _filters_shape_5d[4] - 1,
        ))
    elif border_mode[1] == 'same':
        raise NotImplementedError()
    else:
        raise ValueError('invalid border mode', border_mode[1])

    # Now sum out along Tf to get the output, but the sum has to run on a
    # diagonal through the (Ts, Tf) submatrix so that each output frame
    # only accumulates the filter frames that overlap it in time.
    if border_mode[0] == 'valid':
        out_5d = diagonal_subtensor(out_tmp, 1, 3).sum(axis=3)
    elif border_mode[0] in ('full', 'same'):
        raise NotImplementedError('sequence border mode', border_mode[0])
    else:
        # Bug fix: report border_mode[0] (the time mode being tested in
        # this branch), not border_mode[1].
        raise ValueError('invalid border mode', border_mode[0])
    return out_5d
def make_gpu_optimizer(op, to_gpu):
    """Register a local optimizer that moves `op` to the GPU.

    `op` must work on both CPU and GPU inputs and be constructible by
    calling ``op()`` with no arguments (so good defaults are needed).
    `to_gpu` lists the indices of the inputs that are transferred.
    """
    @theano.gof.local_optimizer([])
    def local_to_gpu(node):
        """
        op(host_from_gpu()) -> host_from_gpu(op)
        gpu_from_host(op) -> op(gpu_from_host)
        """
        if isinstance(node.op, op):
            # If any of the GPU-bound inputs already lives on the GPU,
            # move the whole op to the GPU.
            if any(node.inputs[i].owner and
                   isinstance(node.inputs[i].owner.op, cuda.HostFromGpu)
                   for i in to_gpu):
                new_inputs = list(node.inputs)
                for i in to_gpu:
                    new_inputs[i] = cuda.gpu_from_host(new_inputs[i])
                return [cuda.host_from_gpu(op()(*new_inputs))]
        if node.op == cuda.gpu_from_host:
            # The op's result is being sent to the GPU anyway: push the
            # transfer below the op instead.
            host_input = node.inputs[0]
            if host_input.owner and isinstance(host_input.owner.op, op):
            # (rewrite applies only when the host value comes from `op`)
                producer = host_input.owner
                new_inputs = list(producer.inputs)
                for i in to_gpu:
                    new_inputs[i] = cuda.gpu_from_host(new_inputs[i])
                return [op()(*new_inputs)]
        return False
    local_to_gpu.__name__ = "local_to_gpu_" + op.__name__
    cuda.opt.register_opt()(local_to_gpu)
# Register GPU-transfer optimizations: DiagonalSubtensor moves input 0 (the
# tensor) to the GPU; IncDiagonalSubtensor moves inputs 0 and 3 (x and amt).
make_gpu_optimizer(DiagonalSubtensor, [0])
make_gpu_optimizer(IncDiagonalSubtensor, [0, 3])
import time
import numpy
from scipy import ndimage
import theano
from theano.sandbox import cuda
from conv3d2d import *
# Build the compilation modes used by the tests: one including the 'gpu'
# optimizations and one excluding them.  When the configured mode is
# FAST_COMPILE, fall back to FAST_RUN — presumably because FAST_COMPILE
# would skip the optimizations under test (NOTE(review): confirm).
if theano.config.mode == 'FAST_COMPILE':
    mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
    mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu')
else:
    mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
    mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def test_get_diagonal_subtensor_view():
    """Check the diagonal view against hand-computed 2d and 3d results."""
    # 2d case
    mat = numpy.arange(20).reshape(5, 4)
    view01 = get_diagonal_subtensor_view(mat, 0, 1)
    assert numpy.all(view01 == [[12, 9, 6, 3], [16, 13, 10, 7]])

    # 3d case: views over each pair of axes
    vol = numpy.arange(24).reshape(4, 3, 2)
    view01 = get_diagonal_subtensor_view(vol, 0, 1)
    view02 = get_diagonal_subtensor_view(vol, 0, 2)
    view12 = get_diagonal_subtensor_view(vol, 1, 2)
    assert numpy.all(view01 == [
        [[12, 13], [8, 9], [4, 5]],
        [[18, 19], [14, 15], [10, 11]]])
    assert numpy.all(view02 == [
        [[6, 1], [8, 3], [10, 5]],
        [[12, 7], [14, 9], [16, 11]],
        [[18, 13], [20, 15], [22, 17]],
    ])
    # The diagonal view of each leading matrix must equal the matching
    # slice of the diagonal view of the whole 3d tensor.
    for leading, sliced in zip(vol, view12):
        assert numpy.all(sliced == get_diagonal_subtensor_view(leading, 0, 1))
def test_get_diagonal_subtensor_view_gpu():
    """GPU variant of test_get_diagonal_subtensor_view: the 2d input is a
    CudaNdarray and results are pulled back with numpy.asarray."""
    x = numpy.arange(20, dtype='float32').reshape(5, 4)
    x = cuda.CudaNdarray(x)
    xv01 = get_diagonal_subtensor_view(x, 0, 1)
    # test that it works in 2d
    assert numpy.all(numpy.asarray(xv01) ==
                     [[12, 9, 6, 3], [16, 13, 10, 7]])
    # NOTE(review): unlike the 2d case above, this 3d input is NOT wrapped
    # in cuda.CudaNdarray (and is not float32), so everything below
    # re-exercises the CPU path rather than the GPU one — confirm whether
    # the conversion was omitted intentionally.
    x = numpy.arange(24).reshape(4, 3, 2)
    xv01 = get_diagonal_subtensor_view(x, 0, 1)
    xv02 = get_diagonal_subtensor_view(x, 0, 2)
    xv12 = get_diagonal_subtensor_view(x, 1, 2)
    assert numpy.all(numpy.asarray(xv01) == [
        [[12, 13], [8, 9], [4, 5]],
        [[18, 19], [14, 15], [10, 11]]])
    assert numpy.all(numpy.asarray(xv02) == [
        [[6, 1], [8, 3], [10, 5]],
        [[12, 7], [14, 9], [16, 11]],
        [[18, 13], [20, 15], [22, 17]],
    ])
    # diagonal views of each leading matrix is the same
    # as the slices out of the diagonal view of the entire 3d tensor
    for xi, xvi in zip(x, numpy.asarray(xv12)):
        assert numpy.all(numpy.asarray(xvi) ==
                         numpy.asarray(get_diagonal_subtensor_view(xi, 0, 1)))
def pyconv3d(signals, filters):
    """Reference (scipy.ndimage) implementation of the 3d convolution.

    signals - array of shape [Ns, Ts, C, Hs, Ws]
    filters - array of shape [Nf, Tf, C, Hf, Wf]

    Returns an array of shape [Ns, Ts-Tf+1, Nf, Hs-Hf+1, Ws-Wf+1],
    summed over the color channels C.

    NOTE(review): the ``[k:-k]`` slicing below only works for odd filter
    sizes >= 3; an even size would give an empty ``[0:-0]`` slice.
    """
    Ns, Ts, C, Hs, Ws = signals.shape
    Nf, Tf, C, Hf, Wf = filters.shape

    # Half-sizes: offsets of the 'valid' region inside ndimage's
    # same-size output.
    Tf2 = Tf // 2
    Hf2 = Hf // 2
    Wf2 = Wf // 2

    rval = numpy.zeros((Ns, Ts - Tf + 1, Nf, Hs - Hf + 1, Ws - Wf + 1))
    # `range` instead of the Python-2-only `xrange` (same iteration
    # behavior on both Python 2 and 3).
    for ns in range(Ns):
        for nf in range(Nf):
            for c in range(C):
                s_i = signals[ns, :, c, :, :]
                f_i = filters[nf, :, c, :, :]
                r_i = rval[ns, :, nf, :, :]
                # NOTE(review): cval=1 pads the borders with ones, which is
                # unusual for a convolution reference — confirm intent.
                o_i = ndimage.convolve(s_i, f_i, mode='constant', cval=1)
                # keep only the 'valid' region; += writes through the view
                r_i += o_i[Tf2:-Tf2, Hf2:-Hf2, Wf2:-Wf2]
    # Bug fix: the original accumulated into rval but never returned it.
    return rval
def test_conv3d():
    """Time conv3d on CPU (and on GPU when CUDA is available).

    NOTE(review): only wall-clock times are printed; the Theano outputs
    are never compared numerically against the pyconv3d reference.
    """
    # Problem sizes: signals [Ns, Ts, C, Hs, Ws], filters [Nf, Tf, C, Hf, Wf].
    Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
    Nf, Tf, C, Hf, Wf = 32, 5 , 3, 5 , 5
    signals = numpy.arange(Ns*Ts*C*Hs*Ws).reshape(Ns, Ts, C, Hs, Ws).astype('float32')
    filters = numpy.arange(Nf*Tf*C*Hf*Wf).reshape(Nf, Tf, C, Hf, Wf).astype('float32')
    # Time the pure-python/scipy reference once.
    t0 = time.time()
    pyconv3d(signals, filters)
    print time.time() - t0
    # Run the Theano version without the GPU, and with it when available.
    modes = [(mode_without_gpu, theano.tensor._shared)]
    if cuda.cuda_available:
        modes.append((mode_with_gpu, cuda.shared_constructor))
    for mode, shared in modes:
        s_signals = shared(signals)
        s_filters = shared(filters)
        # s_output only forces the computation to run.  NOTE(review): its
        # initial value has the signals' shape, not the conv output's
        # shape — presumably accepted because the variable's type does not
        # fix the shape; confirm.
        s_output = shared(signals*0)
        out = conv3d(s_signals, s_filters,
                     signals_shape=signals.shape,
                     filters_shape=filters.shape)
        newconv3d = theano.function([], [],
                                    updates={s_output: out},
                                    mode=mode)
        t0 = time.time()
        newconv3d()
        print time.time() - t0
        # Time the gradient too, writing it back into the shared variables.
        gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
        gnewconv3d = theano.function([], [],
                                     updates=[(s_filters, gfilters),
                                              (s_signals, gsignals)],
                                     mode=mode,
                                     name='grad')
        t0 = time.time()
        gnewconv3d()
        print 'grad', time.time() - t0
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment