Commit 36694a6d authored by Pascal Lamblin

Merge pull request #1454 from nouiz/conv3d2d

[MRG]Conv3d2d
......@@ -28,6 +28,7 @@ env:
- PART="-e test_basic.py theano/tensor/tests"
script:
- "if [ `expr \"$PART\" : '.*sparse'` -gt \"0\" ]; then pip install scipy==0.8 --use-mirrors; fi"
- "if [ `expr \"$PART\" : '.*nnet'` -gt \"0\" ]; then pip install scipy==0.8 --use-mirrors; fi"
- export THEANO_FLAGS=warn.ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise
- python --version
- uname -a
......
......@@ -16,9 +16,6 @@
present in convolutional neural networks (where filters are 3D and pool
over several input channels).
The project `TheanoConv3d2d <https://github.com/jaberg/TheanoConv3d2d>`_
is probably faster than the Conv3d documented here.
.. module:: conv
:platform: Unix, Windows
:synopsis: ops for signal processing
......@@ -31,6 +28,21 @@ TODO: Give examples for how to use these things! They are pretty complicated.
- :func:`signal.conv2d <theano.tensor.signal.conv.conv2d>`.
- :func:`nnet.conv2d <theano.tensor.nnet.conv.conv2d>`.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`.
- :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>`
  Another conv3d implementation that uses conv2d with data reshaping.
  It is faster in some cases than conv3d, specifically on the GPU
  (a usage sketch follows the function list below).
- `Faster conv2d <http://deeplearning.net/software/pylearn2/library/alex.html>`_
  This is in Pylearn2; it is not well documented and uses a different
  memory layout for the input. It is important to keep the input in
  the native memory layout and not to use dimshuffle on the inputs,
  otherwise you lose much of the speed-up, so this is not a drop-in
  replacement for conv2d.
  Normally those are called from the `linear transform
  <http://deeplearning.net/software/pylearn2/library/linear.html>`_
  implementation.
.. autofunction:: theano.tensor.nnet.conv.conv2d
.. autofunction:: theano.tensor.nnet.Conv3D.conv3D
.. autofunction:: theano.tensor.nnet.conv3d2d.conv3d
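Pending the TODO above, here is a minimal usage sketch for
``nnet.conv2d`` and ``conv3d2d.conv3d``. The shapes are illustrative
assumptions taken from the tests in this pull request, not requirements
of the ops::

    import theano
    import theano.tensor as T
    from theano.tensor.nnet.conv import conv2d
    from theano.tensor.nnet.conv3d2d import conv3d

    # 2d: images are (batch, channels, rows, cols),
    # filters are (nfilters, channels, frows, fcols)
    images = T.tensor4('images')
    filters2d = T.tensor4('filters2d')
    out2d = conv2d(images, filters2d)

    # 3d: signals are (batch, time, channels, rows, cols),
    # filters are (nfilters, ftime, channels, frows, fcols)
    ftensor5 = T.TensorType('float32', (False,) * 5)
    signals = ftensor5('signals')
    filters3d = ftensor5('filters3d')
    out3d = conv3d(signals, filters3d,
                   signals_shape=(3, 10, 3, 32, 32),
                   filters_shape=(32, 5, 3, 5, 5))

    f = theano.function([signals, filters3d], out3d)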
......@@ -12,11 +12,12 @@ import theano.tensor as T
# Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda
from theano.tensor.nnet.tests import test_conv3d2d
if cuda.cuda_available == False:
raise SkipTest('Optional package cuda disabled')
if theano.config.mode=='FAST_COMPILE':
if theano.config.mode == 'FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu')
else:
......@@ -26,26 +27,28 @@ else:
def test_shape_i():
x = cuda.ftensor3()
v = cuda.CudaNdarray(numpy.zeros((3,4,5),dtype='float32'))
f = theano.function([x],x.shape[1])
v = cuda.CudaNdarray(numpy.zeros((3, 4, 5), dtype='float32'))
f = theano.function([x], x.shape[1])
topo = f.maker.fgraph.toposort()
assert f(v)==4
if theano.config.mode!='FAST_COMPILE':
assert len(topo)==1
assert isinstance(topo[0].op,T.opt.Shape_i)
assert f(v) == 4
if theano.config.mode != 'FAST_COMPILE':
assert len(topo) == 1
assert isinstance(topo[0].op, T.opt.Shape_i)
def test_shape():
x = cuda.ftensor3()
v = cuda.CudaNdarray(numpy.zeros((3,4,5),dtype='float32'))
f = theano.function([x],x.shape)
v = cuda.CudaNdarray(numpy.zeros((3, 4, 5), dtype='float32'))
f = theano.function([x], x.shape)
topo = f.maker.fgraph.toposort()
assert numpy.all(f(v)==(3,4,5))
if theano.config.mode!='FAST_COMPILE':
assert len(topo)==4
assert isinstance(topo[0].op,T.opt.Shape_i)
assert isinstance(topo[1].op,T.opt.Shape_i)
assert isinstance(topo[2].op,T.opt.Shape_i)
assert isinstance(topo[3].op,T.opt.MakeVector)
assert numpy.all(f(v) == (3, 4, 5))
if theano.config.mode != 'FAST_COMPILE':
assert len(topo) == 4
assert isinstance(topo[0].op, T.opt.Shape_i)
assert isinstance(topo[1].op, T.opt.Shape_i)
assert isinstance(topo[2].op, T.opt.Shape_i)
assert isinstance(topo[3].op, T.opt.MakeVector)
def test_softmax_optimizations():
from theano.tensor.nnet.nnet import softmax, crossentropy_categorical_1hot
......@@ -66,16 +69,17 @@ def test_softmax_optimizations():
assert fgraph.outputs[0].owner.inputs[0].owner.op == cuda.host_from_gpu
assert fgraph.outputs[0].owner.inputs[0].owner.inputs[0].owner.op == cuda.nnet.gpu_crossentropy_softmax_argmax_1hot_with_bias
def test_may_share_memory_cuda():
from theano.misc.may_share_memory import may_share_memory
a = cuda.CudaNdarray(numpy.zeros((3,4),dtype='float32'))
b = cuda.CudaNdarray(numpy.zeros((3,4),dtype='float32'))
na = numpy.zeros((3,4))
nb = numpy.zeros((3,4))
a = cuda.CudaNdarray(numpy.zeros((3, 4), dtype='float32'))
b = cuda.CudaNdarray(numpy.zeros((3, 4), dtype='float32'))
na = numpy.zeros((3, 4))
nb = numpy.zeros((3, 4))
va = a.view()
vb = b.view()
ra = a.reshape((4,3))
rb = b.reshape((4,3))
ra = a.reshape((4, 3))
rb = b.reshape((4, 3))
    # can't test the transpose, as assigning ta._strides is not implemented
    # manual transpose of a
......@@ -84,25 +88,28 @@ def test_may_share_memory_cuda():
    #elem_size = numpy.zeros(0, dtype=a.dtype).dtype.itemsize
#ta.gpudata += ta.size*elem_size
for a_,b_,rep in [(a,a,True),(b,b,True),(a,b,False),
(a,na,False),(b,nb,False),(na,b,False),(nb,a,False),
(a,va,True),(b,vb,True),(va,b,False),(a,vb,False),
(a,ra,True),(b,rb,True),(ra,b,False),(a,rb,False),
for a_, b_, rep in [(a, a, True), (b, b, True), (a, b, False),
(a, na, False), (b, nb, False),
(na, b, False), (nb, a, False),
(a, va, True), (b, vb, True),
(va, b, False), (a, vb, False),
(a, ra, True), (b, rb, True),
(ra, b, False), (a, rb, False),
]:
assert may_share_memory(a_,b_)==rep
assert may_share_memory(b_,a_)==rep
assert may_share_memory(a_, b_) == rep
assert may_share_memory(b_, a_) == rep
    # test that it raises an error when needed
for a_,b_,rep in [(a,(0,),False),(a,1,False),(a,None,False)]:
assert may_share_memory(a_,b_,False)==rep
assert may_share_memory(b_,a_,False)==rep
for a_, b_, rep in [(a, (0,), False), (a, 1, False), (a, None, False)]:
assert may_share_memory(a_, b_, False) == rep
assert may_share_memory(b_, a_, False) == rep
try:
may_share_memory(a_,b_)
may_share_memory(a_, b_)
raise Exception("An error was expected")
except TypeError:
pass
try:
may_share_memory(b_,a_)
may_share_memory(b_, a_)
raise Exception("An error was expected")
except TypeError:
pass
......@@ -127,3 +134,12 @@ def test_deepcopy():
out = f(a_v)
assert out is not a_v
assert numpy.allclose(numpy.asarray(a_v), numpy.asarray(out))
def test_get_diagonal_subtensor_view():
test_conv3d2d.test_get_diagonal_subtensor_view(wrap=cuda.CudaNdarray)
def test_conv3d():
test_conv3d2d.test_conv3d(mode=mode_with_gpu,
shared=cuda.shared_constructor)
......@@ -561,6 +561,11 @@ conv3D = Conv3D()
:note: The order of dimensions does not correspond to the one in `conv2d`.
This is for optimization.
    :note: The GPU implementation is very slow. You are better off using
        :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>`, which is
        faster on the GPU.
"""
def computeH(V,W,b,d):
......
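The note above recommends conv3d2d.conv3d over conv3D on the GPU. For
comparison, here is a minimal sketch of calling conv3D itself; the
(batch, row, column, time, in channel) dimension layout and the meaning
of b and d are assumptions drawn from the op's docstring, not something
this diff guarantees:

    import theano
    import theano.tensor as T
    from theano.tensor.nnet.Conv3D import conv3D

    ftensor5 = T.TensorType('float32', (False,) * 5)
    V = ftensor5('V')   # assumed layout: (batch, row, column, time, in channel)
    W = ftensor5('W')   # assumed layout: (filters, row, column, time, in channel)
    b = T.fvector('b')  # assumed: one bias per output filter
    d = T.ivector('d')  # assumed: filter strides over the three video axes

    out = conv3D(V, W, b, d)
    f = theano.function([V, W, b, d], out)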
Diff is collapsed.
import time
import numpy
from scipy import ndimage
import theano
from theano.tensor.nnet.conv3d2d import *
import theano.tests.unittest_tools as utt
if theano.config.mode == 'FAST_COMPILE':
mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu')
else:
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def test_get_diagonal_subtensor_view(wrap=lambda a: a):
x = numpy.arange(20).reshape(5, 4).astype('float32')
x = wrap(x)
xv01 = get_diagonal_subtensor_view(x, 0, 1)
# test that it works in 2d
assert numpy.all(numpy.asarray(xv01) == [[12, 9, 6, 3], [16, 13, 10, 7]])
x = numpy.arange(24).reshape(4, 3, 2)
xv01 = get_diagonal_subtensor_view(x, 0, 1)
xv02 = get_diagonal_subtensor_view(x, 0, 2)
xv12 = get_diagonal_subtensor_view(x, 1, 2)
#print 'x', x
#print 'xv01', xv01
#print 'xv02', xv02
assert numpy.all(numpy.asarray(xv01) == [
[[12, 13], [8, 9], [4, 5]],
[[18, 19], [14, 15], [10, 11]]])
assert numpy.all(numpy.asarray(xv02) == [
[[6, 1], [8, 3], [10, 5]],
[[12, 7], [14, 9], [16, 11]],
[[18, 13], [20, 15], [22, 17]],
])
    # the diagonal view of each leading matrix is the same
    # as the corresponding slice of the diagonal view of the entire 3d tensor
for xi, xvi in zip(x, xv12):
assert numpy.all(xvi == get_diagonal_subtensor_view(xi, 0, 1))
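# A hedged pure-NumPy reference (not part of the original PR) for what
# get_diagonal_subtensor_view(x, 0, 1) computes, inferred from the
# assertions above: out[k, j] = x[k + n1 - 1 - j, j], i.e. anti-diagonal
# bands along the first two axes, with axis 1 reversed.  Unlike the real
# op, it builds a copy rather than a view.
def np_diagonal_subtensor_01(x):
    n1 = x.shape[1]
    out = numpy.empty((x.shape[0] - n1 + 1,) + x.shape[1:], dtype=x.dtype)
    for k in range(out.shape[0]):
        for j in range(n1):
            out[k, j] = x[k + n1 - 1 - j, j]
    return out


def test_np_diagonal_subtensor_01():
    # same expected values as the 2d assertion above
    x = numpy.arange(20).reshape(5, 4).astype('float32')
    assert numpy.all(np_diagonal_subtensor_01(x) ==
                     [[12, 9, 6, 3], [16, 13, 10, 7]])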
def pyconv3d(signals, filters):
Ns, Ts, C, Hs, Ws = signals.shape
Nf, Tf, C, Hf, Wf = filters.shape
Tf2 = Tf//2
Hf2 = Hf//2
Wf2 = Wf//2
rval = numpy.zeros((Ns, Ts-Tf+1, Nf, Hs-Hf+1, Ws-Wf+1))
for ns in xrange(Ns):
for nf in xrange(Nf):
for c in xrange(C):
                s_i = signals[ns, :, c, :, :]
                f_i = filters[nf, :, c, :, :]
                r_i = rval[ns, :, nf, :, :]
                # full convolution over the frame; the border region,
                # where the constant padding leaks in, is cropped off
                # below so only the 'valid' part is accumulated
                o_i = ndimage.convolve(s_i, f_i, mode='constant', cval=1)
                #print s_i.shape, f_i.shape, r_i.shape, o_i.shape
                r_i += o_i[Tf2:-Tf2, Hf2:-Hf2, Wf2:-Wf2]
return rval
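# Hypothetical smoke test (not in the original PR) for the pure-SciPy
# reference above: with all-ones inputs and a single 3x3x3 one-channel
# filter, every valid output position sums a 3*3*3 window of ones, so
# each entry equals 27.  Odd filter extents keep the border crop
# symmetric (even extents would make the slicing above mismatch).
def test_pyconv3d_smoke():
    sig = numpy.ones((1, 5, 1, 5, 5), dtype='float64')
    fil = numpy.ones((1, 3, 1, 3, 3), dtype='float64')
    out = pyconv3d(sig, fil)
    assert out.shape == (1, 3, 1, 3, 3)
    assert numpy.all(out == 27.0)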
def test_conv3d(mode=mode_without_gpu, shared=theano.tensor._shared):
Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
    Nf, Tf, C, Hf, Wf = 32, 5, 3, 5, 5
signals = numpy.arange(Ns*Ts*C*Hs*Ws).reshape(Ns, Ts, C, Hs, Ws).astype('float32')
filters = numpy.arange(Nf*Tf*C*Hf*Wf).reshape(Nf, Tf, C, Hf, Wf).astype('float32')
t0 = time.time()
pyres = pyconv3d(signals, filters)
print time.time() - t0
s_signals = shared(signals)
s_filters = shared(filters)
s_output = shared(signals*0)
out = conv3d(s_signals, s_filters,
signals_shape=signals.shape,
filters_shape=filters.shape)
newconv3d = theano.function([], [],
updates={s_output: out},
mode=mode)
t0 = time.time()
newconv3d()
print time.time() - t0
utt.assert_allclose(pyres, s_output.get_value(borrow=True))
gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
gnewconv3d = theano.function([], [],
updates=[(s_filters, gfilters),
(s_signals, gsignals)],
mode=mode,
name='grad')
t0 = time.time()
gnewconv3d()
print 'grad', time.time() - t0
Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5
Nf, Tf, C, Hf, Wf = 4, 2, 3, 2, 2
signals = numpy.random.rand(Ns, Ts, C, Hs, Ws).astype('float32')
filters = numpy.random.rand(Nf, Tf, C, Hf, Wf).astype('float32')
utt.verify_grad(conv3d, [signals, filters])