提交 36694a6d authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #1454 from nouiz/conv3d2d

[MRG]Conv3d2d
...@@ -28,6 +28,7 @@ env: ...@@ -28,6 +28,7 @@ env:
- PART="-e test_basic.py theano/tensor/tests" - PART="-e test_basic.py theano/tensor/tests"
script: script:
- "if [ `expr \"$PART\" : '.*sparse'` -gt \"0\" ]; then pip install scipy==0.8 --use-mirrors; fi" - "if [ `expr \"$PART\" : '.*sparse'` -gt \"0\" ]; then pip install scipy==0.8 --use-mirrors; fi"
- "if [ `expr \"$PART\" : '.*nnet'` -gt \"0\" ]; then pip install scipy==0.8 --use-mirrors; fi"
- export THEANO_FLAGS=warn.ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise - export THEANO_FLAGS=warn.ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise
- python --version - python --version
- uname -a - uname -a
......
...@@ -16,9 +16,6 @@ ...@@ -16,9 +16,6 @@
present in convolutional neural networks (where filters are 3D and pool present in convolutional neural networks (where filters are 3D and pool
over several input channels). over several input channels).
The project `TheanoConv3d2d <https://github.com/jaberg/TheanoConv3d2d>`_
is probably faster then the Conv3d documented here.
.. module:: conv .. module:: conv
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: ops for signal processing :synopsis: ops for signal processing
...@@ -31,6 +28,21 @@ TODO: Give examples for how to use these things! They are pretty complicated. ...@@ -31,6 +28,21 @@ TODO: Give examples for how to use these things! They are pretty complicated.
- :func:`signal.conv2d <theano.tensor.signal.conv.conv2d>`. - :func:`signal.conv2d <theano.tensor.signal.conv.conv2d>`.
- :func:`nnet.conv2d <theano.tensor.nnet.conv.conv2d>`. - :func:`nnet.conv2d <theano.tensor.nnet.conv.conv2d>`.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`. - :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`.
- :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>`
Another conv3d implementation that uses conv2d with data reshaping.
It is faster in some cases than conv3d, specifically on the GPU.
- `Faster conv2d <http://deeplearning.net/software/pylearn2/library/alex.html>`_
This is in Pylearn2, is not well documented and uses a different
memory layout for the input. It is important to have the input
in the native memory layout, and not use dimshuffle on the
inputs, otherwise you lose much of the speed up. So this is not
a drop-in replacement for conv2d.
Normally those are called from the `linear transform
<http://deeplearning.net/software/pylearn2/library/linear.html>`_
implementation.
.. autofunction:: theano.tensor.nnet.conv.conv2d .. autofunction:: theano.tensor.nnet.conv.conv2d
.. autofunction:: theano.tensor.nnet.Conv3D.conv3D .. autofunction:: theano.tensor.nnet.Conv3D.conv3D
.. autofunction:: theano.tensor.nnet.conv3d2d.conv3d
...@@ -12,11 +12,12 @@ import theano.tensor as T ...@@ -12,11 +12,12 @@ import theano.tensor as T
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
from theano.tensor.nnet.tests import test_conv3d2d
if cuda.cuda_available == False: if cuda.cuda_available == False:
raise SkipTest('Optional package cuda disabled') raise SkipTest('Optional package cuda disabled')
if theano.config.mode=='FAST_COMPILE': if theano.config.mode == 'FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu') mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu') mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu')
else: else:
...@@ -26,26 +27,28 @@ else: ...@@ -26,26 +27,28 @@ else:
def test_shape_i(): def test_shape_i():
x = cuda.ftensor3() x = cuda.ftensor3()
v = cuda.CudaNdarray(numpy.zeros((3,4,5),dtype='float32')) v = cuda.CudaNdarray(numpy.zeros((3, 4, 5), dtype='float32'))
f = theano.function([x],x.shape[1]) f = theano.function([x], x.shape[1])
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert f(v)==4 assert f(v) == 4
if theano.config.mode!='FAST_COMPILE': if theano.config.mode != 'FAST_COMPILE':
assert len(topo)==1 assert len(topo) == 1
assert isinstance(topo[0].op,T.opt.Shape_i) assert isinstance(topo[0].op, T.opt.Shape_i)
def test_shape(): def test_shape():
x = cuda.ftensor3() x = cuda.ftensor3()
v = cuda.CudaNdarray(numpy.zeros((3,4,5),dtype='float32')) v = cuda.CudaNdarray(numpy.zeros((3, 4, 5), dtype='float32'))
f = theano.function([x],x.shape) f = theano.function([x], x.shape)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert numpy.all(f(v)==(3,4,5)) assert numpy.all(f(v) == (3, 4, 5))
if theano.config.mode!='FAST_COMPILE': if theano.config.mode != 'FAST_COMPILE':
assert len(topo)==4 assert len(topo) == 4
assert isinstance(topo[0].op,T.opt.Shape_i) assert isinstance(topo[0].op, T.opt.Shape_i)
assert isinstance(topo[1].op,T.opt.Shape_i) assert isinstance(topo[1].op, T.opt.Shape_i)
assert isinstance(topo[2].op,T.opt.Shape_i) assert isinstance(topo[2].op, T.opt.Shape_i)
assert isinstance(topo[3].op,T.opt.MakeVector) assert isinstance(topo[3].op, T.opt.MakeVector)
def test_softmax_optimizations(): def test_softmax_optimizations():
from theano.tensor.nnet.nnet import softmax, crossentropy_categorical_1hot from theano.tensor.nnet.nnet import softmax, crossentropy_categorical_1hot
...@@ -66,16 +69,17 @@ def test_softmax_optimizations(): ...@@ -66,16 +69,17 @@ def test_softmax_optimizations():
assert fgraph.outputs[0].owner.inputs[0].owner.op == cuda.host_from_gpu assert fgraph.outputs[0].owner.inputs[0].owner.op == cuda.host_from_gpu
assert fgraph.outputs[0].owner.inputs[0].owner.inputs[0].owner.op == cuda.nnet.gpu_crossentropy_softmax_argmax_1hot_with_bias assert fgraph.outputs[0].owner.inputs[0].owner.inputs[0].owner.op == cuda.nnet.gpu_crossentropy_softmax_argmax_1hot_with_bias
def test_may_share_memory_cuda(): def test_may_share_memory_cuda():
from theano.misc.may_share_memory import may_share_memory from theano.misc.may_share_memory import may_share_memory
a = cuda.CudaNdarray(numpy.zeros((3,4),dtype='float32')) a = cuda.CudaNdarray(numpy.zeros((3, 4), dtype='float32'))
b = cuda.CudaNdarray(numpy.zeros((3,4),dtype='float32')) b = cuda.CudaNdarray(numpy.zeros((3, 4), dtype='float32'))
na = numpy.zeros((3,4)) na = numpy.zeros((3, 4))
nb = numpy.zeros((3,4)) nb = numpy.zeros((3, 4))
va = a.view() va = a.view()
vb = b.view() vb = b.view()
ra = a.reshape((4,3)) ra = a.reshape((4, 3))
rb = b.reshape((4,3)) rb = b.reshape((4, 3))
#can't test the transpose as ta._strides = is not implemented #can't test the transpose as ta._strides = is not implemented
#manual transpose of a #manual transpose of a
...@@ -84,25 +88,28 @@ def test_may_share_memory_cuda(): ...@@ -84,25 +88,28 @@ def test_may_share_memory_cuda():
#elem_size=elem_size = numpy.zeros(0,dtype=a.dtype).dtype.itemsize #elem_size=elem_size = numpy.zeros(0,dtype=a.dtype).dtype.itemsize
#ta.gpudata += ta.size*elem_size #ta.gpudata += ta.size*elem_size
for a_,b_,rep in [(a,a,True),(b,b,True),(a,b,False), for a_, b_, rep in [(a, a, True), (b, b, True), (a, b, False),
(a,na,False),(b,nb,False),(na,b,False),(nb,a,False), (a, na, False), (b, nb, False),
(a,va,True),(b,vb,True),(va,b,False),(a,vb,False), (na, b, False), (nb, a, False),
(a,ra,True),(b,rb,True),(ra,b,False),(a,rb,False), (a, va, True), (b, vb, True),
(va, b, False), (a, vb, False),
(a, ra, True), (b, rb, True),
(ra, b, False), (a, rb, False),
]: ]:
assert may_share_memory(a_,b_)==rep assert may_share_memory(a_, b_) == rep
assert may_share_memory(b_,a_)==rep assert may_share_memory(b_, a_) == rep
#test that it raise error when needed. #test that it raise error when needed.
for a_,b_,rep in [(a,(0,),False),(a,1,False),(a,None,False)]: for a_, b_, rep in [(a, (0,), False), (a, 1, False), (a, None, False)]:
assert may_share_memory(a_,b_,False)==rep assert may_share_memory(a_, b_, False) == rep
assert may_share_memory(b_,a_,False)==rep assert may_share_memory(b_, a_, False) == rep
try: try:
may_share_memory(a_,b_) may_share_memory(a_, b_)
raise Exception("An error was expected") raise Exception("An error was expected")
except TypeError: except TypeError:
pass pass
try: try:
may_share_memory(b_,a_) may_share_memory(b_, a_)
raise Exception("An error was expected") raise Exception("An error was expected")
except TypeError: except TypeError:
pass pass
...@@ -127,3 +134,12 @@ def test_deepcopy(): ...@@ -127,3 +134,12 @@ def test_deepcopy():
out = f(a_v) out = f(a_v)
assert out is not a_v assert out is not a_v
assert numpy.allclose(numpy.asarray(a_v), numpy.asarray(out)) assert numpy.allclose(numpy.asarray(a_v), numpy.asarray(out))
def test_get_diagonal_subtensor_view():
    """Re-run the conv3d2d diagonal view test with ``wrap=cuda.CudaNdarray``."""
    gpu_wrap = cuda.CudaNdarray
    test_conv3d2d.test_get_diagonal_subtensor_view(wrap=gpu_wrap)
def test_conv3d():
    """Re-run the conv3d2d convolution test in the GPU mode.

    The shared variables are built with ``cuda.shared_constructor`` and the
    functions are compiled with ``mode_with_gpu``.
    """
    gpu_settings = dict(mode=mode_with_gpu,
                        shared=cuda.shared_constructor)
    test_conv3d2d.test_conv3d(**gpu_settings)
...@@ -561,6 +561,11 @@ conv3D = Conv3D() ...@@ -561,6 +561,11 @@ conv3D = Conv3D()
:note: The order of dimensions does not correspond to the one in `conv2d`. :note: The order of dimensions does not correspond to the one in `conv2d`.
This is for optimization. This is for optimization.
:note: The GPU implementation is very slow. You are better off using
:func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>`, which is faster
on GPU.
""" """
def computeH(V,W,b,d): def computeH(V,W,b,d):
......
差异被折叠。
import time
import numpy
from scipy import ndimage
import theano
from theano.tensor.nnet.conv3d2d import *
import theano.tests.unittest_tools as utt
# Compilation mode used by default in this file: the configured default mode
# (FAST_RUN is substituted when the configured mode is FAST_COMPILE) with the
# 'gpu' optimizations excluded.
mode_without_gpu = (
    theano.compile.mode.get_mode('FAST_RUN')
    if theano.config.mode == 'FAST_COMPILE'
    else theano.compile.mode.get_default_mode()
).excluding('gpu')
def test_get_diagonal_subtensor_view(wrap=lambda a: a):
    """Check `get_diagonal_subtensor_view` against hand-written expected views.

    :param wrap: callable applied to the 2d float32 input before the view is
        taken; the identity by default.  The 3d inputs are always plain
        numpy arrays.
    """
    # 2d case, taken on the (possibly wrapped) container.
    matrix = wrap(numpy.arange(20).reshape(5, 4).astype('float32'))
    view_2d = get_diagonal_subtensor_view(matrix, 0, 1)
    expected_2d = [[12, 9, 6, 3], [16, 13, 10, 7]]
    assert numpy.all(numpy.asarray(view_2d) == expected_2d)

    # 3d case: one view for each pair of axes.
    cube = numpy.arange(24).reshape(4, 3, 2)
    view_01 = get_diagonal_subtensor_view(cube, 0, 1)
    view_02 = get_diagonal_subtensor_view(cube, 0, 2)
    view_12 = get_diagonal_subtensor_view(cube, 1, 2)

    expected_01 = [
        [[12, 13], [8, 9], [4, 5]],
        [[18, 19], [14, 15], [10, 11]],
    ]
    assert numpy.all(numpy.asarray(view_01) == expected_01)

    expected_02 = [
        [[6, 1], [8, 3], [10, 5]],
        [[12, 7], [14, 9], [16, 11]],
        [[18, 13], [20, 15], [22, 17]],
    ]
    assert numpy.all(numpy.asarray(view_02) == expected_02)

    # Slicing the (1, 2) view of the whole tensor along its first axis must
    # give the same thing as taking the (0, 1) view of each leading matrix.
    for sub_matrix, sub_view in zip(cube, view_12):
        assert numpy.all(
            sub_view == get_diagonal_subtensor_view(sub_matrix, 0, 1))
def pyconv3d(signals, filters):
    """Reference 'valid' 3d convolution built on ``ndimage.convolve``.

    :param signals: 5d array of shape (Ns, Ts, C, Hs, Ws).  Axis 0 indexes
        the signals and axis 2 the channels; axes 1, 3 and 4 are convolved.
    :param filters: 5d array of shape (Nf, Tf, C, Hf, Wf), using the same
        axis convention as `signals`.
    :return: array of shape (Ns, Ts - Tf + 1, Nf, Hs - Hf + 1, Ws - Wf + 1)
        with numpy's default float dtype.  Entry ``[ns, :, nf, :, :]`` is
        the sum over channels of the valid part of the convolution of
        signal ``ns`` with filter ``nf``.

    Filter extents of any size are supported, including 1 and even sizes.
    """
    Ns, Ts, C, Hs, Ws = signals.shape
    Nf, Tf, C, Hf, Wf = filters.shape

    # ndimage.convolve returns an array as large as its input; along an axis
    # with n filter taps, entry i combines input[i + n // 2 - k] for k in
    # range(n).  All taps fall inside the input (no padding is read) only
    # for (n - 1) // 2 <= i < size - n // 2, which is exactly size - n + 1
    # entries.  Explicit bounds are used because a negative stop such as
    # ``-(n // 2)`` yields an empty slice when n == 1 and drops one entry
    # too many when n is even.
    valid = tuple(slice((n - 1) // 2, size - n // 2)
                  for size, n in ((Ts, Tf), (Hs, Hf), (Ws, Wf)))

    rval = numpy.zeros((Ns, Ts - Tf + 1, Nf, Hs - Hf + 1, Ws - Wf + 1))
    for ns in range(Ns):
        for nf in range(Nf):
            # View into rval: the in-place addition below accumulates the
            # contribution of every channel directly into the result.
            r_i = rval[ns, :, nf, :, :]
            for c in range(C):
                s_i = signals[ns, :, c, :, :]
                f_i = filters[nf, :, c, :, :]
                # The padding value is irrelevant: only entries computed
                # without padding are kept.
                o_i = ndimage.convolve(s_i, f_i, mode='constant', cval=1)
                r_i += o_i[valid]
    return rval
def test_conv3d(mode=mode_without_gpu, shared=theano.tensor._shared):
Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
Nf, Tf, C, Hf, Wf = 32, 5 , 3, 5 , 5
signals = numpy.arange(Ns*Ts*C*Hs*Ws).reshape(Ns, Ts, C, Hs, Ws).astype('float32')
filters = numpy.arange(Nf*Tf*C*Hf*Wf).reshape(Nf, Tf, C, Hf, Wf).astype('float32')
t0 = time.time()
pyres = pyconv3d(signals, filters)
print time.time() - t0
s_signals = shared(signals)
s_filters = shared(filters)
s_output = shared(signals*0)
out = conv3d(s_signals, s_filters,
signals_shape=signals.shape,
filters_shape=filters.shape)
newconv3d = theano.function([], [],
updates={s_output: out},
mode=mode)
t0 = time.time()
newconv3d()
print time.time() - t0
utt.assert_allclose(pyres, s_output.get_value(borrow=True))
gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
gnewconv3d = theano.function([], [],
updates=[(s_filters, gfilters),
(s_signals, gsignals)],
mode=mode,
name='grad')
t0 = time.time()
gnewconv3d()
print 'grad', time.time() - t0
Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5
Nf, Tf, C, Hf, Wf = 4, 2, 3, 2, 2
signals = numpy.random.rand(Ns, Ts, C, Hs, Ws).astype('float32')
filters = numpy.random.rand(Nf, Tf, C, Hf, Wf).astype('float32')
utt.verify_grad(conv3d, [signals, filters])
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论