提交 75550055 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #1870 from abergeron/cuda_fftconv

Cuda fftconv
......@@ -27,6 +27,12 @@ TODO: Give examples for how to use these things! They are pretty complicated.
- Conv implemented
- :func:`signal.conv2d <theano.tensor.signal.conv.conv2d>`.
- :func:`nnet.conv2d <theano.tensor.nnet.conv.conv2d>`.
- :func:`conv2d_fft <theano.sandbox.cuda.fftconv.conv2d_fft>`
This is a GPU-only version of conv2d that uses an FFT transform
to perform the work. You can enable it by setting
'THEANO_FLAGS=optimizer_including=conv_fft_valid:conv_fft_full'
in your environment. This is not enabled by default because it
has some restrictions on input and uses more memory.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`. Doesn't work on the GPU.
- :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>`
Another conv3d implementation that uses the conv2d with data reshaping.
......
import numpy
import pycuda.gpuarray
import theano.sandbox.cuda as cuda
from theano.sandbox import cuda
# Refuse to import this module when theano reports that its CUDA backend
# is not available; callers see an ordinary ImportError.
if not cuda.cuda_available:
    raise ImportError('Optional theano package cuda disabled')
......
......@@ -33,7 +33,6 @@ AddConfigVar('cublas.lib',
"""Name of the cuda blas library for the linker.""",
StrParam('cublas'))
#is_nvcc_available called here to initialize global vars in
#nvcc_compiler module
nvcc_compiler.is_nvcc_available()
......
差异被折叠。
......@@ -40,6 +40,7 @@ from theano.sandbox.cuda.elemwise import SupportCodeError
from theano.scalar.basic_scipy import Erfinv
from theano.sandbox.cuda.elemwise import erfinv_gpu
from theano.sandbox.cuda.var import CudaNdarrayConstant
from theano.sandbox.cuda.fftconv import conv2d_fft
from theano.scan_module import scan_utils, scan_op, scan_opt
from theano.tensor.blas import _is_real_vector, _is_real_matrix
linalg = None
......@@ -1118,8 +1119,27 @@ def local_gpu_conv(node):
# differently than the gpu ConvOp
return [out]
import theano.tensor.signal.downsample as downsample
@local_optimizer([GpuConv])
def local_conv_fft_valid(node):
    """Propose the FFT-based convolution for a 'valid' GpuConv node.

    The rewrite only applies when ``node.op`` is a ``GpuConv`` whose
    ``border_mode`` is ``'valid'`` and whose ``subsample`` is ``(1, 1)``.

    Returns a one-element list holding ``conv2d_fft`` applied to the node's
    first two inputs, or None when the node does not qualify.
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return None
    if op.border_mode != 'valid' or op.subsample != (1, 1):
        return None
    inputs_var, filters_var = node.inputs[0], node.inputs[1]
    return [conv2d_fft(inputs_var, filters_var)]
@local_optimizer([GpuConv])
def local_conv_fft_full(node):
    """Propose the FFT-based convolution for a 'full' GpuConv node.

    The rewrite only applies when ``node.op`` is a ``GpuConv`` whose
    ``border_mode`` is ``'full'`` and whose ``subsample`` is ``(1, 1)``.

    Returns a one-element list holding ``conv2d_fft`` (called with
    ``border_mode='full'``) applied to the node's first two inputs, or None
    when the node does not qualify.
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return None
    if op.border_mode != 'full' or op.subsample != (1, 1):
        return None
    inputs_var, filters_var = node.inputs[0], node.inputs[1]
    return [conv2d_fft(inputs_var, filters_var, border_mode='full')]
# Register each FFT rewrite under its own name so it can be selected
# explicitly, e.g. with mode.including('conv_fft_valid') or through
# THEANO_FLAGS=optimizer_including=conv_fft_valid:conv_fft_full.
gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
gpu_optimizer.register("conv_fft_full", local_conv_fft_full)
import theano.tensor.signal.downsample as downsample
@register_opt()
@local_optimizer([downsample.DownsampleFactorMax])
......
import unittest
import numpy
import theano
from theano.tests import unittest_tools as utt
# Skip tests if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
import theano.sandbox.cuda as cuda_ndarray
# Skip every test in this module when theano reports that its CUDA
# backend is not available.
if not cuda_ndarray.cuda_available:
    raise SkipTest('Optional package cuda disabled')
from theano.sandbox.cuda import float32_shared_constructor as shared
# Compilation mode used by the GPU tests: start from FAST_RUN when the
# configured mode is FAST_COMPILE, otherwise from the default mode, and
# include the 'gpu' optimizations in either case.
mode_with_gpu = (
    theano.compile.mode.get_mode('FAST_RUN')
    if theano.config.mode == 'FAST_COMPILE'
    else theano.compile.mode.get_default_mode()
).including('gpu')
class TestConv2dFFT(unittest.TestCase):
    """Check conv2d_fft and its graph rewrites against the reference conv2d."""

    def _make_operands(self, inputs_shape, filters_shape):
        """Return float32 shared variables (inputs, filters) of random data.

        Both arrays are drawn (inputs first, then filters) before either is
        wrapped in a shared variable.
        """
        values = [numpy.random.random(shp).astype('float32')
                  for shp in (inputs_shape, filters_shape)]
        return [shared(val) for val in values]

    def _check_opt(self, opt_name, **conv_args):
        """Compile conv2d with ``opt_name`` included and compare results.

        ``conv_args`` are forwarded to the reference ``conv2d`` call.  The
        graph compiled with the extra optimization must contain exactly two
        ``CuFFTOp`` nodes and must give the same values as the graph
        compiled without a ``mode`` argument.
        """
        inputs, filters = self._make_operands((5, 3, 7, 6), (2, 3, 3, 3))
        conv = theano.tensor.nnet.conv.conv2d(inputs, filters, **conv_args)
        mode = mode_with_gpu.including(opt_name)

        f_ref = theano.function([], conv)
        f_fft = theano.function([], conv, mode=mode)

        # make sure the rewrite really put the FFT ops into the graph
        fft_nodes = [
            n for n in f_fft.maker.fgraph.toposort()
            if isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
        ]
        assert len(fft_nodes) == 2

        expected = f_ref()
        actual = f_fft()
        utt.assert_allclose(expected, actual)

    def run_conv(self, inputs_shape, filters_shape, pad=False, **other_args):
        """Compare conv2d_fft with the reference conv2d on random data.

        ``pad`` is passed to ``conv2d_fft`` as ``pad_last_dim``;
        ``other_args`` are forwarded unchanged to both implementations.
        """
        inputs, filters = self._make_operands(inputs_shape, filters_shape)

        reference = theano.tensor.nnet.conv.conv2d(
            inputs, filters, **other_args)
        fft_based = theano.sandbox.cuda.fftconv.conv2d_fft(
            inputs, filters, pad_last_dim=pad, **other_args)

        f_ref = theano.function([], reference)
        f_fft = theano.function([], fft_based, mode=mode_with_gpu)

        expected = f_ref()
        actual = f_fft()
        utt.assert_allclose(expected, actual)

    def test_valid(self):
        kernel_shape = (2, 3, 3, 3)
        self.run_conv(inputs_shape=(5, 3, 7, 6),
                      filters_shape=kernel_shape,
                      border_mode='valid')
        self.run_conv(inputs_shape=(5, 3, 7, 7),
                      filters_shape=kernel_shape,
                      border_mode='valid', pad=True)

    def test_full(self):
        kernel_shape = (2, 3, 3, 3)
        self.run_conv(inputs_shape=(5, 3, 7, 6),
                      filters_shape=kernel_shape,
                      border_mode='full')
        self.run_conv(inputs_shape=(5, 3, 7, 7),
                      filters_shape=kernel_shape,
                      border_mode='full', pad=True)

    def test_opt_valid(self):
        # conv2d is called without an explicit border_mode here.
        self._check_opt('conv_fft_valid')

    def test_opt_full(self):
        self._check_opt('conv_fft_full', border_mode='full')
......@@ -12,9 +12,11 @@ from theano.tensor.basic import _allclose, NotScalarConstantError
class TestConv2D(utt.InferShapeTester):
mode = None
dtype = 'float64'
def setUp(self):
super (TestConv2D, self).setUp()
super(TestConv2D, self).setUp()
self.input = T.dtensor4('input')
self.input.name = 'default_V'
self.filters = T.dtensor4('filters')
......@@ -67,11 +69,11 @@ class TestConv2D(utt.InferShapeTester):
output = sym_conv2d(input, filters)
output.name = 'conv2d(%s,%s)' % (input.name, filters.name)
theano_conv = theano.function([input, filters], output)
theano_conv = theano.function([input, filters], output, mode=self.mode)
# initialize input and compute result
image_data = numpy.random.random(N_image_shape)
filter_data = numpy.random.random(N_filter_shape)
image_data = numpy.random.random(N_image_shape).astype(self.dtype)
filter_data = numpy.random.random(N_filter_shape).astype(self.dtype)
try:
theano_output = theano_conv(image_data, filter_data)
except ValueError:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论