提交 75550055 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #1870 from abergeron/cuda_fftconv

Cuda fftconv
...@@ -27,6 +27,12 @@ TODO: Give examples for how to use these things! They are pretty complicated. ...@@ -27,6 +27,12 @@ TODO: Give examples for how to use these things! They are pretty complicated.
- Conv implemented - Conv implemented
- :func:`signal.conv2d <theano.tensor.signal.conv.conv2d>`. - :func:`signal.conv2d <theano.tensor.signal.conv.conv2d>`.
- :func:`nnet.conv2d <theano.tensor.nnet.conv.conv2d>`. - :func:`nnet.conv2d <theano.tensor.nnet.conv.conv2d>`.
- :func:`conv2d_fft <theano.sandbox.cuda.fftconv.conv2d_fft>`
This is a GPU-only version of conv2d that uses an FFT
to perform the work. You can enable it by setting
'THEANO_FLAGS=optimizer_including=conv_fft_valid:conv_fft_full'
in your environment. This is not enabled by default because it
has some restrictions on input and uses more memory.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`. Doesn't work on the GPU. - :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`. Doesn't work on the GPU.
- :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>` - :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>`
Another conv3d implementation that uses the conv2d with data reshaping. Another conv3d implementation that uses the conv2d with data reshaping.
......
import numpy import numpy
import pycuda.gpuarray import pycuda.gpuarray
import theano.sandbox.cuda as cuda from theano.sandbox import cuda
if cuda.cuda_available == False: if cuda.cuda_available == False:
raise ImportError('Optional theano package cuda disabled') raise ImportError('Optional theano package cuda disabled')
......
...@@ -33,7 +33,6 @@ AddConfigVar('cublas.lib', ...@@ -33,7 +33,6 @@ AddConfigVar('cublas.lib',
"""Name of the cuda blas library for the linker.""", """Name of the cuda blas library for the linker.""",
StrParam('cublas')) StrParam('cublas'))
#is_nvcc_available called here to initialize global vars in #is_nvcc_available called here to initialize global vars in
#nvcc_compiler module #nvcc_compiler module
nvcc_compiler.is_nvcc_available() nvcc_compiler.is_nvcc_available()
......
差异被折叠。
...@@ -40,6 +40,7 @@ from theano.sandbox.cuda.elemwise import SupportCodeError ...@@ -40,6 +40,7 @@ from theano.sandbox.cuda.elemwise import SupportCodeError
from theano.scalar.basic_scipy import Erfinv from theano.scalar.basic_scipy import Erfinv
from theano.sandbox.cuda.elemwise import erfinv_gpu from theano.sandbox.cuda.elemwise import erfinv_gpu
from theano.sandbox.cuda.var import CudaNdarrayConstant from theano.sandbox.cuda.var import CudaNdarrayConstant
from theano.sandbox.cuda.fftconv import conv2d_fft
from theano.scan_module import scan_utils, scan_op, scan_opt from theano.scan_module import scan_utils, scan_op, scan_opt
from theano.tensor.blas import _is_real_vector, _is_real_matrix from theano.tensor.blas import _is_real_vector, _is_real_matrix
linalg = None linalg = None
...@@ -1118,8 +1119,27 @@ def local_gpu_conv(node): ...@@ -1118,8 +1119,27 @@ def local_gpu_conv(node):
# differently than the gpu ConvOp # differently than the gpu ConvOp
return [out] return [out]
import theano.tensor.signal.downsample as downsample
@local_optimizer([GpuConv])
def local_conv_fft_valid(node):
    """Swap a unit-stride 'valid' GpuConv node for its conv2d_fft equivalent.

    :param node: the apply node being considered for replacement.
    :return: a one-element list holding the ``conv2d_fft`` output built from
        the node's first two inputs, or ``None`` when the node is not a
        ``GpuConv`` with ``border_mode == 'valid'`` and
        ``subsample == (1, 1)``.
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return
    # Only the non-subsampled 'valid' case is rewritten by this optimizer.
    if op.border_mode != 'valid' or op.subsample != (1, 1):
        return
    return [conv2d_fft(node.inputs[0], node.inputs[1])]
@local_optimizer([GpuConv])
def local_conv_fft_full(node):
    """Swap a unit-stride 'full' GpuConv node for its conv2d_fft equivalent.

    :param node: the apply node being considered for replacement.
    :return: a one-element list holding the ``conv2d_fft`` output (called
        with ``border_mode='full'``) built from the node's first two inputs,
        or ``None`` when the node is not a ``GpuConv`` with
        ``border_mode == 'full'`` and ``subsample == (1, 1)``.
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return
    # Only the non-subsampled 'full' case is rewritten by this optimizer.
    if op.border_mode != 'full' or op.subsample != (1, 1):
        return
    return [conv2d_fft(node.inputs[0], node.inputs[1], border_mode='full')]
# Register the FFT rewrites under their own names so that they can be
# requested explicitly, e.g. with
# THEANO_FLAGS=optimizer_including=conv_fft_valid:conv_fft_full
gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
gpu_optimizer.register("conv_fft_full", local_conv_fft_full)
import theano.tensor.signal.downsample as downsample
@register_opt() @register_opt()
@local_optimizer([downsample.DownsampleFactorMax]) @local_optimizer([downsample.DownsampleFactorMax])
......
import unittest
import numpy
import theano
from theano.tests import unittest_tools as utt
# Skip tests if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_available == False:
raise SkipTest('Optional package cuda disabled')
from theano.sandbox.cuda import float32_shared_constructor as shared
# Compilation mode used for the GPU functions in this module: start from
# FAST_RUN when the configured mode is FAST_COMPILE, otherwise from the
# default mode, and include the 'gpu' optimizations in either case.
mode_with_gpu = (
    theano.compile.mode.get_mode('FAST_RUN')
    if theano.config.mode == 'FAST_COMPILE'
    else theano.compile.mode.get_default_mode()
).including('gpu')
class TestConv2dFFT(unittest.TestCase):
    """Compare the CUDA FFT-based conv2d with the reference nnet conv2d.

    ``test_valid`` / ``test_full`` call ``conv2d_fft`` directly, while
    ``test_opt_valid`` / ``test_opt_full`` build a regular ``conv2d`` graph
    and check that the ``conv_fft_*`` optimizations rewrite it.
    """

    @staticmethod
    def _shared_random(shape):
        """Return a float32 shared variable filled with random values.

        The variable is created with the CUDA ``float32_shared_constructor``
        imported at module level as ``shared``.
        """
        return shared(numpy.random.random(shape).astype('float32'))

    def run_conv(self, inputs_shape, filters_shape, pad=False, **other_args):
        """Check ``conv2d_fft`` against ``conv2d`` on random data.

        :param inputs_shape: shape of the random input tensor.
        :param filters_shape: shape of the random filter tensor.
        :param pad: forwarded to ``conv2d_fft`` as ``pad_last_dim``.
        :param other_args: extra keyword arguments passed to both the
            reference ``conv2d`` and ``conv2d_fft`` (e.g. ``border_mode``).
        """
        inputs = self._shared_random(inputs_shape)
        filters = self._shared_random(filters_shape)

        conv_ref = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                                  **other_args)
        conv_fft = theano.sandbox.cuda.fftconv.conv2d_fft(inputs, filters,
                                                          pad_last_dim=pad,
                                                          **other_args)

        f_ref = theano.function([], conv_ref)
        f_fft = theano.function([], conv_fft, mode=mode_with_gpu)

        res_ref = f_ref()
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft)

    def _check_opt(self, opt_name, **conv_args):
        """Check that optimization ``opt_name`` rewrites conv2d to use FFT.

        Builds ``conv2d`` on fixed-shape random inputs, compiles it once
        with the default mode and once with ``mode_with_gpu`` including
        ``opt_name``, verifies the optimized graph contains CuFFTOp nodes
        and compares both results.

        :param opt_name: name of the optimization to include in the mode.
        :param conv_args: keyword arguments forwarded to ``conv2d``.
        """
        inputs = self._shared_random((5, 3, 7, 6))
        filters = self._shared_random((2, 3, 3, 3))

        conv = theano.tensor.nnet.conv.conv2d(inputs, filters, **conv_args)

        mode = mode_with_gpu.including(opt_name)

        f_ref = theano.function([], conv)
        f_fft = theano.function([], conv, mode=mode)

        # make sure we inserted the fft trickery
        # NOTE(review): exactly two CuFFTOp nodes are expected, presumably
        # one transform each for inputs and filters -- confirm in fftconv.
        topo = f_fft.maker.fgraph.toposort()
        n_fft_nodes = sum(
            isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
            for n in topo)
        assert n_fft_nodes == 2

        res_ref = f_ref()
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft)

    def test_valid(self):
        """'valid' convolution, without and with last-dimension padding."""
        self.run_conv(inputs_shape=(5, 3, 7, 6),
                      filters_shape=(2, 3, 3, 3),
                      border_mode='valid')
        self.run_conv(inputs_shape=(5, 3, 7, 7),
                      filters_shape=(2, 3, 3, 3),
                      border_mode='valid', pad=True)

    def test_full(self):
        """'full' convolution, without and with last-dimension padding."""
        self.run_conv(inputs_shape=(5, 3, 7, 6),
                      filters_shape=(2, 3, 3, 3),
                      border_mode='full')
        self.run_conv(inputs_shape=(5, 3, 7, 7),
                      filters_shape=(2, 3, 3, 3),
                      border_mode='full', pad=True)

    def test_opt_valid(self):
        """The conv_fft_valid optimization rewrites a default conv2d."""
        self._check_opt('conv_fft_valid')

    def test_opt_full(self):
        """The conv_fft_full optimization rewrites a 'full' conv2d."""
        self._check_opt('conv_fft_full', border_mode='full')
...@@ -12,9 +12,11 @@ from theano.tensor.basic import _allclose, NotScalarConstantError ...@@ -12,9 +12,11 @@ from theano.tensor.basic import _allclose, NotScalarConstantError
class TestConv2D(utt.InferShapeTester): class TestConv2D(utt.InferShapeTester):
mode = None
dtype = 'float64'
def setUp(self): def setUp(self):
super (TestConv2D, self).setUp() super(TestConv2D, self).setUp()
self.input = T.dtensor4('input') self.input = T.dtensor4('input')
self.input.name = 'default_V' self.input.name = 'default_V'
self.filters = T.dtensor4('filters') self.filters = T.dtensor4('filters')
...@@ -67,11 +69,11 @@ class TestConv2D(utt.InferShapeTester): ...@@ -67,11 +69,11 @@ class TestConv2D(utt.InferShapeTester):
output = sym_conv2d(input, filters) output = sym_conv2d(input, filters)
output.name = 'conv2d(%s,%s)' % (input.name, filters.name) output.name = 'conv2d(%s,%s)' % (input.name, filters.name)
theano_conv = theano.function([input, filters], output) theano_conv = theano.function([input, filters], output, mode=self.mode)
# initialize input and compute result # initialize input and compute result
image_data = numpy.random.random(N_image_shape) image_data = numpy.random.random(N_image_shape).astype(self.dtype)
filter_data = numpy.random.random(N_filter_shape) filter_data = numpy.random.random(N_filter_shape).astype(self.dtype)
try: try:
theano_output = theano_conv(image_data, filter_data) theano_output = theano_conv(image_data, filter_data)
except ValueError: except ValueError:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论