提交 7a10c464，作者：Arnaud Bergeron

Add optimization for the full mode and update the tests to work with the new way of enabling them.

上级 0c41c43b
......@@ -4,18 +4,17 @@ import numpy as np
import theano
import theano.tensor as T
from theano.sandbox.cuda import (GpuOp, basic_ops, CudaNdarrayType,
CudaNdarray)
from theano.sandbox.cuda import GpuOp, basic_ops, CudaNdarrayType
import scikits.cuda
from scikits.cuda import fft, linalg, cublas
from scikits.cuda import fft, cublas, misc
import pycuda.gpuarray
import theano.misc.pycuda_init
linalg.init()
misc.init()
# TODO: investigate the effect of enabling fastmath on FFT performance
# (how can it be enabled?).
......
......@@ -1135,8 +1135,8 @@ def local_conv_fft_full(node):
node.op.subsample == (1, 1)):
return [conv2d_fft(node.inputs[0], node.inputs[1], border_mode='full')]
gpu_optimizer.register("local_conv_fft_valid", local_conv_fft_valid)
gpu_optimizer.register("local_conv_fft_full", local_conv_fft_full)
gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
gpu_optimizer.register("conv_fft_full", local_conv_fft_full)
import theano.tensor.signal.downsample as downsample
......
......@@ -19,13 +19,6 @@ else:
class TestConv2dFFT(unittest.TestCase):
def setUp(self):
self._prev = theano.confg.enable_conv2d_fft
theano.confg.enable_conv2d_fft = True
def tearDown(self):
theano.confg.enable_conv2d_fft = self._prev
def run_conv(self, inputs_shape, filters_shape, pad=False, **other_args):
inputs_val = numpy.random.random(inputs_shape).astype('float32')
filters_val = numpy.random.random(filters_shape).astype('float32')
......@@ -63,7 +56,7 @@ class TestConv2dFFT(unittest.TestCase):
filters_shape=(2, 3, 3, 3),
border_mode='full', pad=True)
def test_opt(self):
def test_opt_valid(self):
inputs_shape = (5, 3, 7, 6)
filters_shape = (2, 3, 3, 3)
......@@ -75,6 +68,36 @@ class TestConv2dFFT(unittest.TestCase):
conv = theano.tensor.nnet.conv.conv2d(inputs, filters)
mode = mode_with_gpu.optimizer_including('conv2d_fft_valid')
f_ref = theano.function([], conv)
f_fft = theano.function([], conv, mode=mode_with_gpu)
# make sure we inserted the fft trickery
topo = f_fft.maker.fgraph.toposort()
assert len(op for op in topo
if isinstance(op, theano.sandbox.cuda.fftconv.CuFFTOp)) == 1
res_ref = f_ref()
res_fft = f_fft()
utt.assert_allclose(res_ref, res_fft)
def test_opt_full(self):
inputs_shape = (5, 3, 7, 6)
filters_shape = (2, 3, 3, 3)
inputs_val = numpy.random.random(inputs_shape).astype('float32')
filters_val = numpy.random.random(filters_shape).astype('float32')
inputs = shared(inputs_val)
filters = shared(filters_val)
conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
border_mode='full')
mode = mode_with_gpu.optimizer_including('conv2d_fft_full')
f_ref = theano.function([], conv)
f_fft = theano.function([], conv, mode=mode_with_gpu)
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论