提交 2d1e254e 作者: Arnaud Bergeron

Fix import loop involving fftconv.

上级 48a87d25
import numpy import numpy
import pycuda.gpuarray import pycuda.gpuarray
import theano.sandbox.cuda as cuda from theano.sandbox import cuda
if cuda.cuda_available == False: if cuda.cuda_available == False:
raise ImportError('Optional theano package cuda disabled') raise ImportError('Optional theano package cuda disabled')
......
import string
import numpy as np import numpy as np
import theano import theano
import theano.tensor as T import theano.tensor as T
import theano.sandbox.cuda as cuda from theano.sandbox.cuda import (GpuOp, basic_ops, CudaNdarrayType,
from theano.misc.pycuda_utils import to_gpuarray CudaNdarray)
import scikits.cuda import scikits.cuda
from scikits.cuda import fft, linalg, cublas from scikits.cuda import fft, linalg, cublas
...@@ -12,8 +14,6 @@ import pycuda.gpuarray ...@@ -12,8 +14,6 @@ import pycuda.gpuarray
import theano.misc.pycuda_init import theano.misc.pycuda_init
import string
linalg.init() linalg.init()
...@@ -25,7 +25,7 @@ linalg.init() ...@@ -25,7 +25,7 @@ linalg.init()
# base class for shared code between scikits.cuda-based ops # base class for shared code between scikits.cuda-based ops
class ScikitsCudaOp(cuda.GpuOp): class ScikitsCudaOp(GpuOp):
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) return type(self) == type(other)
...@@ -39,8 +39,8 @@ class ScikitsCudaOp(cuda.GpuOp): ...@@ -39,8 +39,8 @@ class ScikitsCudaOp(cuda.GpuOp):
raise NotImplementedError raise NotImplementedError
def make_node(self, inp): def make_node(self, inp):
inp = cuda.basic_ops.gpu_contiguous( inp = basic_ops.gpu_contiguous(
cuda.basic_ops.as_cuda_ndarray_variable(inp)) basic_ops.as_cuda_ndarray_variable(inp))
assert inp.dtype == "float32" assert inp.dtype == "float32"
...@@ -50,10 +50,11 @@ class ScikitsCudaOp(cuda.GpuOp): ...@@ -50,10 +50,11 @@ class ScikitsCudaOp(cuda.GpuOp):
class CuFFTOp(ScikitsCudaOp): class CuFFTOp(ScikitsCudaOp):
def output_type(self, inp): def output_type(self, inp):
# add one extra dim for real/imag # add one extra dim for real/imag
return cuda.CudaNdarrayType( return CudaNdarrayType(
broadcastable=[False] * (inp.type.ndim + 1)) broadcastable=[False] * (inp.type.ndim + 1))
def make_thunk(self, node, storage_map, _, _2): def make_thunk(self, node, storage_map, _, _2):
from theano.misc.pycuda_utils import to_gpuarray
inputs = [storage_map[v] for v in node.inputs] inputs = [storage_map[v] for v in node.inputs]
outputs = [storage_map[v] for v in node.outputs] outputs = [storage_map[v] for v in node.outputs]
...@@ -77,7 +78,7 @@ class CuFFTOp(ScikitsCudaOp): ...@@ -77,7 +78,7 @@ class CuFFTOp(ScikitsCudaOp):
# only allocate if there is no previous allocation of the # only allocate if there is no previous allocation of the
# right size. # right size.
if z[0] is None or z[0].shape != output_shape: if z[0] is None or z[0].shape != output_shape:
z[0] = cuda.CudaNdarray.zeros(output_shape) z[0] = CudaNdarray.zeros(output_shape)
input_pycuda = to_gpuarray(inputs[0][0]) input_pycuda = to_gpuarray(inputs[0][0])
# I thought we'd need to change the type on output_pycuda # I thought we'd need to change the type on output_pycuda
...@@ -104,7 +105,7 @@ class CuFFTOp(ScikitsCudaOp): ...@@ -104,7 +105,7 @@ class CuFFTOp(ScikitsCudaOp):
class CuIFFTOp(ScikitsCudaOp): class CuIFFTOp(ScikitsCudaOp):
def output_type(self, inp): def output_type(self, inp):
# remove extra real/imag dim # remove extra real/imag dim
return cuda.CudaNdarrayType( return CudaNdarrayType(
broadcastable=[False] * (inp.type.ndim - 1)) broadcastable=[False] * (inp.type.ndim - 1))
def make_thunk(self, node, storage_map, _, _2): def make_thunk(self, node, storage_map, _, _2):
...@@ -129,7 +130,7 @@ class CuIFFTOp(ScikitsCudaOp): ...@@ -129,7 +130,7 @@ class CuIFFTOp(ScikitsCudaOp):
# only allocate if there is no previous allocation of the # only allocate if there is no previous allocation of the
# right size. # right size.
if z[0] is None or z[0].shape != output_shape: if z[0] is None or z[0].shape != output_shape:
z[0] = cuda.CudaNdarray.zeros(output_shape) z[0] = CudaNdarray.zeros(output_shape)
input_pycuda = to_gpuarray(inputs[0][0]) input_pycuda = to_gpuarray(inputs[0][0])
# input_pycuda is a float32 array with an extra dimension, # input_pycuda is a float32 array with an extra dimension,
...@@ -162,7 +163,7 @@ def to_complex_gpuarray(x, copyif=False): ...@@ -162,7 +163,7 @@ def to_complex_gpuarray(x, copyif=False):
real/imaginary parts, and turns it into a complex64 PyCUDA real/imaginary parts, and turns it into a complex64 PyCUDA
GPUArray. GPUArray.
""" """
if not isinstance(x, cuda.CudaNdarray): if not isinstance(x, CudaNdarray):
raise ValueError("We can transfer only CudaNdarray " raise ValueError("We can transfer only CudaNdarray "
"to pycuda.gpuarray.GPUArray") "to pycuda.gpuarray.GPUArray")
else: else:
...@@ -280,10 +281,10 @@ class BatchedComplexDotOp(ScikitsCudaOp): ...@@ -280,10 +281,10 @@ class BatchedComplexDotOp(ScikitsCudaOp):
doing multiple cublasCgemm calls. doing multiple cublasCgemm calls.
""" """
def make_node(self, inp1, inp2): def make_node(self, inp1, inp2):
inp1 = cuda.basic_ops.gpu_contiguous( inp1 = basic_ops.gpu_contiguous(
cuda.basic_ops.as_cuda_ndarray_variable(inp1)) basic_ops.as_cuda_ndarray_variable(inp1))
inp2 = cuda.basic_ops.gpu_contiguous( inp2 = basic_ops.gpu_contiguous(
cuda.basic_ops.as_cuda_ndarray_variable(inp2)) basic_ops.as_cuda_ndarray_variable(inp2))
assert inp1.dtype == "float32" assert inp1.dtype == "float32"
assert inp2.dtype == "float32" assert inp2.dtype == "float32"
...@@ -293,7 +294,7 @@ class BatchedComplexDotOp(ScikitsCudaOp): ...@@ -293,7 +294,7 @@ class BatchedComplexDotOp(ScikitsCudaOp):
return theano.Apply(self, [inp1, inp2], [self.output_type(inp1)()]) return theano.Apply(self, [inp1, inp2], [self.output_type(inp1)()])
def output_type(self, inp): def output_type(self, inp):
return cuda.CudaNdarrayType(broadcastable=[False] * inp.type.ndim) return CudaNdarrayType(broadcastable=[False] * inp.type.ndim)
def make_thunk(self, node, storage_map, _, _2): def make_thunk(self, node, storage_map, _, _2):
inputs = [storage_map[v] for v in node.inputs] inputs = [storage_map[v] for v in node.inputs]
...@@ -314,7 +315,7 @@ class BatchedComplexDotOp(ScikitsCudaOp): ...@@ -314,7 +315,7 @@ class BatchedComplexDotOp(ScikitsCudaOp):
# only allocate if there is no previous allocation of the # only allocate if there is no previous allocation of the
# right size. # right size.
if bz[0] is None or bz[0].shape != output_shape: if bz[0] is None or bz[0].shape != output_shape:
bz[0] = cuda.CudaNdarray.zeros(output_shape) bz[0] = CudaNdarray.zeros(output_shape)
input_bx_pycuda = to_complex_gpuarray(bx[0]) input_bx_pycuda = to_complex_gpuarray(bx[0])
input_by_pycuda = to_complex_gpuarray(by[0]) input_by_pycuda = to_complex_gpuarray(by[0])
......
...@@ -32,7 +32,6 @@ from theano.sandbox.cuda.blas import gpu_ger_inplace ...@@ -32,7 +32,6 @@ from theano.sandbox.cuda.blas import gpu_ger_inplace
from theano.sandbox.cuda.blas import gpu_ger_no_inplace from theano.sandbox.cuda.blas import gpu_ger_no_inplace
from theano.sandbox.cuda.blas import (GpuDownsampleFactorMax, from theano.sandbox.cuda.blas import (GpuDownsampleFactorMax,
GpuDownsampleFactorMaxGrad) GpuDownsampleFactorMaxGrad)
from theano.sandbox.cuda.fftconv import conv2d_fft
from theano.sandbox.cuda.nnet import ( from theano.sandbox.cuda.nnet import (
GpuCrossentropySoftmaxArgmax1HotWithBias, GpuCrossentropySoftmaxArgmax1HotWithBias,
GpuCrossentropySoftmax1HotWithBiasDx, GpuCrossentropySoftmax1HotWithBiasDx,
...@@ -41,6 +40,7 @@ from theano.sandbox.cuda.elemwise import SupportCodeError ...@@ -41,6 +40,7 @@ from theano.sandbox.cuda.elemwise import SupportCodeError
from theano.scalar.basic_scipy import Erfinv from theano.scalar.basic_scipy import Erfinv
from theano.sandbox.cuda.elemwise import erfinv_gpu from theano.sandbox.cuda.elemwise import erfinv_gpu
from theano.sandbox.cuda.var import CudaNdarrayConstant from theano.sandbox.cuda.var import CudaNdarrayConstant
from theano.sandbox.cuda.fftconv import conv2d_fft
from theano.scan_module import scan_utils, scan_op, scan_opt from theano.scan_module import scan_utils, scan_op, scan_opt
from theano.tensor.blas import _is_real_vector, _is_real_matrix from theano.tensor.blas import _is_real_vector, _is_real_matrix
linalg = None linalg = None
...@@ -1124,11 +1124,9 @@ def local_gpu_conv(node): ...@@ -1124,11 +1124,9 @@ def local_gpu_conv(node):
@local_optimizer([GpuConv]) @local_optimizer([GpuConv])
def local_conv_fft(node): def local_conv_fft(node):
if (isinstance(node.op, GpuConv) and if (isinstance(node.op, GpuConv) and
node.op.border_mode == 'valid'): node.op.border_mode == 'valid' and
return [conv2d_fft(node.inputs[0], node.inputs[1], node.op.subsample == (1, 1)):
image_shape=node.op.imgshp, return [conv2d_fft(node.inputs[0], node.inputs[1])]
filter_shape=node.op.kshp)]
import theano.tensor.signal.downsample as downsample import theano.tensor.signal.downsample as downsample
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论