提交 a16e91f7 authored 作者: Gijs van Tulder's avatar Gijs van Tulder

theano.tensor.signal.Pool with 3D support.

上级 172e699c
...@@ -35,7 +35,7 @@ from .nnet import GpuSoftmax ...@@ -35,7 +35,7 @@ from .nnet import GpuSoftmax
from .opt import (gpu_seqopt, register_opt, from .opt import (gpu_seqopt, register_opt,
op_lifter, register_opt2) op_lifter, register_opt2)
from .opt_util import alpha_merge, output_merge, inplace_allocempty from .opt_util import alpha_merge, output_merge, inplace_allocempty, pad_dims, unpad_dims
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
...@@ -1253,7 +1253,7 @@ class GpuDnnPoolGrad(DnnBase): ...@@ -1253,7 +1253,7 @@ class GpuDnnPoolGrad(DnnBase):
return [shape[0]] return [shape[0]]
def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): def dnn_pool(img, ws, stride=None, mode='max', pad=None):
""" """
GPU pooling using cuDNN from NVIDIA. GPU pooling using cuDNN from NVIDIA.
...@@ -1267,13 +1267,13 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1267,13 +1267,13 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
img img
Images to do the pooling over. Images to do the pooling over.
ws : tuple ws : tuple
Subsampling window size. Subsampling window size. Should have 2 or 3 elements.
stride : tuple stride : tuple
Subsampling stride (default: (1, 1)). Subsampling stride (default: (1, 1) or (1, 1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'} mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad : tuple pad : tuple
(padX, padY) or (padX, padY, padZ) (padX, padY) or (padX, padY, padZ)
default: (0, 0) default: (0, 0) or (0, 0, 0)
.. warning:: The cuDNN library only works with GPU that have a compute .. warning:: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higher. This means that older GPU will not capability of 3.0 or higher. This means that older GPU will not
...@@ -1285,6 +1285,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1285,6 +1285,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
if stride is None:
stride = (1,) * len(ws)
if pad is None:
pad = (0,) * len(ws)
if mode == "sum": if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad) ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
context_name = ret.type.context_name context_name = ret.type.context_name
...@@ -1868,9 +1872,18 @@ def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs): ...@@ -1868,9 +1872,18 @@ def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
if not op.ignore_border: if not op.ignore_border:
return return
img, ws, stride, pad = inputs img, ws, stride, pad = inputs
img = as_gpuarray_variable(img, ctx_name) nd = op.ndim if op.ndim else (img.ndim - 2)
if nd not in (2, 3):
return
img = gpu_contiguous(as_gpuarray_variable(img, ctx_name))
mode = op.mode mode = op.mode
return dnn_pool(gpu_contiguous(img), ws, stride=stride, pad=pad, mode=mode) if img.ndim == nd + 2:
return dnn_pool(img, ws, stride=stride, pad=pad, mode=mode)
else:
# reshape to 4D or 5D with 2 non-pooling dimensions
img_padded = pad_dims(img, 2, nd)
ret_padded = dnn_pool(img_padded, ws, stride=stride, pad=pad, mode=mode)
return unpad_dims(ret_padded, img, 2, nd)
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
...@@ -1882,17 +1895,33 @@ def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs): ...@@ -1882,17 +1895,33 @@ def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not op.ignore_border: if not op.ignore_border:
return return
inp, out, out_grad, ws, stride, pad = inputs inp, out, out_grad, ws, stride, pad = inputs
inp = as_gpuarray_variable(inp, ctx_name) nd = op.ndim if op.ndim else (inp.ndim - 2)
out = as_gpuarray_variable(out, ctx_name) if nd not in (2, 3):
out_grad = as_gpuarray_variable(out_grad, ctx_name) return
inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
mode = op.mode mode = op.mode
return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), if inp.ndim == nd + 2:
gpu_contiguous(out), return GpuDnnPoolGrad(mode=mode)(inp,
gpu_contiguous(out_grad), out,
ws, out_grad,
stride, ws,
pad) stride,
pad)
else:
# reshape to 4D or 5D with 2 non-pooling dimensions
inp_padded = pad_dims(inp, 2, nd)
out_padded = pad_dims(out, 2, nd)
out_grad_padded = pad_dims(out_grad, 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
out_padded,
out_grad_padded,
ws,
stride,
pad)
return unpad_dims(ret_padded, inp, 2, nd)
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
...@@ -1904,16 +1933,28 @@ def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs): ...@@ -1904,16 +1933,28 @@ def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not op.ignore_border: if not op.ignore_border:
return return
inp, out_grad, ws, stride, pad = inputs inp, out_grad, ws, stride, pad = inputs
inp = as_gpuarray_variable(inp, ctx_name) nd = op.ndim if op.ndim else (inp.ndim - 2)
out_grad = as_gpuarray_variable(out_grad, ctx_name) if nd not in (2, 3):
return
inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
mode = op.mode mode = op.mode
cg = gpu_contiguous(out_grad) if inp.ndim == nd + 2:
# We reuse out_grad because cuDNN does not use the value of the `out`
# We reuse cg because cuDNN does not use the value of the `out` # argument but still checks its shape for average pooling. This
# argument but still checks its shape for average pooling. This # has been observed in v2 and v3 as far as I know.
# has been observed in v2 and v3 as far as I know. return GpuDnnPoolGrad(mode=mode)(inp, out_grad, out_grad, ws, stride, pad)
return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), cg, cg, ws, stride, pad) else:
inp_padded = pad_dims(inp, 2, nd)
out_grad_padded = pad_dims(out_grad, 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
out_grad_padded,
out_grad_padded,
ws,
stride,
pad)
return unpad_dims(ret_padded, inp, 2, nd)
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
......
...@@ -3,12 +3,12 @@ from functools import wraps ...@@ -3,12 +3,12 @@ from functools import wraps
import numpy import numpy
from theano import scalar as scal, Constant from theano import tensor, scalar as scal, Constant
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value, from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError) NotScalarConstantError)
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, gpu_alloc_empty from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, GpuReshape, gpu_alloc_empty
from .elemwise import GpuDimShuffle, GpuElemwise from .elemwise import GpuDimShuffle, GpuElemwise
_one = scal.constant(numpy.asarray(1.0, dtype='float32')) _one = scal.constant(numpy.asarray(1.0, dtype='float32'))
...@@ -329,3 +329,48 @@ def inplace_allocempty(op, idx): ...@@ -329,3 +329,48 @@ def inplace_allocempty(op, idx):
return maker(node, inputs) return maker(node, inputs)
return opt return opt
return wrapper return wrapper
def pad_dims(input, leftdims, rightdims):
    """Reshape `input` into a tensor with (leftdims + rightdims) dimensions.

    Helper used to bring pooling inputs with an arbitrary number of
    non-pooling dimensions to the number of dimensions expected by the
    GPU pooling ops.

    The last `rightdims` dimensions of `input` are kept unchanged. All
    dimensions in front of them are collapsed into a single dimension
    (their product), which is then followed by `leftdims - 1` dimensions
    of length 1. For example, with leftdims=2 and rightdims=2 the result
    has shape (prod(leading dims), 1, height, width).

    If `input` already has (leftdims + rightdims) dimensions it is
    returned as is.
    """
    assert input.ndim >= rightdims

    target_ndim = leftdims + rightdims
    if input.ndim == target_ndim:
        return input

    # the trailing (pooling) dimensions are passed through untouched
    trailing_shape = input.shape[-rightdims:]

    # total size of all leading dimensions, padded on the right so that
    # it can be joined with the other shape components below
    leading_size = tensor.shape_padright(
        tensor.prod(input.shape[:-rightdims]), 1)

    # placeholder dimensions of length 1 for the remaining left dimensions
    unit_dims = tensor.as_tensor([1] * (leftdims - 1))

    # assemble the target shape, e.g. (batch_size, 1, height, width)
    joined_shape = tensor.join(0, leading_size, unit_dims, trailing_shape)
    new_shape = tensor.cast(joined_shape, 'int64')

    return GpuReshape(target_ndim)(input, new_shape)
def unpad_dims(output, input, leftdims, rightdims):
    """Reshape `output` back to the layout `input` had before `pad_dims`.

    The leading dimensions are taken from `input` (everything except its
    last `rightdims` dimensions) and the last `rightdims` dimensions are
    taken from `output`. If `output` already has as many dimensions as
    `input`, it is returned unchanged.

    `leftdims` is accepted for symmetry with `pad_dims` but is not used.
    """
    if output.ndim != input.ndim:
        # original leading dimensions + trailing dimensions of the result
        leading_shape = input.shape[:-rightdims]
        trailing_shape = output.shape[-rightdims:]
        restored_shape = tensor.join(0, leading_shape, trailing_shape)
        return GpuReshape(input.ndim)(output, restored_shape)
    return output
差异被折叠。
...@@ -40,6 +40,7 @@ from theano.sandbox.cuda.basic_ops import ( ...@@ -40,6 +40,7 @@ from theano.sandbox.cuda.basic_ops import (
GpuSubtensor, GpuAdvancedSubtensor1, GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape, GpuSplit, GpuAllocEmpty) GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape, GpuSplit, GpuAllocEmpty)
from theano.sandbox.cuda.opt_util import pad_dims, unpad_dims
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.blas import ( from theano.sandbox.cuda.blas import (
...@@ -1891,15 +1892,12 @@ def local_convtransp3d_gemm(node): ...@@ -1891,15 +1892,12 @@ def local_convtransp3d_gemm(node):
gpu_optimizer.register("convtransp3d_gemm", local_convtransp3d_gemm) gpu_optimizer.register("convtransp3d_gemm", local_convtransp3d_gemm)
def _check_constant_args_pool(ws, stride, pad, node): def _check_constant_args_pool(ndim, ws, stride, pad, node):
"""Check if the args of pool are constants. Warns if not.""" """Check if the args of pool are constants. Warns if not."""
try: try:
ws_w = tensor.get_scalar_constant_value(ws[0]) ws = tuple(tensor.get_scalar_constant_value(ws[i]) for i in range(ndim))
ws_h = tensor.get_scalar_constant_value(ws[1]) stride = tuple(tensor.get_scalar_constant_value(stride[i]) for i in range(ndim))
stride_w = tensor.get_scalar_constant_value(stride[0]) pad = tuple(tensor.get_scalar_constant_value(pad[i]) for i in range(ndim))
stride_h = tensor.get_scalar_constant_value(stride[1])
pad_w = tensor.get_scalar_constant_value(pad[0])
pad_h = tensor.get_scalar_constant_value(pad[1])
except tensor.NotScalarConstantError: except tensor.NotScalarConstantError:
msg = ("Pool with tensor variable for the window size, stride or " msg = ("Pool with tensor variable for the window size, stride or "
"padding is only supported in the new GPU backend, so this op " "padding is only supported in the new GPU backend, so this op "
...@@ -1909,65 +1907,96 @@ def _check_constant_args_pool(ws, stride, pad, node): ...@@ -1909,65 +1907,96 @@ def _check_constant_args_pool(ws, stride, pad, node):
elif config.assert_no_cpu_op == "raise": elif config.assert_no_cpu_op == "raise":
raise AssertionError(msg) raise AssertionError(msg)
return None return None
ws = (ws_w, ws_h)
stride = (stride_w, stride_h)
pad = (pad_w, pad_h)
return ws, stride, pad return ws, stride, pad
@register_opt() @register_opt()
@local_optimizer([pool.Pool]) @local_optimizer([pool.Pool])
def local_gpu_downsample_factor_max(node): def local_gpu_downsample_factor_max(node):
if isinstance(node.op, pool.Pool): if (isinstance(node.op, pool.Pool)):
assert node.op.__props__ == ('ignore_border', 'mode') assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, ws, stride, pad = node.inputs x, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node) nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None: if ret is None:
return return
ws, stride, pad = ret ws, stride, pad = ret
if (pad) != (0, 0) or node.op.mode != 'max' or stride != ws: if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return return
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds = GpuDownsampleFactorMax(ws, node.op.ignore_border) gpu_ws = GpuDownsampleFactorMax(ws, node.op.ignore_border)
return [host_from_gpu(gpu_ds(x.owner.inputs[0]))] if node.inputs[0].ndim == 4:
return [host_from_gpu(gpu_ws(x.owner.inputs[0]))]
else:
input_4D = pad_dims(x.owner.inputs[0], 2, 2)
output_4D = gpu_ws(input_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt() @register_opt()
@local_optimizer([pool.MaxPoolGrad]) @local_optimizer([pool.MaxPoolGrad])
def local_gpu_downsample_factor_max_grad(node): def local_gpu_downsample_factor_max_grad(node):
if isinstance(node.op, pool.MaxPoolGrad): if (isinstance(node.op, pool.MaxPoolGrad)):
assert node.op.__props__ == ('ignore_border', 'mode') assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, z, gz, ws, stride, pad = node.inputs x, z, gz, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node) nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None: if ret is None:
return return
ws, stride, pad = ret ws, stride, pad = ret
if pad != (0, 0) or node.op.mode != 'max' or stride != ws: if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return return
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds_grad = GpuDownsampleFactorMaxGrad(ws, node.op.ignore_border) gpu_ws_grad = GpuDownsampleFactorMaxGrad(ws, node.op.ignore_border)
return [host_from_gpu(gpu_ds_grad(x.owner.inputs[0], if node.inputs[0].ndim == 4:
as_cuda_ndarray_variable(z), return [host_from_gpu(gpu_ws_grad(x.owner.inputs[0],
as_cuda_ndarray_variable(gz)))] as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gz)))]
else:
x_4D = pad_dims(x.owner.inputs[0], 2, 2)
z_4D = pad_dims(as_cuda_ndarray_variable(z), 2, 2)
gz_4D = pad_dims(as_cuda_ndarray_variable(gz), 2, 2)
output_4D = gpu_ws_grad(x_4D, z_4D, gz_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt() @register_opt()
@local_optimizer([pool.DownsampleFactorMaxGradGrad]) @local_optimizer([pool.DownsampleFactorMaxGradGrad])
def local_gpu_downsample_factor_max_grad_grad(node): def local_gpu_downsample_factor_max_grad_grad(node):
if isinstance(node.op, pool.DownsampleFactorMaxGradGrad): if isinstance(node.op, pool.DownsampleFactorMaxGradGrad):
assert node.op.__props__ == ('ignore_border', 'mode') assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, z, gx, ws, stride, pad = node.inputs x, z, gx, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node) nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None: if ret is None:
return return
ws, stride, pad = ret ws, stride, pad = ret
if pad != (0, 0) or node.op.mode != 'max' or stride != ws: if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return return
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
op = GpuDownsampleFactorMaxGradGrad(ws, node.op.ignore_border) op = GpuDownsampleFactorMaxGradGrad(ws, node.op.ignore_border)
return [host_from_gpu(op(x.owner.inputs[0], if node.inputs[0].ndim == 4:
as_cuda_ndarray_variable(z), return [host_from_gpu(op(x.owner.inputs[0],
as_cuda_ndarray_variable(gx)))] as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gx)))]
else:
x_4D = pad_dims(x.owner.inputs[0], 2, 2)
z_4D = pad_dims(as_cuda_ndarray_variable(z), 2, 2)
gx_4D = pad_dims(as_cuda_ndarray_variable(gx), 2, 2)
output_4D = op(x_4D, z_4D, gx_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt() @register_opt()
......
...@@ -3,13 +3,13 @@ from functools import wraps ...@@ -3,13 +3,13 @@ from functools import wraps
import numpy import numpy
from theano import scalar as scal, Constant from theano import tensor, scalar as scal, Constant
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value, from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError) NotScalarConstantError)
from theano.sandbox.cuda.basic_ops import ( from theano.sandbox.cuda.basic_ops import (
GpuFromHost, HostFromGpu, host_from_gpu, GpuDimShuffle, GpuElemwise) GpuFromHost, HostFromGpu, host_from_gpu, GpuDimShuffle, GpuElemwise, GpuReshape)
_one = scal.constant(numpy.asarray(1.0, dtype='float32')) _one = scal.constant(numpy.asarray(1.0, dtype='float32'))
...@@ -126,3 +126,48 @@ def output_merge(cls, alpha_in, beta_in, out_in): ...@@ -126,3 +126,48 @@ def output_merge(cls, alpha_in, beta_in, out_in):
return maker(targ, *inputs) return maker(targ, *inputs)
return opt return opt
return wrapper return wrapper
def pad_dims(input, leftdims, rightdims):
    """Reshape `input` into a tensor with (leftdims + rightdims) dimensions.

    Helper used to bring pooling inputs with an arbitrary number of
    non-pooling dimensions to the number of dimensions expected by the
    GPU pooling ops.

    The last `rightdims` dimensions of `input` are kept unchanged. All
    dimensions in front of them are collapsed into a single dimension
    (their product), which is then followed by `leftdims - 1` dimensions
    of length 1. For example, with leftdims=2 and rightdims=2 the result
    has shape (prod(leading dims), 1, height, width).

    If `input` already has (leftdims + rightdims) dimensions it is
    returned as is.
    """
    assert input.ndim >= rightdims

    target_ndim = leftdims + rightdims
    if input.ndim == target_ndim:
        return input

    # the trailing (pooling) dimensions are passed through untouched
    trailing_shape = input.shape[-rightdims:]

    # total size of all leading dimensions, padded on the right so that
    # it can be joined with the other shape components below
    leading_size = tensor.shape_padright(
        tensor.prod(input.shape[:-rightdims]), 1)

    # placeholder dimensions of length 1 for the remaining left dimensions
    unit_dims = tensor.as_tensor([1] * (leftdims - 1))

    # assemble the target shape, e.g. (batch_size, 1, height, width)
    joined_shape = tensor.join(0, leading_size, unit_dims, trailing_shape)
    new_shape = tensor.cast(joined_shape, 'int64')

    return GpuReshape(target_ndim)(input, new_shape)
def unpad_dims(output, input, leftdims, rightdims):
    """Reshape `output` back to the layout `input` had before `pad_dims`.

    The leading dimensions are taken from `input` (everything except its
    last `rightdims` dimensions) and the last `rightdims` dimensions are
    taken from `output`. If `output` already has as many dimensions as
    `input`, it is returned unchanged.

    `leftdims` is accepted for symmetry with `pad_dims` but is not used.
    """
    if output.ndim != input.ndim:
        # original leading dimensions + trailing dimensions of the result
        leading_shape = input.shape[:-rightdims]
        trailing_shape = output.shape[-rightdims:]
        restored_shape = tensor.join(0, leading_shape, trailing_shape)
        return GpuReshape(input.ndim)(output, restored_shape)
    return output
...@@ -326,7 +326,9 @@ if 0: ...@@ -326,7 +326,9 @@ if 0:
def test_downsample(): def test_downsample():
shps = [(1, 1, 1, 12), shps = [(1, 12),
(1, 1, 12),
(1, 1, 1, 12),
(1, 1, 2, 2), (1, 1, 2, 2),
(1, 1, 1, 1), (1, 1, 1, 1),
(1, 1, 4, 4), (1, 1, 4, 4),
...@@ -359,17 +361,17 @@ def test_downsample(): ...@@ -359,17 +361,17 @@ def test_downsample():
for shp in shps: for shp in shps:
for ds in (2, 2), (3, 2), (1, 1): for ds in (2, 2), (3, 2), (1, 1):
if ds[0] > shp[2]: if ds[0] > shp[-2]:
continue continue
if ds[1] > shp[3]: if ds[1] > shp[-1]:
continue continue
# GpuDownsampleFactorMax doesn't like having more than 512 columns # GpuDownsampleFactorMax doesn't like having more than 512 columns
# in the output tensor. # in the output tensor.
if float(shp[3]) / ds[1] > 512: if float(shp[-1]) / ds[1] > 512:
continue continue
for ignore_border in (True, False): for ignore_border in (True, False):
# print 'test_downsample', shp, ds, ignore_border # print 'test_downsample', shp, ds, ignore_border
ds_op = Pool(ignore_border=ignore_border) ds_op = Pool(ndim=len(ds), ignore_border=ignore_border)
a = tcn.shared_constructor(my_rand(*shp), 'a') a = tcn.shared_constructor(my_rand(*shp), 'a')
f = pfunc([], ds_op(tensor.as_tensor_variable(a), ds), f = pfunc([], ds_op(tensor.as_tensor_variable(a), ds),
......
...@@ -15,8 +15,8 @@ import theano ...@@ -15,8 +15,8 @@ import theano
import theano.tensor as T import theano.tensor as T
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs from theano.sandbox.neighbours import images2neibs
from theano.tensor.signal.pool import pool_2d from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import MaxPoolGrad, AveragePoolGrad from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
import theano.sandbox.cuda.dnn as dnn import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty
from theano.sandbox.cuda import float32_shared_constructor as shared from theano.sandbox.cuda import float32_shared_constructor as shared
...@@ -170,7 +170,7 @@ def test_dnn_conv_inplace(): ...@@ -170,7 +170,7 @@ def test_dnn_conv_inplace():
def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0), def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
pool_func=T.max, mode='ignore_borders'): pool_function=T.max, mode='ignore_borders'):
if strides is None: if strides is None:
strides = ds strides = ds
...@@ -179,13 +179,13 @@ def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0), ...@@ -179,13 +179,13 @@ def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
# reshape to B, C*0, 1, 2 and do the pooling on 1, 2 # reshape to B, C*0, 1, 2 and do the pooling on 1, 2
first = input.reshape((shape[0], shape[1] * shape[2], shape[3], shape[4])) first = input.reshape((shape[0], shape[1] * shape[2], shape[3], shape[4]))
pooled1 = pool_2d_i2n(first, ds=ds[1:], strides=strides[1:], pad=pad[1:], pooled1 = pool_2d_i2n(first, ds=ds[1:], strides=strides[1:], pad=pad[1:],
pool_function=pool_func, mode=mode) pool_function=pool_function, mode=mode)
shp1 = pooled1.shape shp1 = pooled1.shape
# reshape to B, C, 0, 1*2 and do the pooling on 0 # reshape to B, C, 0, 1*2 and do the pooling on 0
second = pooled1.reshape((shape[0], shape[1], shape[2], shp1[2] * shp1[3])) second = pooled1.reshape((shape[0], shape[1], shape[2], shp1[2] * shp1[3]))
pooled2 = pool_2d_i2n(second, ds=(ds[0], 1), strides=(strides[0], 1), pooled2 = pool_2d_i2n(second, ds=(ds[0], 1), strides=(strides[0], 1),
pad=(pad[0], 0), pool_function=pool_func, mode=mode) pad=(pad[0], 0), pool_function=pool_function, mode=mode)
shp2 = pooled2.shape shp2 = pooled2.shape
return pooled2.reshape((shape[0], shape[1], shp2[2], shp1[2], shp1[3])) return pooled2.reshape((shape[0], shape[1], shp2[2], shp1[2], shp1[3]))
...@@ -241,8 +241,6 @@ def test_pooling(): ...@@ -241,8 +241,6 @@ def test_pooling():
func = T.max func = T.max
else: else:
func = T.mean func = T.mean
if pad != (0, 0) and cuda.dnn.version() == -1:
continue
if pad != (0, 0) and func is T.mean: if pad != (0, 0) and func is T.mean:
continue continue
...@@ -418,6 +416,7 @@ def test_pooling3d(): ...@@ -418,6 +416,7 @@ def test_pooling3d():
if not cuda.dnn.dnn_available() or cuda.dnn.version() < (3000, 3000): if not cuda.dnn.dnn_available() or cuda.dnn.version() < (3000, 3000):
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
# We force the FAST_RUN as we don't want the reference to run in DebugMode.
mode_without_gpu_ref = theano.compile.mode.get_mode( mode_without_gpu_ref = theano.compile.mode.get_mode(
'FAST_RUN').excluding('gpu') 'FAST_RUN').excluding('gpu')
...@@ -427,8 +426,7 @@ def test_pooling3d(): ...@@ -427,8 +426,7 @@ def test_pooling3d():
else: else:
modes = ('max', 'average_inc_pad', 'average_exc_pad') modes = ('max', 'average_inc_pad', 'average_exc_pad')
x = T.TensorType(broadcastable=(False, False, False, False, False), x = T.ftensor5()
dtype='float32')()
for mode, pad in product(modes, for mode, pad in product(modes,
((0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1), ((0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1),
(2, 3, 2), (3, 2, 2), (2, 2, 3))): (2, 3, 2), (3, 2, 2), (2, 2, 3))):
...@@ -436,8 +434,6 @@ def test_pooling3d(): ...@@ -436,8 +434,6 @@ def test_pooling3d():
func = T.max func = T.max
else: else:
func = T.mean func = T.mean
if pad != (0, 0, 0) and cuda.dnn.version() == -1:
continue
if pad != (0, 0, 0) and func is T.mean: if pad != (0, 0, 0) and func is T.mean:
continue continue
...@@ -449,13 +445,13 @@ def test_pooling3d(): ...@@ -449,13 +445,13 @@ def test_pooling3d():
if pad[0] > stride or pad[1] > stride or pad[2] > stride: if pad[0] > stride or pad[1] > stride or pad[2] > stride:
# Not implemented # Not implemented
continue continue
out1 = cuda.dnn.dnn_pool(x, (ws, ws, ws), out1 = pool_3d(x, (ws, ws, ws),
stride=(stride, stride, stride), st=(stride, stride, stride),
pad=pad, mode=mode) ignore_border=True,
out2 = pool3d2d(x, ds=(ws, ws, ws), padding=pad, mode=mode)
strides=(stride, stride, stride), out2 = pool3d2d(x, ds=(ws, ws, ws), strides=(stride, stride, stride),
pad=pad, pool_func=func) pad=pad,
pool_function=func)
f1 = theano.function([x], out1, mode=mode_with_gpu) f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool) assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes]) for node in f1.maker.fgraph.apply_nodes])
...@@ -510,11 +506,17 @@ def test_pooling3d(): ...@@ -510,11 +506,17 @@ def test_pooling3d():
g_out = fg(data) g_out = fg(data)
# Compare against the CPU result # Compare against the CPU result
out = pool3d2d(x, (ws, ws, ws), out = pool_3d(x, (ws, ws, ws),
strides=(stride, stride, stride), padding=pad,
pad=pad, pool_func=func) ignore_border=True, mode=mode)
fc = theano.function([x], theano.grad(out.sum(), x), fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu_ref) mode=mode_without_gpu_ref)
if mode == 'max':
assert any([isinstance(node.op, MaxPoolGrad)
for node in fc.maker.fgraph.toposort()])
else:
assert any([isinstance(node.op, AveragePoolGrad)
for node in fc.maker.fgraph.toposort()])
c_out = fc(data) c_out = fc(data)
utt.assert_allclose(c_out, g_out) utt.assert_allclose(c_out, g_out)
...@@ -523,6 +525,7 @@ def test_pooling_opt(): ...@@ -523,6 +525,7 @@ def test_pooling_opt():
if not cuda.dnn.dnn_available(): if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
# 2D pooling
x = T.fmatrix() x = T.fmatrix()
f = theano.function( f = theano.function(
...@@ -535,6 +538,7 @@ def test_pooling_opt(): ...@@ -535,6 +538,7 @@ def test_pooling_opt():
f(numpy.zeros((10, 10), dtype='float32')) f(numpy.zeros((10, 10), dtype='float32'))
# gradient of 2D pooling
f = theano.function( f = theano.function(
[x], [x],
T.grad(pool_2d(x, ds=(2, 2), mode='average_inc_pad', T.grad(pool_2d(x, ds=(2, 2), mode='average_inc_pad',
...@@ -545,6 +549,7 @@ def test_pooling_opt(): ...@@ -545,6 +549,7 @@ def test_pooling_opt():
for n in f.maker.fgraph.toposort()]) for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10), dtype='float32')) f(numpy.zeros((10, 10), dtype='float32'))
# Test sum pooling # Test sum pooling
f = theano.function( f = theano.function(
[x], [x],
...@@ -557,6 +562,82 @@ def test_pooling_opt(): ...@@ -557,6 +562,82 @@ def test_pooling_opt():
data = numpy.random.rand(10, 10).astype('float32') data = numpy.random.rand(10, 10).astype('float32')
f(data) f(data)
# 3D pooling
x = T.ftensor3()
f = theano.function(
[x],
pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad', ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10, 10), dtype='float32'))
# gradient of 3D pooling
f = theano.function(
[x],
T.grad(pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad',
ignore_border=True).sum(), x),
mode=mode_with_gpu.including("cudnn"))
assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10, 10), dtype='float32'))
def test_pooling_opt_arbitrary_dimensions():
    # Test that an input with an arbitrary number of non-pooling
    # dimensions is correctly reshaped to run on the GPU, and that the
    # GPU results (forward and gradient) match the CPU reference.
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    # 'average_exc_pad' is disabled for versions < 4004
    if cuda.dnn.version() < (4004, 4004):
        modes = ('max', 'average_inc_pad')
    else:
        modes = ('max', 'average_inc_pad', 'average_exc_pad')

    for n_non_pool_dims in (0, 1, 2, 3):
        for ws in ((2, 2), (3, 3, 3)):
            # create input shape: non-pooling dimensions
            # followed by 2 or 3 pooling dimensions
            shp = (2,) * n_non_pool_dims + (5,) * len(ws)
            data = numpy.random.normal(0, 1, shp).astype('float32')
            input = shared(data)

            for mode in modes:
                out_pool = Pool(ndim=len(ws), mode=mode,
                                ignore_border=True)(input, ws)
                out_pool_grad = T.grad(T.sum(out_pool), wrt=input)
                out = [out_pool, out_pool_grad]

                # run on GPU: both the pooling op and its gradient must
                # have been replaced by their cuDNN counterparts
                fg = theano.function([], out, mode=mode_with_gpu)
                assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
                            for node in fg.maker.fgraph.toposort()])
                assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
                            for node in fg.maker.fgraph.toposort()])
                res_gpu = fg()

                # run on CPU: the graph must still contain the CPU ops
                fc = theano.function([], out, mode=mode_without_gpu)
                assert any([isinstance(node.op, Pool)
                            for node in fc.maker.fgraph.toposort()])
                if mode == 'max':
                    assert any([isinstance(node.op, MaxPoolGrad)
                                for node in fc.maker.fgraph.toposort()])
                else:
                    assert any([isinstance(node.op, AveragePoolGrad)
                                for node in fc.maker.fgraph.toposort()])
                # Evaluate the CPU function here (not `fg` again);
                # otherwise the comparison below would check the GPU
                # results against themselves.
                res_cpu = fc()

                # check for similarity
                utt.assert_allclose(res_gpu[0], res_cpu[0])
                utt.assert_allclose(res_gpu[1], res_cpu[1])
class test_DnnSoftMax(test_nnet.test_SoftMax): class test_DnnSoftMax(test_nnet.test_SoftMax):
gpu_op = dnn.GpuDnnSoftmax gpu_op = dnn.GpuDnnSoftmax
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论