提交 a16e91f7 authored 作者: Gijs van Tulder's avatar Gijs van Tulder

theano.tensor.signal.Pool with 3D support.

上级 172e699c
......@@ -35,7 +35,7 @@ from .nnet import GpuSoftmax
from .opt import (gpu_seqopt, register_opt,
op_lifter, register_opt2)
from .opt_util import alpha_merge, output_merge, inplace_allocempty
from .opt_util import alpha_merge, output_merge, inplace_allocempty, pad_dims, unpad_dims
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
......@@ -1253,7 +1253,7 @@ class GpuDnnPoolGrad(DnnBase):
return [shape[0]]
def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
def dnn_pool(img, ws, stride=None, mode='max', pad=None):
"""
GPU pooling using cuDNN from NVIDIA.
......@@ -1267,13 +1267,13 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
img
Images to do the pooling over.
ws : tuple
Subsampling window size.
Subsampling window size. Should have 2 or 3 elements.
stride : tuple
Subsampling stride (default: (1, 1)).
Subsampling stride (default: (1, 1) or (1, 1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad : tuple
(padX, padY) or (padX, padY, padZ)
default: (0, 0)
default: (0, 0) or (0, 0, 0)
.. warning:: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higher. This means that older GPUs will not
......@@ -1285,6 +1285,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
"""
img = gpu_contiguous(img)
if stride is None:
stride = (1,) * len(ws)
if pad is None:
pad = (0,) * len(ws)
if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
context_name = ret.type.context_name
......@@ -1868,9 +1872,18 @@ def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
if not op.ignore_border:
return
img, ws, stride, pad = inputs
img = as_gpuarray_variable(img, ctx_name)
nd = op.ndim if op.ndim else (img.ndim - 2)
if nd not in (2, 3):
return
img = gpu_contiguous(as_gpuarray_variable(img, ctx_name))
mode = op.mode
return dnn_pool(gpu_contiguous(img), ws, stride=stride, pad=pad, mode=mode)
if img.ndim == nd + 2:
return dnn_pool(img, ws, stride=stride, pad=pad, mode=mode)
else:
# reshape to 4D or 5D with 2 non-pooling dimensions
img_padded = pad_dims(img, 2, nd)
ret_padded = dnn_pool(img_padded, ws, stride=stride, pad=pad, mode=mode)
return unpad_dims(ret_padded, img, 2, nd)
@register_opt('cudnn', 'fast_compile')
......@@ -1882,17 +1895,33 @@ def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not op.ignore_border:
return
inp, out, out_grad, ws, stride, pad = inputs
inp = as_gpuarray_variable(inp, ctx_name)
out = as_gpuarray_variable(out, ctx_name)
out_grad = as_gpuarray_variable(out_grad, ctx_name)
nd = op.ndim if op.ndim else (inp.ndim - 2)
if nd not in (2, 3):
return
inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
mode = op.mode
return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
gpu_contiguous(out),
gpu_contiguous(out_grad),
ws,
stride,
pad)
if inp.ndim == nd + 2:
return GpuDnnPoolGrad(mode=mode)(inp,
out,
out_grad,
ws,
stride,
pad)
else:
# reshape to 4D or 5D with 2 non-pooling dimensions
inp_padded = pad_dims(inp, 2, nd)
out_padded = pad_dims(out, 2, nd)
out_grad_padded = pad_dims(out_grad, 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
out_padded,
out_grad_padded,
ws,
stride,
pad)
return unpad_dims(ret_padded, inp, 2, nd)
@register_opt('cudnn', 'fast_compile')
......@@ -1904,16 +1933,28 @@ def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not op.ignore_border:
return
inp, out_grad, ws, stride, pad = inputs
inp = as_gpuarray_variable(inp, ctx_name)
out_grad = as_gpuarray_variable(out_grad, ctx_name)
nd = op.ndim if op.ndim else (inp.ndim - 2)
if nd not in (2, 3):
return
inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
mode = op.mode
cg = gpu_contiguous(out_grad)
# We reuse cg because cuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know.
return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), cg, cg, ws, stride, pad)
if inp.ndim == nd + 2:
# We reuse out_grad because cuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know.
return GpuDnnPoolGrad(mode=mode)(inp, out_grad, out_grad, ws, stride, pad)
else:
inp_padded = pad_dims(inp, 2, nd)
out_grad_padded = pad_dims(out_grad, 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
out_grad_padded,
out_grad_padded,
ws,
stride,
pad)
return unpad_dims(ret_padded, inp, 2, nd)
@register_opt('cudnn', 'fast_compile')
......
......@@ -3,12 +3,12 @@ from functools import wraps
import numpy
from theano import scalar as scal, Constant
from theano import tensor, scalar as scal, Constant
from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError)
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, gpu_alloc_empty
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, GpuReshape, gpu_alloc_empty
from .elemwise import GpuDimShuffle, GpuElemwise
_one = scal.constant(numpy.asarray(1.0, dtype='float32'))
......@@ -329,3 +329,48 @@ def inplace_allocempty(op, idx):
return maker(node, inputs)
return opt
return wrapper
def pad_dims(input, leftdims, rightdims):
    """Reshape `input` into a tensor with `leftdims + rightdims` dimensions.

    Helper used to bring pooling inputs that have an arbitrary number of
    non-pooling dimensions to the number of dimensions expected by the
    GPU pooling ops.

    The trailing `rightdims` dimensions of `input` are kept as they are.
    Every dimension in front of them is collapsed into one leading
    dimension, which is then followed by `leftdims - 1` dimensions of
    length 1.  For example, with ``leftdims=2`` and ``rightdims=2`` the
    result has shape ``(prod(leading dims), 1, height, width)``.

    If `input` already has `leftdims + rightdims` dimensions it is
    returned unchanged.
    """
    assert input.ndim >= rightdims

    target_ndim = leftdims + rightdims
    if input.ndim == target_ndim:
        return input

    # Product of the lengths of all leading (non-pooling) dimensions,
    # padded on the right so that it can be joined with the other
    # pieces of the new shape.
    n_leading = tensor.shape_padright(
        tensor.prod(input.shape[:-rightdims]), 1)
    # Length-1 placeholders filling up the remaining left dimensions.
    unit_dims = tensor.as_tensor([1] * (leftdims - 1))
    # The trailing (pooling) dimensions are carried over untouched.
    trailing = input.shape[-rightdims:]

    target_shape = tensor.cast(
        tensor.join(0, n_leading, unit_dims, trailing), 'int64')
    return GpuReshape(target_ndim)(input, target_shape)
def unpad_dims(output, input, leftdims, rightdims):
    """Reshape `output` so that it matches the layout of `input`.

    This reverts the reshaping done by `pad_dims`: the leading
    dimensions are taken from `input`, the trailing `rightdims`
    dimensions are taken from `output`.  When `output` and `input`
    already have the same number of dimensions, `output` is returned
    unchanged.

    `leftdims` is not used by this function.
    """
    if output.ndim != input.ndim:
        # original leading dims + (possibly pooled) trailing dims
        leading = input.shape[:-rightdims]
        trailing = output.shape[-rightdims:]
        restored_shape = tensor.join(0, leading, trailing)
        return GpuReshape(input.ndim)(output, restored_shape)
    return output
差异被折叠。
......@@ -40,6 +40,7 @@ from theano.sandbox.cuda.basic_ops import (
GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape, GpuSplit, GpuAllocEmpty)
from theano.sandbox.cuda.opt_util import pad_dims, unpad_dims
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.blas import (
......@@ -1891,15 +1892,12 @@ def local_convtransp3d_gemm(node):
gpu_optimizer.register("convtransp3d_gemm", local_convtransp3d_gemm)
def _check_constant_args_pool(ws, stride, pad, node):
def _check_constant_args_pool(ndim, ws, stride, pad, node):
"""Check if the args of pool are constants. Warns if not."""
try:
ws_w = tensor.get_scalar_constant_value(ws[0])
ws_h = tensor.get_scalar_constant_value(ws[1])
stride_w = tensor.get_scalar_constant_value(stride[0])
stride_h = tensor.get_scalar_constant_value(stride[1])
pad_w = tensor.get_scalar_constant_value(pad[0])
pad_h = tensor.get_scalar_constant_value(pad[1])
ws = tuple(tensor.get_scalar_constant_value(ws[i]) for i in range(ndim))
stride = tuple(tensor.get_scalar_constant_value(stride[i]) for i in range(ndim))
pad = tuple(tensor.get_scalar_constant_value(pad[i]) for i in range(ndim))
except tensor.NotScalarConstantError:
msg = ("Pool with tensor variable for the window size, stride or "
"padding is only supported in the new GPU backend, so this op "
......@@ -1909,65 +1907,96 @@ def _check_constant_args_pool(ws, stride, pad, node):
elif config.assert_no_cpu_op == "raise":
raise AssertionError(msg)
return None
ws = (ws_w, ws_h)
stride = (stride_w, stride_h)
pad = (pad_w, pad_h)
return ws, stride, pad
@register_opt()
@local_optimizer([pool.Pool])
def local_gpu_downsample_factor_max(node):
if isinstance(node.op, pool.Pool):
assert node.op.__props__ == ('ignore_border', 'mode')
if (isinstance(node.op, pool.Pool)):
assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node)
nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None:
return
ws, stride, pad = ret
if (pad) != (0, 0) or node.op.mode != 'max' or stride != ws:
if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return
if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds = GpuDownsampleFactorMax(ws, node.op.ignore_border)
return [host_from_gpu(gpu_ds(x.owner.inputs[0]))]
gpu_ws = GpuDownsampleFactorMax(ws, node.op.ignore_border)
if node.inputs[0].ndim == 4:
return [host_from_gpu(gpu_ws(x.owner.inputs[0]))]
else:
input_4D = pad_dims(x.owner.inputs[0], 2, 2)
output_4D = gpu_ws(input_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt()
@local_optimizer([pool.MaxPoolGrad])
def local_gpu_downsample_factor_max_grad(node):
if isinstance(node.op, pool.MaxPoolGrad):
assert node.op.__props__ == ('ignore_border', 'mode')
if (isinstance(node.op, pool.MaxPoolGrad)):
assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, z, gz, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node)
nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None:
return
ws, stride, pad = ret
if pad != (0, 0) or node.op.mode != 'max' or stride != ws:
if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return
if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds_grad = GpuDownsampleFactorMaxGrad(ws, node.op.ignore_border)
return [host_from_gpu(gpu_ds_grad(x.owner.inputs[0],
as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gz)))]
gpu_ws_grad = GpuDownsampleFactorMaxGrad(ws, node.op.ignore_border)
if node.inputs[0].ndim == 4:
return [host_from_gpu(gpu_ws_grad(x.owner.inputs[0],
as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gz)))]
else:
x_4D = pad_dims(x.owner.inputs[0], 2, 2)
z_4D = pad_dims(as_cuda_ndarray_variable(z), 2, 2)
gz_4D = pad_dims(as_cuda_ndarray_variable(gz), 2, 2)
output_4D = gpu_ws_grad(x_4D, z_4D, gz_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt()
@local_optimizer([pool.DownsampleFactorMaxGradGrad])
def local_gpu_downsample_factor_max_grad_grad(node):
if isinstance(node.op, pool.DownsampleFactorMaxGradGrad):
assert node.op.__props__ == ('ignore_border', 'mode')
assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, z, gx, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node)
nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None:
return
ws, stride, pad = ret
if pad != (0, 0) or node.op.mode != 'max' or stride != ws:
if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return
if (x.owner and isinstance(x.owner.op, HostFromGpu)):
op = GpuDownsampleFactorMaxGradGrad(ws, node.op.ignore_border)
return [host_from_gpu(op(x.owner.inputs[0],
as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gx)))]
if node.inputs[0].ndim == 4:
return [host_from_gpu(op(x.owner.inputs[0],
as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gx)))]
else:
x_4D = pad_dims(x.owner.inputs[0], 2, 2)
z_4D = pad_dims(as_cuda_ndarray_variable(z), 2, 2)
gx_4D = pad_dims(as_cuda_ndarray_variable(gx), 2, 2)
output_4D = op(x_4D, z_4D, gx_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt()
......
......@@ -3,13 +3,13 @@ from functools import wraps
import numpy
from theano import scalar as scal, Constant
from theano import tensor, scalar as scal, Constant
from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError)
from theano.sandbox.cuda.basic_ops import (
GpuFromHost, HostFromGpu, host_from_gpu, GpuDimShuffle, GpuElemwise)
GpuFromHost, HostFromGpu, host_from_gpu, GpuDimShuffle, GpuElemwise, GpuReshape)
_one = scal.constant(numpy.asarray(1.0, dtype='float32'))
......@@ -126,3 +126,48 @@ def output_merge(cls, alpha_in, beta_in, out_in):
return maker(targ, *inputs)
return opt
return wrapper
def pad_dims(input, leftdims, rightdims):
    """Reshape `input` into a tensor with `leftdims + rightdims` dimensions.

    Helper used to bring pooling inputs that have an arbitrary number of
    non-pooling dimensions to the number of dimensions expected by the
    GPU pooling ops.

    The trailing `rightdims` dimensions of `input` are kept as they are.
    Every dimension in front of them is collapsed into one leading
    dimension, which is then followed by `leftdims - 1` dimensions of
    length 1.  For example, with ``leftdims=2`` and ``rightdims=2`` the
    result has shape ``(prod(leading dims), 1, height, width)``.

    If `input` already has `leftdims + rightdims` dimensions it is
    returned unchanged.
    """
    assert input.ndim >= rightdims

    target_ndim = leftdims + rightdims
    if input.ndim == target_ndim:
        return input

    # Product of the lengths of all leading (non-pooling) dimensions,
    # padded on the right so that it can be joined with the other
    # pieces of the new shape.
    n_leading = tensor.shape_padright(
        tensor.prod(input.shape[:-rightdims]), 1)
    # Length-1 placeholders filling up the remaining left dimensions.
    unit_dims = tensor.as_tensor([1] * (leftdims - 1))
    # The trailing (pooling) dimensions are carried over untouched.
    trailing = input.shape[-rightdims:]

    target_shape = tensor.cast(
        tensor.join(0, n_leading, unit_dims, trailing), 'int64')
    return GpuReshape(target_ndim)(input, target_shape)
def unpad_dims(output, input, leftdims, rightdims):
    """Reshape `output` so that it matches the layout of `input`.

    This reverts the reshaping done by `pad_dims`: the leading
    dimensions are taken from `input`, the trailing `rightdims`
    dimensions are taken from `output`.  When `output` and `input`
    already have the same number of dimensions, `output` is returned
    unchanged.

    `leftdims` is not used by this function.
    """
    if output.ndim != input.ndim:
        # original leading dims + (possibly pooled) trailing dims
        leading = input.shape[:-rightdims]
        trailing = output.shape[-rightdims:]
        restored_shape = tensor.join(0, leading, trailing)
        return GpuReshape(input.ndim)(output, restored_shape)
    return output
......@@ -326,7 +326,9 @@ if 0:
def test_downsample():
shps = [(1, 1, 1, 12),
shps = [(1, 12),
(1, 1, 12),
(1, 1, 1, 12),
(1, 1, 2, 2),
(1, 1, 1, 1),
(1, 1, 4, 4),
......@@ -359,17 +361,17 @@ def test_downsample():
for shp in shps:
for ds in (2, 2), (3, 2), (1, 1):
if ds[0] > shp[2]:
if ds[0] > shp[-2]:
continue
if ds[1] > shp[3]:
if ds[1] > shp[-1]:
continue
# GpuDownsampleFactorMax doesn't like having more than 512 columns
# in the output tensor.
if float(shp[3]) / ds[1] > 512:
if float(shp[-1]) / ds[1] > 512:
continue
for ignore_border in (True, False):
# print 'test_downsample', shp, ds, ignore_border
ds_op = Pool(ignore_border=ignore_border)
ds_op = Pool(ndim=len(ds), ignore_border=ignore_border)
a = tcn.shared_constructor(my_rand(*shp), 'a')
f = pfunc([], ds_op(tensor.as_tensor_variable(a), ds),
......
......@@ -15,8 +15,8 @@ import theano
import theano.tensor as T
import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs
from theano.tensor.signal.pool import pool_2d
from theano.tensor.signal.pool import MaxPoolGrad, AveragePoolGrad
from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty
from theano.sandbox.cuda import float32_shared_constructor as shared
......@@ -170,7 +170,7 @@ def test_dnn_conv_inplace():
def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
pool_func=T.max, mode='ignore_borders'):
pool_function=T.max, mode='ignore_borders'):
if strides is None:
strides = ds
......@@ -179,13 +179,13 @@ def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
# reshape to B, C*0, 1, 2 and do the pooling on 1, 2
first = input.reshape((shape[0], shape[1] * shape[2], shape[3], shape[4]))
pooled1 = pool_2d_i2n(first, ds=ds[1:], strides=strides[1:], pad=pad[1:],
pool_function=pool_func, mode=mode)
pool_function=pool_function, mode=mode)
shp1 = pooled1.shape
# reshape to B, C, 0, 1*2 and do the pooling on 0
second = pooled1.reshape((shape[0], shape[1], shape[2], shp1[2] * shp1[3]))
pooled2 = pool_2d_i2n(second, ds=(ds[0], 1), strides=(strides[0], 1),
pad=(pad[0], 0), pool_function=pool_func, mode=mode)
pad=(pad[0], 0), pool_function=pool_function, mode=mode)
shp2 = pooled2.shape
return pooled2.reshape((shape[0], shape[1], shp2[2], shp1[2], shp1[3]))
......@@ -241,8 +241,6 @@ def test_pooling():
func = T.max
else:
func = T.mean
if pad != (0, 0) and cuda.dnn.version() == -1:
continue
if pad != (0, 0) and func is T.mean:
continue
......@@ -418,6 +416,7 @@ def test_pooling3d():
if not cuda.dnn.dnn_available() or cuda.dnn.version() < (3000, 3000):
raise SkipTest(cuda.dnn.dnn_available.msg)
# We force the FAST_RUN as we don't want the reference to run in DebugMode.
mode_without_gpu_ref = theano.compile.mode.get_mode(
'FAST_RUN').excluding('gpu')
......@@ -427,8 +426,7 @@ def test_pooling3d():
else:
modes = ('max', 'average_inc_pad', 'average_exc_pad')
x = T.TensorType(broadcastable=(False, False, False, False, False),
dtype='float32')()
x = T.ftensor5()
for mode, pad in product(modes,
((0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1),
(2, 3, 2), (3, 2, 2), (2, 2, 3))):
......@@ -436,8 +434,6 @@ def test_pooling3d():
func = T.max
else:
func = T.mean
if pad != (0, 0, 0) and cuda.dnn.version() == -1:
continue
if pad != (0, 0, 0) and func is T.mean:
continue
......@@ -449,13 +445,13 @@ def test_pooling3d():
if pad[0] > stride or pad[1] > stride or pad[2] > stride:
# Not implemented
continue
out1 = cuda.dnn.dnn_pool(x, (ws, ws, ws),
stride=(stride, stride, stride),
pad=pad, mode=mode)
out2 = pool3d2d(x, ds=(ws, ws, ws),
strides=(stride, stride, stride),
pad=pad, pool_func=func)
out1 = pool_3d(x, (ws, ws, ws),
st=(stride, stride, stride),
ignore_border=True,
padding=pad, mode=mode)
out2 = pool3d2d(x, ds=(ws, ws, ws), strides=(stride, stride, stride),
pad=pad,
pool_function=func)
f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes])
......@@ -510,11 +506,17 @@ def test_pooling3d():
g_out = fg(data)
# Compare against the CPU result
out = pool3d2d(x, (ws, ws, ws),
strides=(stride, stride, stride),
pad=pad, pool_func=func)
out = pool_3d(x, (ws, ws, ws),
padding=pad,
ignore_border=True, mode=mode)
fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu_ref)
if mode == 'max':
assert any([isinstance(node.op, MaxPoolGrad)
for node in fc.maker.fgraph.toposort()])
else:
assert any([isinstance(node.op, AveragePoolGrad)
for node in fc.maker.fgraph.toposort()])
c_out = fc(data)
utt.assert_allclose(c_out, g_out)
......@@ -523,6 +525,7 @@ def test_pooling_opt():
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
# 2D pooling
x = T.fmatrix()
f = theano.function(
......@@ -535,6 +538,7 @@ def test_pooling_opt():
f(numpy.zeros((10, 10), dtype='float32'))
# gradient of 2D pooling
f = theano.function(
[x],
T.grad(pool_2d(x, ds=(2, 2), mode='average_inc_pad',
......@@ -545,6 +549,7 @@ def test_pooling_opt():
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10), dtype='float32'))
# Test sum pooling
f = theano.function(
[x],
......@@ -557,6 +562,82 @@ def test_pooling_opt():
data = numpy.random.rand(10, 10).astype('float32')
f(data)
# 3D pooling
x = T.ftensor3()
f = theano.function(
[x],
pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad', ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10, 10), dtype='float32'))
# gradient of 3D pooling
f = theano.function(
[x],
T.grad(pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad',
ignore_border=True).sum(), x),
mode=mode_with_gpu.including("cudnn"))
assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10, 10), dtype='float32'))
def test_pooling_opt_arbitrary_dimensions():
    """Check GPU pooling of inputs with any number of non-pooling dims.

    For 0 to 3 non-pooling dimensions and for 2D and 3D pooling windows,
    build a `Pool` op and its gradient, compile them once with the GPU
    mode and once with the CPU mode, check that the expected ops appear
    in each compiled graph, and compare the GPU results with the CPU
    results.
    """
    # test if input with an arbitrary number of non-pooling dimensions
    # is correctly reshaped to run on the GPU
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    # 'average_exc_pad' is disabled for versions < 4004
    if cuda.dnn.version() < (4004, 4004):
        modes = ('max', 'average_inc_pad')
    else:
        modes = ('max', 'average_inc_pad', 'average_exc_pad')

    for n_non_pool_dims in (0, 1, 2, 3):
        for ws in ((2, 2), (3, 3, 3)):
            # create input shape: non-pooling dimensions
            # followed by 2 or 3 pooling dimensions
            shp = (2,) * n_non_pool_dims + (5,) * len(ws)
            data = numpy.random.normal(0, 1, shp).astype('float32')
            input = shared(data)

            for mode in modes:
                out_pool = Pool(ndim=len(ws), mode=mode,
                                ignore_border=True)(input, ws)
                out_pool_grad = T.grad(T.sum(out_pool), wrt=input)
                out = [out_pool, out_pool_grad]

                # run on GPU
                fg = theano.function([], out, mode=mode_with_gpu)
                assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
                            for node in fg.maker.fgraph.toposort()])
                assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
                            for node in fg.maker.fgraph.toposort()])
                res_gpu = fg()

                # run on CPU
                fc = theano.function([], out, mode=mode_without_gpu)
                assert any([isinstance(node.op, Pool)
                            for node in fc.maker.fgraph.toposort()])
                if mode == 'max':
                    assert any([isinstance(node.op, MaxPoolGrad)
                                for node in fc.maker.fgraph.toposort()])
                else:
                    assert any([isinstance(node.op, AveragePoolGrad)
                                for node in fc.maker.fgraph.toposort()])
                # Evaluate the CPU function here; calling the GPU
                # function again would compare the GPU result with
                # itself and make the check below meaningless.
                res_cpu = fc()

                # check for similarity
                utt.assert_allclose(res_gpu[0], res_cpu[0])
                utt.assert_allclose(res_gpu[1], res_cpu[1])
class test_DnnSoftMax(test_nnet.test_SoftMax):
gpu_op = dnn.GpuDnnSoftmax
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论