提交 6da4304d，作者：abergeron

Merge pull request #2556 from lamblin/dnn_pool

Dnn pool support of pad
...@@ -521,8 +521,8 @@ class BaseGpuCorrMM(GpuOp): ...@@ -521,8 +521,8 @@ class BaseGpuCorrMM(GpuOp):
def __init__(self, border_mode="valid", subsample=(1, 1), pad=(0, 0)): def __init__(self, border_mode="valid", subsample=(1, 1), pad=(0, 0)):
if pad != (0, 0): if pad != (0, 0):
_logger.warning( _logger.warning(
'do not use pad for BaseGpuCorrMM; please set padding in' 'do not use pad for BaseGpuCorrMM; please set padding in '
'border_mode, see the docstring for more details') 'border_mode parameter, see the docstring for more details')
if border_mode != "valid": if border_mode != "valid":
raise ValueError("border_mode must be 'valid'") raise ValueError("border_mode must be 'valid'")
border_mode = pad border_mode = pad
......
...@@ -10,10 +10,13 @@ from theano.compat import PY3 ...@@ -10,10 +10,13 @@ from theano.compat import PY3
from theano.compile.ops import shape_i from theano.compile.ops import shape_i
from theano.configparser import AddConfigVar, EnumStr from theano.configparser import AddConfigVar, EnumStr
from theano.tensor.nnet import SoftmaxGrad from theano.tensor.nnet import SoftmaxGrad
from theano.tensor.signal.downsample import (
DownsampleFactorMax, DownsampleFactorMaxGrad)
from theano.tensor.basic import ShapeError from theano.tensor.basic import ShapeError
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable, from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
host_from_gpu,
gpu_contiguous, HostFromGpu, gpu_contiguous, HostFromGpu,
cp_on_negative_strides) cp_on_negative_strides)
from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax, from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
...@@ -85,16 +88,19 @@ dnn_available.msg = None ...@@ -85,16 +88,19 @@ dnn_available.msg = None
def c_set_tensor4d(var, desc, err, fail): def c_set_tensor4d(var, desc, err, fail):
return """ return """
{
int str0, str1, str2, str3;
str3 = CudaNdarray_HOST_STRIDES(%(var)s)[3]?CudaNdarray_HOST_STRIDES(%(var)s)[3]:1;
str2 = CudaNdarray_HOST_STRIDES(%(var)s)[2]?CudaNdarray_HOST_STRIDES(%(var)s)[2]:CudaNdarray_HOST_DIMS(%(var)s)[3];
str1 = CudaNdarray_HOST_STRIDES(%(var)s)[1]?CudaNdarray_HOST_STRIDES(%(var)s)[1]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3];
str0 = CudaNdarray_HOST_STRIDES(%(var)s)[0]?CudaNdarray_HOST_STRIDES(%(var)s)[0]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3]*CudaNdarray_HOST_DIMS(%(var)s)[1];
%(err)s = cudnnSetTensor4dDescriptorEx( %(err)s = cudnnSetTensor4dDescriptorEx(
%(desc)s, CUDNN_DATA_FLOAT, %(desc)s, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(%(var)s)[0], CudaNdarray_HOST_DIMS(%(var)s)[0],
CudaNdarray_HOST_DIMS(%(var)s)[1], CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_DIMS(%(var)s)[2], CudaNdarray_HOST_DIMS(%(var)s)[2],
CudaNdarray_HOST_DIMS(%(var)s)[3], CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[0]?CudaNdarray_HOST_STRIDES(%(var)s)[0]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3]*CudaNdarray_HOST_DIMS(%(var)s)[1], str0, str1, str2, str3
CudaNdarray_HOST_STRIDES(%(var)s)[1]?CudaNdarray_HOST_STRIDES(%(var)s)[1]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[2]?CudaNdarray_HOST_STRIDES(%(var)s)[2]:CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[3]?CudaNdarray_HOST_STRIDES(%(var)s)[3]:1
); );
if (%(err)s != CUDNN_STATUS_SUCCESS) { if (%(err)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -105,13 +111,12 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) { ...@@ -105,13 +111,12 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) {
CudaNdarray_HOST_DIMS(%(var)s)[1], CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_DIMS(%(var)s)[2], CudaNdarray_HOST_DIMS(%(var)s)[2],
CudaNdarray_HOST_DIMS(%(var)s)[3], CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[0]?CudaNdarray_HOST_STRIDES(%(var)s)[0]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3]*CudaNdarray_HOST_DIMS(%(var)s)[1], str0, str1, str2, str3
CudaNdarray_HOST_STRIDES(%(var)s)[1]?CudaNdarray_HOST_STRIDES(%(var)s)[1]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[2]?CudaNdarray_HOST_STRIDES(%(var)s)[2]:CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[3]?CudaNdarray_HOST_STRIDES(%(var)s)[3]:1
); );
%(fail)s %(fail)s
} }
}
""" % dict(var=var, err=err, desc=desc, fail=fail) """ % dict(var=var, err=err, desc=desc, fail=fail)
...@@ -659,8 +664,11 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -659,8 +664,11 @@ class GpuDnnPoolDesc(GpuOp):
:param ws: windows size :param ws: windows size
:param stride: (dx, dy) :param stride: (dx, dy)
:param mode: 'max' or 'average' :param mode: 'max' or 'average'
:param pad: (padX, padY) padding information.
padX is the size of the left and right borders,
padY is the size of the top and bottom borders.
""" """
__props__ = ('ws', 'stride', 'mode') __props__ = ('ws', 'stride', 'mode', 'pad')
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
...@@ -677,15 +685,27 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -677,15 +685,27 @@ class GpuDnnPoolDesc(GpuOp):
def do_constant_folding(self, node): def do_constant_folding(self, node):
return False return False
def __init__(self, ws=(1, 1), stride=(1, 1), mode='max'): def __init__(self, ws=(1, 1), stride=(1, 1), mode='max', pad=(0, 0)):
assert mode in ('max', 'average') assert mode in ('max', 'average')
self.mode = mode self.mode = mode
assert len(ws) == 2 assert len(ws) == 2
self.ws = ws self.ws = ws
assert len(stride) == 2 assert len(stride) == 2
self.stride = stride self.stride = stride
assert len(stride) == 2
self.pad = pad
if (pad[0] != 0 or pad[1] != 0) and version() < 20:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2")
def __setstate__(self, d):
self.__dict__.update(d)
if not hasattr(self, 'pad'):
self.pad = (0, 0)
def make_node(self):
    """
    Build the Apply node producing the pooling descriptor.

    The node takes no input and has a single output of type
    CDataType("cudnnPoolingDescriptor_t").

    :raises RuntimeError: if a non-zero padding is requested while
        version() is below 20.
    """
    # Test the entries one by one, exactly as __init__ does: a pad given
    # as a list such as [0, 0] never compares equal to the tuple (0, 0)
    # and would otherwise be mistaken for a non-zero padding.
    if (self.pad[0] != 0 or self.pad[1] != 0) and version() < 20:
        raise RuntimeError("CuDNN pooling with padding requires CuDNN v2")
    return Apply(self, [],
                 [CDataType("cudnnPoolingDescriptor_t")()])
...@@ -720,7 +740,7 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -720,7 +740,7 @@ class GpuDnnPoolDesc(GpuOp):
%(desc)s, %(desc)s,
%(mode_flag)s, %(mode_flag)s,
%(wsX)d, %(wsY)d, %(wsX)d, %(wsY)d,
0, 0, %(padX)d, %(padY)d,
%(stridex)d, %(stridey)d %(stridex)d, %(stridey)d
); );
#endif #endif
...@@ -731,11 +751,13 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -731,11 +751,13 @@ class GpuDnnPoolDesc(GpuOp):
} }
} }
""" % dict(name=name, desc=desc, mode_flag=mode_flag, fail=sub['fail'], """ % dict(name=name, desc=desc, mode_flag=mode_flag, fail=sub['fail'],
wsX=self.ws[0], wsY=self.ws[1], stridex=self.stride[0], wsX=self.ws[0], wsY=self.ws[1],
stridey=self.stride[1]) stridex=self.stride[0], stridey=self.stride[1],
padX=self.pad[0], padY=self.pad[1],
)
def c_code_cache_version(self): def c_code_cache_version(self):
return (1, version()) return (2, version())
class GpuDnnPool(DnnBase): class GpuDnnPool(DnnBase):
...@@ -845,8 +867,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -845,8 +867,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
%(out)s_dims[0] = CudaNdarray_HOST_DIMS(%(input)s)[0]; %(out)s_dims[0] = CudaNdarray_HOST_DIMS(%(input)s)[0];
%(out)s_dims[1] = CudaNdarray_HOST_DIMS(%(input)s)[1]; %(out)s_dims[1] = CudaNdarray_HOST_DIMS(%(input)s)[1];
%(out)s_dims[2] = (CudaNdarray_HOST_DIMS(%(input)s)[2] - wsX) / strideX + 1; %(out)s_dims[2] = (CudaNdarray_HOST_DIMS(%(input)s)[2] + (vpad*2) - wsX) / strideX + 1;
%(out)s_dims[3] = (CudaNdarray_HOST_DIMS(%(input)s)[3] - wsY) / strideY + 1; %(out)s_dims[3] = (CudaNdarray_HOST_DIMS(%(input)s)[3] + (hpad*2) - wsY) / strideY + 1;
if (CudaNdarray_prep_output(&%(out)s, 4, %(out)s_dims) != 0) if (CudaNdarray_prep_output(&%(out)s, 4, %(out)s_dims) != 0)
{ {
...@@ -904,7 +926,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -904,7 +926,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return [[1], [0]] return [[1], [0]]
def c_code_cache_version(self): def c_code_cache_version(self):
return (4, version()) return (6, version())
class GpuDnnPoolGrad(DnnBase): class GpuDnnPoolGrad(DnnBase):
...@@ -1063,8 +1085,29 @@ _handle, ...@@ -1063,8 +1085,29 @@ _handle,
#endif #endif
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuDnnPoolGrad: error doing operation: %%s", "GpuDnnPoolGrad: error doing operation: %%s. "
cudnnGetErrorString(err%(name)s)); "input.shape=(%%d, %%d, %%d, %%d) "
"input_grad.shape=(%%d, %%d, %%d, %%d) "
"output.shape=(%%d, %%d, %%d, %%d) "
"output_grad.shape=(%%d, %%d, %%d, %%d)",
cudnnGetErrorString(err%(name)s),
CudaNdarray_HOST_DIMS(%(input)s)[0],
CudaNdarray_HOST_DIMS(%(input)s)[1],
CudaNdarray_HOST_DIMS(%(input)s)[2],
CudaNdarray_HOST_DIMS(%(input)s)[3],
CudaNdarray_HOST_DIMS(%(input_grad)s)[0],
CudaNdarray_HOST_DIMS(%(input_grad)s)[1],
CudaNdarray_HOST_DIMS(%(input_grad)s)[2],
CudaNdarray_HOST_DIMS(%(input_grad)s)[3],
CudaNdarray_HOST_DIMS(%(output)s)[0],
CudaNdarray_HOST_DIMS(%(output)s)[1],
CudaNdarray_HOST_DIMS(%(output)s)[2],
CudaNdarray_HOST_DIMS(%(output)s)[3],
CudaNdarray_HOST_DIMS(%(output_grad)s)[0],
CudaNdarray_HOST_DIMS(%(output_grad)s)[1],
CudaNdarray_HOST_DIMS(%(output_grad)s)[2],
CudaNdarray_HOST_DIMS(%(output_grad)s)[3]
);
%(fail)s %(fail)s
} }
""" % dict(output_grad=out_grad, desc=desc, """ % dict(output_grad=out_grad, desc=desc,
...@@ -1077,13 +1120,13 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1077,13 +1120,13 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
output_grad_desc="output_grad"+name) output_grad_desc="output_grad"+name)
def c_code_cache_version(self): def c_code_cache_version(self):
return (4, version()) return (5, version())
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
def dnn_pool(img, ws, stride=(1, 1), mode='max'): def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
""" """
GPU pooling using cuDNN from NVIDIA. GPU pooling using cuDNN from NVIDIA.
...@@ -1094,6 +1137,9 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max'): ...@@ -1094,6 +1137,9 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max'):
:param ws: subsampling window size :param ws: subsampling window size
:param stride: subsampling stride (default: (1, 1)) :param stride: subsampling stride (default: (1, 1))
:param mode: one of 'max', 'average' (default: 'max') :param mode: one of 'max', 'average' (default: 'max')
:param pad: (padX, padY) padding information.
padX is the size of the left and right borders,
padY is the size of the top and bottom borders.
:warning: The cuDNN library only works with GPU that have a compute :warning: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not capability of 3.0 or higer. This means that older GPU will not
...@@ -1101,7 +1147,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max'): ...@@ -1101,7 +1147,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max'):
:note: This Op implements the ignore_border=True of max_pool_2d. :note: This Op implements the ignore_border=True of max_pool_2d.
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
desc = GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode)() desc = GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode, pad=pad)()
return GpuDnnPool()(img, desc) return GpuDnnPool()(img, desc)
...@@ -1437,6 +1483,23 @@ if True: ...@@ -1437,6 +1483,23 @@ if True:
ds = node.op.ds ds = node.op.ds
return [dnn_pool(gpu_contiguous(img), ds, ds)] return [dnn_pool(gpu_contiguous(img), ds, ds)]
@register_opt('cudnn')
@local_optimizer([DownsampleFactorMax])
def local_pool_dnn_stride(node):
    """
    Replace a DownsampleFactorMax node by a cuDNN pooling.

    The replacement is only proposed when dnn_available() is true, the
    op has ignore_border set, and the input is the output of a
    HostFromGpu op. The window size, stride and padding of the op are
    forwarded to dnn_pool and the result is moved back with
    host_from_gpu. In every other case nothing is returned.
    """
    if not dnn_available():
        return
    op = node.op
    if not isinstance(op, DownsampleFactorMax):
        return
    if not op.ignore_border:
        return
    img, = node.inputs
    window, step, padding = op.ds, op.st, op.padding
    from_gpu = img.owner and isinstance(img.owner.op, HostFromGpu)
    if not from_gpu:
        return
    # Pool directly on the variable that was transferred from the GPU.
    pooled = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
                      window, stride=step, pad=padding)
    return [host_from_gpu(pooled)]
@register_opt('cudnn') @register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMaxGrad]) @local_optimizer([GpuDownsampleFactorMaxGrad])
def local_pool_dnn_grad(node): def local_pool_dnn_grad(node):
...@@ -1454,6 +1517,30 @@ if True: ...@@ -1454,6 +1517,30 @@ if True:
gpu_contiguous(inp_grad), gpu_contiguous(inp_grad),
desc)] desc)]
@register_opt('cudnn')
@local_optimizer([DownsampleFactorMaxGrad])
def local_pool_dnn_grad_stride(node):
    """
    Replace a DownsampleFactorMaxGrad node by GpuDnnPoolGrad.

    The replacement is only proposed when dnn_available() is true, the
    op has ignore_border set, and at least one of the three inputs is
    the output of a HostFromGpu op. The window size, stride and padding
    of the op are used to build a 'max' pooling descriptor, and the
    result is moved back with host_from_gpu. In every other case nothing
    is returned.
    """
    if not dnn_available():
        return
    if isinstance(node.op, DownsampleFactorMaxGrad):
        # Decline first, as local_pool_dnn_stride does, so that no
        # pooling descriptor is built for a node that is left untouched.
        if not node.op.ignore_border:
            return
        inp, out, inp_grad = node.inputs
        ds = node.op.ds
        st = node.op.st
        pad = node.op.padding
        if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
            (out.owner and isinstance(out.owner.op, HostFromGpu)) or
            (inp_grad.owner and
             isinstance(inp_grad.owner.op, HostFromGpu))):
            desc = GpuDnnPoolDesc(ws=ds, stride=st, mode="max", pad=pad)()
            ret = GpuDnnPoolGrad()(gpu_contiguous(inp),
                                   gpu_contiguous(out),
                                   gpu_contiguous(inp_grad),
                                   desc)
            return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
@local_optimizer([GpuSoftmax]) @local_optimizer([GpuSoftmax])
def local_softmax_dnn(node): def local_softmax_dnn(node):
......
import logging import logging
import unittest
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import numpy import numpy
...@@ -10,11 +9,10 @@ from theano.compat.six import StringIO ...@@ -10,11 +9,10 @@ from theano.compat.six import StringIO
from theano.gof.python25 import any from theano.gof.python25 import any
import theano.tensor as T import theano.tensor as T
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs, neibs2images from theano.sandbox.neighbours import images2neibs
from theano.tensor.signal.downsample import max_pool_2d from theano.tensor.signal.downsample import max_pool_2d
from theano.tensor.signal.downsample import DownsampleFactorMaxGrad from theano.tensor.signal.downsample import DownsampleFactorMaxGrad
import theano.sandbox.cuda.dnn as dnn import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import gpu_contiguous
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
...@@ -31,6 +29,7 @@ else: ...@@ -31,6 +29,7 @@ else:
def pool_2d_i2n(input, ds=(2, 2), strides=None, def pool_2d_i2n(input, ds=(2, 2), strides=None,
pad=(0, 0),
pool_function=T.max, mode='ignore_borders'): pool_function=T.max, mode='ignore_borders'):
if strides is None: if strides is None:
strides = ds strides = ds
...@@ -40,8 +39,19 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None, ...@@ -40,8 +39,19 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None,
"strides should be smaller than or equal to ds," "strides should be smaller than or equal to ds,"
" strides=(%d, %d) and ds=(%d, %d)" % " strides=(%d, %d) and ds=(%d, %d)" %
(strides + ds)) (strides + ds))
shape = input.shape shape = input.shape
if pad != (0, 0):
assert pool_function is T.max
pad_x = pad[0]
pad_y = pad[1]
a = T.alloc(-numpy.inf, shape[0], shape[1], shape[2] + pad_x*2,
shape[3] + pad_y*2)
input = T.set_subtensor(a[:, :,
pad_x:pad_x+shape[2],
pad_y:pad_y+shape[3]],
input)
shape = input.shape
neibs = images2neibs(input, ds, strides, mode=mode) neibs = images2neibs(input, ds, strides, mode=mode)
pooled_neibs = pool_function(neibs, axis=1) pooled_neibs = pool_function(neibs, axis=1)
...@@ -58,33 +68,41 @@ def test_pooling(): ...@@ -58,33 +68,41 @@ def test_pooling():
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
x = T.ftensor4() x = T.ftensor4()
for func, pad in product((T.max, T.mean),
((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))):
if pad != (0, 0) and cuda.dnn.version() < 20:
continue
for func in (T.max, T.mean): for ws in (4, 2, 5):
for ws in (2, 4, 5):
for stride in (2, 3): for stride in (2, 3):
if stride > ws: if stride > ws:
continue continue
if ws == stride and func is T.max: if func is T.max:
# We will check that the opt introduced it. # We will check that the opt introduced it.
out1 = max_pool_2d(x, (ws, ws), ignore_border=True) out1 = max_pool_2d(x, (ws, ws),
st=(stride, stride),
ignore_border=True,
padding=pad)
else: else:
out1 = cuda.dnn.dnn_pool( out1 = cuda.dnn.dnn_pool(
x, ws=(ws, ws), x, ws=(ws, ws),
stride=(stride, stride), stride=(stride, stride),
pad=pad,
mode='max' if func is T.max else "average") mode='max' if func is T.max else "average")
out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride), out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride),
pad=pad,
pool_function=func) pool_function=func)
f1 = theano.function([x], out1, mode=mode_with_gpu) f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool) assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes]) for node in f1.maker.fgraph.apply_nodes])
f2 = theano.function([x], out2, mode=mode_with_gpu) f2 = theano.function([x], out2, mode=mode_without_gpu)
assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool) assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f2.maker.fgraph.apply_nodes]) for node in f2.maker.fgraph.apply_nodes])
for shp in [(1, 10, 100, 100), for shp in [(1, 10, 100, 100),
(1, 3, 99, 99), (1, 3, 99, 99),
(32, 1, 147, 197), (32, 1, 147, 197),
]: ]:
data = numpy.random.normal(0, 1, shp).astype("float32") data = numpy.random.normal(0, 1, shp).astype("float32")
a = f1(data).__array__() a = f1(data).__array__()
...@@ -98,45 +116,50 @@ def test_pooling(): ...@@ -98,45 +116,50 @@ def test_pooling():
data = numpy.random.normal(0, 1, shp).astype("float32")*10 data = numpy.random.normal(0, 1, shp).astype("float32")*10
ws = 2 ws = 2
strides = 2 stride = 2
# This test the CPU grad + opt + GPU implemtentation # This test the CPU grad + opt + GPU implemtentation
def fn(x): def fn(x):
return max_pool_2d(x, (ws, ws), ignore_border=True) return max_pool_2d(x, (ws, ws), ignore_border=True,
padding=pad)
theano.tests.unittest_tools.verify_grad(fn, [data], theano.tests.unittest_tools.verify_grad(fn, [data],
cast_to_output_type=False, cast_to_output_type=False,
mode=mode_with_gpu) mode=mode_with_gpu)
# Confirm that the opt would have inserted it. # Confirm that the opt would have inserted it.
f = theano.function([x], theano.grad(fn(x).sum(), x), fg = theano.function([x], theano.grad(fn(x).sum(), x),
mode=mode_with_gpu) mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad) assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
for node in f.maker.fgraph.toposort()]) for node in fg.maker.fgraph.toposort()])
# Test the GPU grad + GPU implementation # Test the GPU grad + GPU implementation
def fn(x): def fn(x):
dnn_op = cuda.dnn.dnn_pool( dnn_op = cuda.dnn.dnn_pool(
x, ws=(ws, ws), x, ws=(ws, ws),
stride=(stride, stride), stride=(stride, stride),
pad=pad,
mode='max' if func is T.max else "average") mode='max' if func is T.max else "average")
return dnn_op return dnn_op
theano.tests.unittest_tools.verify_grad(fn, [data], theano.tests.unittest_tools.verify_grad(
cast_to_output_type=False, fn, [data],
mode=mode_with_gpu) cast_to_output_type=False,
mode=mode_with_gpu)
# Confirm that we get the good op. # Confirm that we get the good op.
f = theano.function([x], theano.grad(fn(x).sum(), x), fg = theano.function([x], theano.grad(fn(x).sum(), x),
mode=mode_with_gpu) mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad) assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
for node in f.maker.fgraph.toposort()]) for node in fg.maker.fgraph.toposort()])
g_out = f(data) g_out = fg(data)
if func is T.max: if func is T.max:
# Compare again the CPU result # Compare again the CPU result
out = max_pool_2d(x, (ws, ws), ignore_border=True) out = max_pool_2d(x, (ws, ws),
f = theano.function([x], theano.grad(out.sum(), x), padding=pad,
mode=mode_without_gpu) ignore_border=True)
fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu)
assert any([isinstance(node.op, DownsampleFactorMaxGrad) assert any([isinstance(node.op, DownsampleFactorMaxGrad)
for node in f.maker.fgraph.toposort()]) for node in fc.maker.fgraph.toposort()])
c_out = f(data) c_out = fc(data)
assert numpy.allclose(c_out, g_out) assert numpy.allclose(c_out, g_out)
...@@ -165,7 +188,7 @@ def test_pooling_opt(): ...@@ -165,7 +188,7 @@ def test_pooling_opt():
def test_dnn_tag(): def test_dnn_tag():
""" """
We test that if cudnn isn't avail we crash and that if it is avail, we use it. Test that if cudnn isn't avail we crash and that if it is avail, we use it.
""" """
x = T.ftensor4() x = T.ftensor4()
old = theano.config.on_opt_error old = theano.config.on_opt_error
...@@ -412,11 +435,11 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -412,11 +435,11 @@ class TestDnnInferShapes(utt.InferShapeTester):
mode=params[2] mode=params[2]
)() )()
pool_grad = dnn.GpuDnnPoolGrad()( pool_grad = dnn.GpuDnnPoolGrad()(
img, img,
out, out,
img_grad, img_grad,
desc desc
) )
self._compile_and_check( self._compile_and_check(
[img, img_grad, out], [img, img_grad, out],
[pool_grad], [pool_grad],
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论