提交 6f9b538b authored 作者: Frederic's avatar Frederic 提交者: Pascal Lamblin

tmp

上级 ee75577d
...@@ -88,16 +88,22 @@ dnn_available.msg = None ...@@ -88,16 +88,22 @@ dnn_available.msg = None
def c_set_tensor4d(var, desc, err, fail): def c_set_tensor4d(var, desc, err, fail):
return """ return """
{
int str0, str1, str2, str3;
str3 = CudaNdarray_HOST_STRIDES(%(var)s)[3]?CudaNdarray_HOST_STRIDES(%(var)s)[3]:1;
str2 = CudaNdarray_HOST_STRIDES(%(var)s)[2]?CudaNdarray_HOST_STRIDES(%(var)s)[2]:CudaNdarray_HOST_DIMS(%(var)s)[3];
str1 = CudaNdarray_HOST_STRIDES(%(var)s)[1]?CudaNdarray_HOST_STRIDES(%(var)s)[1]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3];
str0 = CudaNdarray_HOST_STRIDES(%(var)s)[0]?CudaNdarray_HOST_STRIDES(%(var)s)[0]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3]*CudaNdarray_HOST_DIMS(%(var)s)[1];
printf("str0=%%d str1=%%d str2=%%d str3=%%d\\n",
str0, str1, str2, str3
);
%(err)s = cudnnSetTensor4dDescriptorEx( %(err)s = cudnnSetTensor4dDescriptorEx(
%(desc)s, CUDNN_DATA_FLOAT, %(desc)s, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(%(var)s)[0], CudaNdarray_HOST_DIMS(%(var)s)[0],
CudaNdarray_HOST_DIMS(%(var)s)[1], CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_DIMS(%(var)s)[2], CudaNdarray_HOST_DIMS(%(var)s)[2],
CudaNdarray_HOST_DIMS(%(var)s)[3], CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[0]?CudaNdarray_HOST_STRIDES(%(var)s)[0]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3]*CudaNdarray_HOST_DIMS(%(var)s)[1], str0, str1, str2, str3
CudaNdarray_HOST_STRIDES(%(var)s)[1]?CudaNdarray_HOST_STRIDES(%(var)s)[1]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[2]?CudaNdarray_HOST_STRIDES(%(var)s)[2]:CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[3]?CudaNdarray_HOST_STRIDES(%(var)s)[3]:1
); );
if (%(err)s != CUDNN_STATUS_SUCCESS) { if (%(err)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -108,13 +114,12 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) { ...@@ -108,13 +114,12 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) {
CudaNdarray_HOST_DIMS(%(var)s)[1], CudaNdarray_HOST_DIMS(%(var)s)[1],
CudaNdarray_HOST_DIMS(%(var)s)[2], CudaNdarray_HOST_DIMS(%(var)s)[2],
CudaNdarray_HOST_DIMS(%(var)s)[3], CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[0]?CudaNdarray_HOST_STRIDES(%(var)s)[0]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3]*CudaNdarray_HOST_DIMS(%(var)s)[1], str0, str1, str2, str3
CudaNdarray_HOST_STRIDES(%(var)s)[1]?CudaNdarray_HOST_STRIDES(%(var)s)[1]:CudaNdarray_HOST_DIMS(%(var)s)[2]*CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[2]?CudaNdarray_HOST_STRIDES(%(var)s)[2]:CudaNdarray_HOST_DIMS(%(var)s)[3],
CudaNdarray_HOST_STRIDES(%(var)s)[3]?CudaNdarray_HOST_STRIDES(%(var)s)[3]:1
); );
%(fail)s %(fail)s
} }
}
""" % dict(var=var, err=err, desc=desc, fail=fail) """ % dict(var=var, err=err, desc=desc, fail=fail)
...@@ -679,15 +684,20 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -679,15 +684,20 @@ class GpuDnnPoolDesc(GpuOp):
self.stride = stride self.stride = stride
assert len(stride) == 2 assert len(stride) == 2
self.pad = pad self.pad = pad
if (pad[0] != 0 or pad[1] != 0) and dnn_version() < 20: if (pad[0] != 0 or pad[1] != 0) and version() < 20:
raise RuntimeError("CUDNN pooling need version v2 to support") raise RuntimeError("Pooling with padding need CUDNN v2 or"
" more recent.")
def __setstate__(self, d): def __setstate__(self, d):
self.__dict__.update(d) self.__dict__.update(d)
if not hasattr(d, 'pad'): if not hasattr(self, 'pad'):
import pdb;pdb.set_trace()
self.pad = (0, 0) self.pad = (0, 0)
def make_node(self): def make_node(self):
if self.pad != (0, 0) and version() < 20:
raise RuntimeError(
"CUDNNpooling with padding request CUDNN v2 or more recent.")
return Apply(self, [], return Apply(self, [],
[CDataType("cudnnPoolingDescriptor_t")()]) [CDataType("cudnnPoolingDescriptor_t")()])
...@@ -749,11 +759,7 @@ class GpuDnnPool(DnnBase): ...@@ -749,11 +759,7 @@ class GpuDnnPool(DnnBase):
:param img: the image 4d tensor. :param img: the image 4d tensor.
:param desc: the pooling descriptor. :param desc: the pooling descriptor.
""" """
__props__ = ('ignore_border', ) __props__ = ()
def __init__(self, ignore_border):
self.ignore_border = ignore_border
DnnBase.__init__(self)
def make_node(self, img, desc): def make_node(self, img, desc):
img = as_cuda_ndarray_variable(img) img = as_cuda_ndarray_variable(img)
...@@ -853,23 +859,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -853,23 +859,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
%(out)s_dims[0] = CudaNdarray_HOST_DIMS(%(input)s)[0]; %(out)s_dims[0] = CudaNdarray_HOST_DIMS(%(input)s)[0];
%(out)s_dims[1] = CudaNdarray_HOST_DIMS(%(input)s)[1]; %(out)s_dims[1] = CudaNdarray_HOST_DIMS(%(input)s)[1];
if (%(ignore_border)d){ %(out)s_dims[2] = (CudaNdarray_HOST_DIMS(%(input)s)[2] + (vpad*2) - wsX) / strideX + 1;
%(out)s_dims[2] = (CudaNdarray_HOST_DIMS(%(input)s)[2] - wsX) / strideX + 1; %(out)s_dims[3] = (CudaNdarray_HOST_DIMS(%(input)s)[3] + (hpad*2) - wsY) / strideY + 1;
%(out)s_dims[3] = (CudaNdarray_HOST_DIMS(%(input)s)[3] - wsY) / strideY + 1;
}else{
int r = CudaNdarray_HOST_DIMS(%(input)s)[2];
int c = CudaNdarray_HOST_DIMS(%(input)s)[3];
if(strideX >= wsX){
%(out)s_dims[2] = (r - 1) / strideX + 1;
}else{
%(out)s_dims[2] = max(0, (r - 1 - wsX) / strideX + 1) + 1;
}
if(strideY >= wsY){
%(out)s_dims[3] = (c - 1) / strideY + 1;
}else{
%(out)s_dims[3] = max(0, (c - 1 - wsY) / strideY + 1) + 1;
}
}
if (CudaNdarray_prep_output(&%(out)s, 4, %(out)s_dims) != 0) if (CudaNdarray_prep_output(&%(out)s, 4, %(out)s_dims) != 0)
{ {
...@@ -908,7 +899,6 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -908,7 +899,6 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
name=name, set_in=set_in, name=name, set_in=set_in,
set_out=set_out, input=inputs[0], set_out=set_out, input=inputs[0],
input_desc="input"+name, input_desc="input"+name,
ignore_border=self.ignore_border,
output_desc="output"+name) output_desc="output"+name)
def grad(self, inp, grads): def grad(self, inp, grads):
...@@ -919,8 +909,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -919,8 +909,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
out = self(img, desc) out = self(img, desc)
g_out = GpuDnnPoolGrad(ignore_border=self.ignore_border)( g_out = GpuDnnPoolGrad()(img, out, grad, desc)
img, out, grad, desc)
return g_out, theano.gradient.DisconnectedType()() return g_out, theano.gradient.DisconnectedType()()
...@@ -929,6 +918,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -929,6 +918,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return [[1], [0]] return [[1], [0]]
def c_code_cache_version(self): def c_code_cache_version(self):
return
return (5, version()) return (5, version())
...@@ -941,15 +931,9 @@ class GpuDnnPoolGrad(DnnBase): ...@@ -941,15 +931,9 @@ class GpuDnnPoolGrad(DnnBase):
:param inp_grad: same size as out, but is the corresponding gradient information. :param inp_grad: same size as out, but is the corresponding gradient information.
:param desc: The pooling descriptor. :param desc: The pooling descriptor.
""" """
__props__ = ('ignore_border', ) __props__ = ()
def __init__(self, ignore_border):
self.ignore_border = ignore_border
DnnBase.__init__(self)
def make_node(self, inp, out, inp_grad, desc): def make_node(self, inp, out, inp_grad, desc):
if self.ignore_border is False:
raise NotImplementedError()
inp = as_cuda_ndarray_variable(inp) inp = as_cuda_ndarray_variable(inp)
if inp.type.ndim != 4: if inp.type.ndim != 4:
raise TypeError('inp must be 4D tensor') raise TypeError('inp must be 4D tensor')
...@@ -1096,6 +1080,24 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1096,6 +1080,24 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuDnnPoolGrad: error doing operation: %%s", "GpuDnnPoolGrad: error doing operation: %%s",
cudnnGetErrorString(err%(name)s)); cudnnGetErrorString(err%(name)s));
printf("input.shape=(%%d, %%d, %%d, %%d) input_grad.shape=(%%d, %%d, %%d, %%d) output.shape=(%%d, %%d, %%d, %%d) output_grad.shape=(%%d, %%d, %%d, %%d)\\n",
CudaNdarray_HOST_DIMS(%(input)s)[0],
CudaNdarray_HOST_DIMS(%(input)s)[1],
CudaNdarray_HOST_DIMS(%(input)s)[2],
CudaNdarray_HOST_DIMS(%(input)s)[3],
CudaNdarray_HOST_DIMS(%(input_grad)s)[0],
CudaNdarray_HOST_DIMS(%(input_grad)s)[1],
CudaNdarray_HOST_DIMS(%(input_grad)s)[2],
CudaNdarray_HOST_DIMS(%(input_grad)s)[3],
CudaNdarray_HOST_DIMS(%(output)s)[0],
CudaNdarray_HOST_DIMS(%(output)s)[1],
CudaNdarray_HOST_DIMS(%(output)s)[2],
CudaNdarray_HOST_DIMS(%(output)s)[3],
CudaNdarray_HOST_DIMS(%(output_grad)s)[0],
CudaNdarray_HOST_DIMS(%(output_grad)s)[1],
CudaNdarray_HOST_DIMS(%(output_grad)s)[2],
CudaNdarray_HOST_DIMS(%(output_grad)s)[3]
);
%(fail)s %(fail)s
} }
""" % dict(output_grad=out_grad, desc=desc, """ % dict(output_grad=out_grad, desc=desc,
...@@ -1108,14 +1110,14 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1108,14 +1110,14 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
output_grad_desc="output_grad"+name) output_grad_desc="output_grad"+name)
def c_code_cache_version(self): def c_code_cache_version(self):
return
return (4, version()) return (4, version())
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0), def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
ignore_border=True):
""" """
GPU pooling using cuDNN from NVIDIA. GPU pooling using cuDNN from NVIDIA.
...@@ -1126,6 +1128,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0), ...@@ -1126,6 +1128,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0),
:param ws: subsampling window size :param ws: subsampling window size
:param stride: subsampling stride (default: (1, 1)) :param stride: subsampling stride (default: (1, 1))
:param mode: one of 'max', 'average' (default: 'max') :param mode: one of 'max', 'average' (default: 'max')
:param pad: todo doc
:warning: The cuDNN library only works with GPU that have a compute :warning: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not capability of 3.0 or higer. This means that older GPU will not
...@@ -1134,7 +1137,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0), ...@@ -1134,7 +1137,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0),
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
desc = GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode, pad=pad)() desc = GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode, pad=pad)()
return GpuDnnPool(ignore_border=ignore_border)(img, desc) return GpuDnnPool()(img, desc)
class GpuDnnSoftmaxBase(DnnBase): class GpuDnnSoftmaxBase(DnnBase):
...@@ -1463,10 +1466,11 @@ if True: ...@@ -1463,10 +1466,11 @@ if True:
if not dnn_available(): if not dnn_available():
return return
if isinstance(node.op, GpuDownsampleFactorMax): if isinstance(node.op, GpuDownsampleFactorMax):
if not node.op.ignore_border:
return
img, = node.inputs img, = node.inputs
ds = node.op.ds ds = node.op.ds
return [dnn_pool(gpu_contiguous(img), ds, ds, return [dnn_pool(gpu_contiguous(img), ds, ds)]
ignore_border=node.op.ignore_border)]
@register_opt('cudnn') @register_opt('cudnn')
@local_optimizer([DownsampleFactorMax]) @local_optimizer([DownsampleFactorMax])
...@@ -1474,13 +1478,14 @@ if True: ...@@ -1474,13 +1478,14 @@ if True:
if not dnn_available(): if not dnn_available():
return return
if isinstance(node.op, DownsampleFactorMax): if isinstance(node.op, DownsampleFactorMax):
if not node.op.ignore_border:
return
img, = node.inputs img, = node.inputs
ds = node.op.ds ds = node.op.ds
stride = node.op.st stride = node.op.st
if (img.owner and isinstance(img.owner.op, HostFromGpu)): if (img.owner and isinstance(img.owner.op, HostFromGpu)):
ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]), ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
ds, stride=stride, ds, stride=stride)
ignore_border=node.op.ignore_border)
return [host_from_gpu(ret)] return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
...@@ -1489,17 +1494,16 @@ if True: ...@@ -1489,17 +1494,16 @@ if True:
if not dnn_available(): if not dnn_available():
return return
if isinstance(node.op, GpuDownsampleFactorMaxGrad): if isinstance(node.op, GpuDownsampleFactorMaxGrad):
inp, out, inp_grad = node.inputs
ds = node.op.ds
if not node.op.ignore_border: if not node.op.ignore_border:
return return
inp, out, inp_grad = node.inputs
ds = node.op.ds
desc = GpuDnnPoolDesc(ws=ds, stride=ds, mode="max")() desc = GpuDnnPoolDesc(ws=ds, stride=ds, mode="max")()
return [GpuDnnPoolGrad(ignore_border=node.op.ignore_border)( return [GpuDnnPoolGrad()(gpu_contiguous(inp),
gpu_contiguous(inp), gpu_contiguous(out),
gpu_contiguous(out), gpu_contiguous(inp_grad),
gpu_contiguous(inp_grad), desc)]
desc)]
@register_opt('cudnn') @register_opt('cudnn')
@local_optimizer([DownsampleFactorMaxGrad]) @local_optimizer([DownsampleFactorMaxGrad])
...@@ -1518,11 +1522,10 @@ if True: ...@@ -1518,11 +1522,10 @@ if True:
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode="max")() desc = GpuDnnPoolDesc(ws=ds, stride=st, mode="max")()
if not node.op.ignore_border: if not node.op.ignore_border:
return return
ret = GpuDnnPoolGrad(ignore_border=node.op.ignore_border)( ret = GpuDnnPoolGrad()(gpu_contiguous(inp),
gpu_contiguous(inp), gpu_contiguous(out),
gpu_contiguous(out), gpu_contiguous(inp_grad),
gpu_contiguous(inp_grad), desc)
desc)
return [host_from_gpu(ret)] return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
......
...@@ -31,7 +31,7 @@ else: ...@@ -31,7 +31,7 @@ else:
def pool_2d_i2n(input, ds=(2, 2), strides=None, def pool_2d_i2n(input, ds=(2, 2), strides=None,
ignore_border=True, pad=(0, 0),
pool_function=T.max, mode='ignore_borders'): pool_function=T.max, mode='ignore_borders'):
if strides is None: if strides is None:
strides = ds strides = ds
...@@ -41,8 +41,19 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None, ...@@ -41,8 +41,19 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None,
"strides should be smaller than or equal to ds," "strides should be smaller than or equal to ds,"
" strides=(%d, %d) and ds=(%d, %d)" % " strides=(%d, %d) and ds=(%d, %d)" %
(strides + ds)) (strides + ds))
shape = input.shape shape = input.shape
if pad != (0, 0):
assert pool_function is T.max
pad_x = pad[0]
pad_y = pad[1]
a = T.alloc(-numpy.inf, shape[0], shape[1], shape[2] + pad_x*2,
shape[3] + pad_y*2)
input = T.set_subtensor(a[:, :,
pad_x:pad_x+shape[2],
pad_y:pad_y+shape[3]],
input)
shape = input.shape
neibs = images2neibs(input, ds, strides, mode=mode) neibs = images2neibs(input, ds, strides, mode=mode)
pooled_neibs = pool_function(neibs, axis=1) pooled_neibs = pool_function(neibs, axis=1)
...@@ -59,34 +70,43 @@ def test_pooling(): ...@@ -59,34 +70,43 @@ def test_pooling():
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
x = T.ftensor4() x = T.ftensor4()
for func, ignore_border in product( for func, pad in product(
(T.max, T.mean), (False, True)): (T.max, T.mean), ((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))):
if pad != (0, 0) and cuda.dnn.version() < 20:
continue
for ws in (4, 2, 5): for ws in (4, 2, 5):
for stride in (2, 3): for stride in (2, 3):
if stride > ws: if stride > ws:
continue continue
if func is T.max: if func is T.max and pad == (0, 0):
# We will check that the opt introduced it. # We will check that the opt introduced it.
out1 = max_pool_2d(x, (ws, ws), out1 = max_pool_2d(x, (ws, ws),
st=(stride, stride), st=(stride, stride),
ignore_border=ignore_border) ignore_border=True,)
# pad=pad)
else: else:
out1 = cuda.dnn.dnn_pool( out1 = cuda.dnn.dnn_pool(
x, ws=(ws, ws), x, ws=(ws, ws),
stride=(stride, stride), stride=(stride, stride),
ignore_border=ignore_border, pad=pad,
mode='max' if func is T.max else "average") mode='max' if func is T.max else "average")
out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride),
pad=pad,
pool_function=func)
f1 = theano.function([x], out1, mode=mode_with_gpu) f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool) assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes]) for node in f1.maker.fgraph.apply_nodes])
f2 = theano.function([x], out1, mode=mode_without_gpu) f2 = theano.function([x], out2, mode=mode_without_gpu)
assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool) assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f2.maker.fgraph.apply_nodes]) for node in f2.maker.fgraph.apply_nodes])
for shp in [(1, 10, 100, 100), for shp in [(1, 10, 100, 100),
(1, 3, 99, 99), (1, 3, 99, 99),
(32, 1, 147, 197), (32, 1, 147, 197),
]: ]:
print func, pad, ws, stride, shp
data = numpy.random.normal(0, 1, shp).astype("float32") data = numpy.random.normal(0, 1, shp).astype("float32")
a = f1(data).__array__() a = f1(data).__array__()
...@@ -101,49 +121,45 @@ def test_pooling(): ...@@ -101,49 +121,45 @@ def test_pooling():
ws = 2 ws = 2
strides = 2 strides = 2
print func, pad, ws, stride, shp
# This test the CPU grad + opt + GPU implemtentation # This test the CPU grad + opt + GPU implemtentation
def fn(x): def fn(x):
return max_pool_2d(x, (ws, ws), ignore_border=ignore_border) return max_pool_2d(x, (ws, ws), ignore_border=True,)
# pad=pad)
theano.tests.unittest_tools.verify_grad(fn, [data], theano.tests.unittest_tools.verify_grad(fn, [data],
cast_to_output_type=False, cast_to_output_type=False,
mode=mode_with_gpu) mode=mode_with_gpu)
# Confirm that the opt would have inserted it. # Confirm that the opt would have inserted it.
fg = theano.function([x], theano.grad(fn(x).sum(), x), fg = theano.function([x], theano.grad(fn(x).sum(), x),
mode=mode_with_gpu) mode=mode_with_gpu)
if ignore_border: assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad) for node in fg.maker.fgraph.toposort()])
for node in fg.maker.fgraph.toposort()])
else:
assert not any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
for node in fg.maker.fgraph.toposort()])
# Test the GPU grad + GPU implementation # Test the GPU grad + GPU implementation
def fn(x): def fn(x):
dnn_op = cuda.dnn.dnn_pool( dnn_op = cuda.dnn.dnn_pool(
x, ws=(ws, ws), x, ws=(ws, ws),
stride=(stride, stride), stride=(stride, stride),
ignore_border=ignore_border, pad=pad,
mode='max' if func is T.max else "average") mode='max' if func is T.max else "average")
return dnn_op return dnn_op
try: theano.tests.unittest_tools.verify_grad(
theano.tests.unittest_tools.verify_grad( fn, [data],
fn, [data], cast_to_output_type=False,
cast_to_output_type=False, mode=mode_with_gpu)
mode=mode_with_gpu) # Confirm that we get the good op.
# Confirm that we get the good op. fg = theano.function([x], theano.grad(fn(x).sum(), x),
fg = theano.function([x], theano.grad(fn(x).sum(), x), mode=mode_with_gpu)
mode=mode_with_gpu) assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad) for node in fg.maker.fgraph.toposort()])
for node in fg.maker.fgraph.toposort()]) g_out = fg(data)
g_out = fg(data)
assert ignore_border if func is T.max and pad == (0, 0):
except NotImplementedError:
assert not ignore_border
if func is T.max and ignore_border:
# Compare again the CPU result # Compare again the CPU result
out = max_pool_2d(x, (ws, ws), ignore_border=ignore_border) out = max_pool_2d(x, (ws, ws),
# pad=pad,
ignore_border=True)
fc = theano.function([x], theano.grad(out.sum(), x), fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu) mode=mode_without_gpu)
assert any([isinstance(node.op, DownsampleFactorMaxGrad) assert any([isinstance(node.op, DownsampleFactorMaxGrad)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论