Commit 8472d13a authored by Frédéric Bastien

Merge pull request #3965 from harmdevries89/gpudnnpool2

gpu dnn pool takes tensor variables
......@@ -1845,7 +1845,7 @@ class _Linker(gof.link.LocalLinker):
thunk.outputs = [storage_map[v] for v in node.outputs]
thunk_other = thunk
else:
new_node = node.op.prepare_node(node)
new_node = node.op.prepare_node(node, storage_map, compute_map)
if new_node is not None:
node = new_node
......
......@@ -836,7 +836,7 @@ class Op(utils.object2, PureOp, CLinkerOp):
else:
return NotImplemented
def prepare_node(self, node):
def prepare_node(self, node, storage_map, compute_map):
"""
Make any special modifications that the Op needs before doing
make_thunk().
......@@ -959,7 +959,8 @@ class Op(utils.object2, PureOp, CLinkerOp):
"""
logger = logging.getLogger('theano.gof.op.Op')
new_node = self.prepare_node(node)
new_node = self.prepare_node(node, storage_map=storage_map,
compute_map=compute_map)
if new_node is not None:
node = new_node
......@@ -1218,7 +1219,8 @@ int main( int argc, const char* argv[] )
self.openmp = False
theano.config.openmp = False
def prepare_node(self, node):
def prepare_node(self, node, storage_map,
compute_map):
self.update_self_openmp()
......
......@@ -1953,7 +1953,7 @@ class GpuConv(GpuOp):
images[2] * images[3] * 2)
return flops
def prepare_node(self, node):
def prepare_node(self, node, storage_map, compute_map):
if node.op.max_threads_dim0 is None:
cuda = theano.sandbox.cuda
device_id = cuda.use.device_number
......
......@@ -1367,40 +1367,78 @@ class GpuDnnPool(DnnBase):
----------
img
The image 4d or 5d tensor.
desc
The pooling descriptor.
ws
Windows size.
stride
(dx, dy).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' correspond to 'average_inc_pad'.
pad
(padX, padY) padding information.
padX is the size of the left and right borders,
padY is the size of the top and bottom borders.
"""
__props__ = ()
__props__ = ("mode",)
def make_node(self, img, desc):
img = as_cuda_ndarray_variable(img)
if not isinstance(desc.type, CDataType) \
or desc.type.ctype != 'cudnnPoolingDescriptor_t':
raise TypeError('desc must be cudnnPoolingDescriptor_t')
def __init__(self, mode='max'):
super(GpuDnnPool, self).__init__()
if mode == 'average':
mode = 'average_inc_pad'
assert mode in ('max', 'average_inc_pad', 'average_exc_pad')
self.mode = mode
if desc.owner is not None:
dop = desc.owner.op
e_ndim = dop.get_ndim() + 2 # 4 or 5
def prepare_node(self, node, storage_map, compute_map):
    """Upgrade a node built with the old interface before make_thunk().

    The old GpuDnnPool interface took 2 inputs: (img, desc), where
    ``desc`` was a GpuDnnPoolDesc output. The new interface takes
    (img, ws, stride, pad) as tensor variables. When an old-style node
    is found (e.g. from an unpickled graph), extract the pooling
    parameters from the descriptor op, replace the descriptor input
    with constant tensors, and register those new inputs in the
    linker's storage_map/compute_map so they can be evaluated.
    """
    if len(node.inputs) == 2:
        # Fixed copy-paste: this op is GpuDnnPool, not GPUDnnPoolGrad.
        warnings.warn("Theano GpuDnnPool internal changed.", stacklevel=3)
        # Old interface: pull the parameters out of the descriptor op.
        dop = node.inputs[1].owner.op
        self.mode = dop.mode
        ws = theano.tensor.constant(dop.ws)
        st = theano.tensor.constant(dop.stride)
        pad = theano.tensor.constant(dop.pad)
        node.inputs[1] = ws
        node.inputs.append(st)
        node.inputs.append(pad)
        # Register the new inputs with the linker. Constants are
        # already computed; anything else starts out unevaluated.
        for var in (ws, st, pad):
            if isinstance(var, theano.Constant):
                storage_map[var] = [var.data]
                compute_map[var] = [True]
            else:
                storage_map[var] = [None]
                compute_map[var] = [False]
if img.type.ndim != e_ndim:
raise TypeError('img must be %dD tensor' % e_ndim)
def make_node(self, img, ws, stride, pad):
img = as_cuda_ndarray_variable(img)
assert (img.ndim in [4, 5])
return Apply(self, [img, desc], [img.type()])
ws = tensor.as_tensor_variable(ws)
stride = tensor.as_tensor_variable(stride)
pad = tensor.as_tensor_variable(pad)
assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
assert ws.type.ndim == 1
return Apply(self, [img, ws, stride, pad], [img.type()])
def infer_shape(self, node, shape):
if not node.inputs[1].owner:
raise theano.tensor.ShapeError()
desc = node.inputs[1].owner.op
nd = desc.get_ndim()
w = desc.ws
s = desc.stride
p = desc.pad
w = node.inputs[1]
s = node.inputs[2]
p = node.inputs[3]
ret = [shape[0][0], shape[0][1],
(shape[0][2] + 2 * p[0] - w[0]) // s[0] + 1,
(shape[0][3] + 2 * p[1] - w[1]) // s[1] + 1]
if nd == 3:
if node.inputs[0].ndim == 5:
ret.append((shape[0][4] + 2 * p[2] - w[2]) // s[2] + 1)
return [ret]
......@@ -1408,6 +1446,7 @@ class GpuDnnPool(DnnBase):
return """
cudnnTensorDescriptor_t input%(name)s;
cudnnTensorDescriptor_t output%(name)s;
cudnnPoolingDescriptor_t pool%(name)s;
""" % dict(name=name)
def c_init_code_struct(self, node, name, sub):
......@@ -1415,6 +1454,7 @@ cudnnTensorDescriptor_t output%(name)s;
cudnnStatus_t err%(name)s;
input%(name)s = NULL;
output%(name)s = NULL;
pool%(name)s = NULL;
if ((err%(name)s = cudnnCreateTensorDescriptor(&input%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
"(inp): %%s", cudnnGetErrorString(err%(name)s));
......@@ -1425,20 +1465,40 @@ if ((err%(name)s = cudnnCreateTensorDescriptor(&output%(name)s)) != CUDNN_STATUS
"(out): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s
}
if ((err%(name)s = cudnnCreatePoolingDescriptor(&pool%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling "
"descriptor: %%s", cudnnGetErrorString(err%(name)s));
%(fail)s
}
""" % dict(name=name, fail=sub['fail'])
def c_cleanup_code_struct(self, node, name):
return """
if (input%(name)s != NULL) { cudnnDestroyTensorDescriptor(input%(name)s); }
if (output%(name)s != NULL) { cudnnDestroyTensorDescriptor(output%(name)s); }
if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
""" % dict(name=name)
def c_code(self, node, name, inputs, outputs, sub):
desc = inputs[1]
ws = inputs[1]
stride = inputs[2]
pad = inputs[3]
out, = outputs
if self.mode == 'max':
mode_flag = 'CUDNN_POOLING_MAX'
elif self.mode == "average_inc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if version() == -1:
raise Exception("cudnn v1 do not support average_exc_pad")
else:
raise NotImplementedError("Unsupported pooling model.")
return """
cudnnStatus_t err%(name)s;
cudnnStatus_t err;
int %(out)s_dims[5];
......@@ -1450,31 +1510,36 @@ if (!CudaNdarray_is_c_contiguous(%(input)s)) {
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
cudnnPoolingMode_t mode;
int win[3];
int pad[3];
int str[3];
int ndims;
err%(name)s = cudnnGetPoolingNdDescriptor(
%(desc)s, 3,
&mode, &ndims,
win, pad, str);
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPool: error doing cudnnGetPoolingNdDescriptor operation: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s
int win[%(nd)d];
int pad[%(nd)d];
int str[%(nd)d];
for(int i = 0; i < %(nd)d; i++) {
win[i] = *((npy_intp*)PyArray_GETPTR1(%(ws)s, i));
}
for(int i = 0; i < %(nd)d; i++) {
pad[i] = *((npy_intp*)PyArray_GETPTR1(%(pad)s, i));
}
for(int i = 0; i < %(nd)d; i++) {
str[i] = *((npy_intp*)PyArray_GETPTR1(%(str)s, i));
}
err = cudnnSetPoolingNdDescriptor(
pool%(name)s, %(mode_flag)s, %(nd)d,
win, pad, str);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
cudnnGetErrorString(err));
%(fail)s
}
%(out)s_dims[0] = CudaNdarray_HOST_DIMS(%(input)s)[0];
%(out)s_dims[1] = CudaNdarray_HOST_DIMS(%(input)s)[1];
%(out)s_dims[2] = (CudaNdarray_HOST_DIMS(%(input)s)[2] + (pad[0]*2) - win[0]) / str[0] + 1;
%(out)s_dims[3] = (CudaNdarray_HOST_DIMS(%(input)s)[3] + (pad[1]*2) - win[1]) / str[1] + 1;
if (ndims == 3)
if (%(nd)s == 3)
%(out)s_dims[4] = (CudaNdarray_HOST_DIMS(%(input)s)[4] + (pad[2]*2) - win[2]) / str[2] + 1;
if (CudaNdarray_prep_output(&%(out)s, ndims+2, %(out)s_dims) != 0)
if (CudaNdarray_prep_output(&%(out)s, %(nd)s+2, %(out)s_dims) != 0)
{
%(fail)s
}
......@@ -1485,44 +1550,46 @@ if (c_set_tensorNd(%(out)s, %(output_desc)s) != 0)
{
const float alpha = 1;
const float beta = 0;
err%(name)s = cudnnPoolingForward(
err = cudnnPoolingForward(
_handle,
%(desc)s,
pool%(name)s,
&alpha,
%(input_desc)s, CudaNdarray_DEV_DATA(%(input)s),
&beta,
%(output_desc)s, CudaNdarray_DEV_DATA(%(out)s)
);
}
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPool: error doing cudnnPoolingForward operation: %%s",
cudnnGetErrorString(err%(name)s));
cudnnGetErrorString(err));
%(fail)s
}
""" % dict(out=out, desc=desc, fail=sub['fail'],
""" % dict(out=out, fail=sub['fail'],
name=name, input=inputs[0],
input_desc="input" + name,
output_desc="output" + name)
ws=ws, pad=pad, str=stride,
nd=node.inputs[0].ndim - 2, input_desc="input" + name,
output_desc="output" + name,
mode_flag=mode_flag)
def grad(self, inp, grads):
img, desc = inp
img, ws, stride, pad = inp
grad, = grads
grad = gpu_contiguous(grad)
out = self(img, desc)
out = self(img, ws, stride, pad)
g_out = GpuDnnPoolGrad()(img, out, grad, desc)
g_out = GpuDnnPoolGrad(mode=self.mode)(img, out, grad, ws, stride, pad)
return g_out, theano.gradient.DisconnectedType()()
return g_out, theano.gradient.DisconnectedType()(), theano.gradient.DisconnectedType()(), theano.gradient.DisconnectedType()()
def connection_pattern(self, node):
# not connected to desc
return [[1], [0]]
return [[1], [0], [0], [0]]
def c_code_cache_version(self):
return (7, version())
return (8, version())
class GpuDnnPoolGrad(DnnBase):
......@@ -1537,35 +1604,75 @@ class GpuDnnPoolGrad(DnnBase):
The output of the pooling in the forward.
inp_grad
Same size as out, but is the corresponding gradient information.
desc
The pooling descriptor.
ws
Windows size.
stride
(dx, dy).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' correspond to 'average_inc_pad'.
pad
(padX, padY) padding information.
padX is the size of the left and right borders,
padY is the size of the top and bottom borders.
"""
__props__ = ()
__props__ = ('mode',)
def make_node(self, inp, out, inp_grad, desc):
if not isinstance(desc.type, CDataType) \
or desc.type.ctype != 'cudnnPoolingDescriptor_t':
raise TypeError('desc must be cudnnPoolingDescriptor_t')
def __init__(self, mode='max'):
super(GpuDnnPoolGrad, self).__init__()
if mode == 'average':
mode = 'average_inc_pad'
assert mode in ('max', 'average_inc_pad', 'average_exc_pad')
self.mode = mode
def prepare_node(self, node, storage_map, compute_map):
    """Upgrade a node built with the old interface before make_thunk().

    The old GpuDnnPoolGrad interface took 4 inputs:
    (inp, out, inp_grad, desc), where ``desc`` was a GpuDnnPoolDesc
    output. The new interface takes (inp, out, inp_grad, ws, stride,
    pad) as tensor variables. When an old-style node is found (e.g.
    from an unpickled graph), extract the pooling parameters from the
    descriptor op, replace the descriptor input with constant tensors,
    and register those new inputs in the linker's
    storage_map/compute_map so they can be evaluated.
    """
    if len(node.inputs) == 4:
        warnings.warn("Theano GPUDnnPoolGrad internal changed.", stacklevel=3)
        # Old interface: pull the parameters out of the descriptor op.
        dop = node.inputs[3].owner.op
        self.mode = dop.mode
        ws = theano.tensor.constant(dop.ws)
        st = theano.tensor.constant(dop.stride)
        pad = theano.tensor.constant(dop.pad)
        node.inputs[3] = ws
        node.inputs.append(st)
        node.inputs.append(pad)
        # Register the new inputs with the linker. Constants are
        # already computed; anything else starts out unevaluated.
        for var in (ws, st, pad):
            if isinstance(var, theano.Constant):
                storage_map[var] = [var.data]
                compute_map[var] = [True]
            else:
                storage_map[var] = [None]
                compute_map[var] = [False]
def make_node(self, inp, out, inp_grad, ws, stride, pad):
inp = as_cuda_ndarray_variable(inp)
assert (inp.ndim in [4, 5])
inp_grad = as_cuda_ndarray_variable(inp_grad)
assert (inp_grad.ndim in [4, 5])
out = as_cuda_ndarray_variable(out)
assert(out.ndim in [4, 5])
if desc.owner is not None:
nd = desc.owner.op.get_ndim() + 2 # 4 or 5
if inp.type.ndim != nd:
raise TypeError('inp must be %dD tensor' % (nd,))
if inp_grad.type.ndim != nd:
raise TypeError('inp_grad must be %dD tensor' % (nd,))
assert (inp_grad.ndim == inp.ndim)
assert (inp.ndim == out.ndim)
if out.type.ndim != nd:
raise TypeError('out must be %dD tensor' % (nd,))
ws = tensor.as_tensor_variable(ws)
stride = tensor.as_tensor_variable(stride)
pad = tensor.as_tensor_variable(pad)
assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
assert ws.type.ndim == 1
return Apply(self, [inp, out, inp_grad, desc],
return Apply(self, [inp, out, inp_grad, ws, stride, pad],
[inp.type()])
def c_support_code_struct(self, node, name):
......@@ -1574,6 +1681,7 @@ cudnnTensorDescriptor_t input%(name)s;
cudnnTensorDescriptor_t input_grad%(name)s;
cudnnTensorDescriptor_t output%(name)s;
cudnnTensorDescriptor_t output_grad%(name)s;
cudnnPoolingDescriptor_t pool%(name)s;
""" % dict(name=name)
def c_init_code_struct(self, node, name, sub):
......@@ -1583,6 +1691,7 @@ input%(name)s = NULL;
input_grad%(name)s = NULL;
output%(name)s = NULL;
output_grad%(name)s = NULL;
pool%(name)s = NULL;
if ((err%(name)s = cudnnCreateTensorDescriptor(&input%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor "
......@@ -1607,6 +1716,12 @@ if ((err%(name)s = cudnnCreateTensorDescriptor(&output_grad%(name)s)) != CUDNN_S
"(output_grad): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s
}
if ((err%(name)s = cudnnCreatePoolingDescriptor(&pool%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate pooling descriptor "
"(pool): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s
}
""" % dict(name=name, fail=sub['fail'])
def c_cleanup_code_struct(self, node, name):
......@@ -1615,15 +1730,28 @@ if (input%(name)s != NULL) { cudnnDestroyTensorDescriptor(input%(name)s); }
if (input_grad%(name)s != NULL) { cudnnDestroyTensorDescriptor(input_grad%(name)s); }
if (output%(name)s != NULL) { cudnnDestroyTensorDescriptor(output%(name)s); }
if (output_grad%(name)s != NULL) { cudnnDestroyTensorDescriptor(output_grad%(name)s); }
if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
""" % dict(name=name)
def c_code(self, node, name, inputs, outputs, sub):
# Here the name out and inp are based on the cudnn definition.
# Not the definition of this class.
# This make it complicated.
out, inp, inp_grad, desc = inputs
out, inp, inp_grad, ws, stride, pad = inputs
out_grad, = outputs
if self.mode == 'max':
mode_flag = 'CUDNN_POOLING_MAX'
elif self.mode == "average_inc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if version() == -1:
raise Exception("cudnn v1 do not support average_exc_pad")
else:
raise NotImplementedError("Unsupported pooling model.")
return """
cudnnStatus_t err%(name)s;
......@@ -1659,16 +1787,27 @@ if (CudaNdarray_prep_output(&%(output_grad)s,
%(fail)s
}
// Get the pooling_mode to be used. Variable 'tmp' is used because we don't
// care about the other outputs of the function
cudnnPoolingMode_t pooling_mode;
int tmp;
err%(name)s = cudnnGetPoolingNdDescriptor(%(desc)s, 0, &pooling_mode, &tmp,
&tmp, &tmp, &tmp);
int win[%(nd)d];
int pad[%(nd)d];
int str[%(nd)d];
for(int i = 0; i < %(nd)d; i++) {
win[i] = *((npy_intp*)PyArray_GETPTR1(%(ws)s, i));
}
for(int i = 0; i < %(nd)d; i++) {
pad[i] = *((npy_intp*)PyArray_GETPTR1(%(pad)s, i));
}
for(int i = 0; i < %(nd)d; i++) {
str[i] = *((npy_intp*)PyArray_GETPTR1(%(str)s, i));
}
err%(name)s = cudnnSetPoolingNdDescriptor(
pool%(name)s, %(mode_flag)s, %(nd)d,
win, pad, str);
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPoolGrad: could not obtain pooling mode");
%(fail)s
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s
}
if (c_set_tensorNd(%(output_grad)s, %(output_grad_desc)s) != 0)
......@@ -1679,7 +1818,7 @@ const float alpha = 1;
const float beta = 0;
err%(name)s = cudnnPoolingBackward(
_handle,
%(desc)s,
pool%(name)s,
&alpha,
%(input_desc)s, CudaNdarray_DEV_DATA(%(input)s),
%(input_grad_desc)s, CudaNdarray_DEV_DATA(%(input_grad)s),
......@@ -1694,16 +1833,18 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err%(name)s));
%(fail)s
}
""" % dict(output_grad=out_grad, desc=desc,
""" % dict(output_grad=out_grad,
fail=sub['fail'], name=name,
input=inp, input_grad=inp_grad, output=out,
input_desc="input" + name,
input_grad_desc="input_grad" + name,
output_desc="output" + name,
output_grad_desc="output_grad" + name)
output_grad_desc="output_grad" + name,
mode_flag=mode_flag, nd=node.inputs[0].ndim - 2,
ws=ws, pad=pad, str=stride)
def c_code_cache_version(self):
return (7, version())
return (8, version())
def infer_shape(self, node, shape):
return [shape[0]]
......@@ -1725,7 +1866,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
stride
Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad}
pad
pad :
(pad_h, pad_w) padding information.
pad_h is the number of zero-valued pixels added to each of the top and
bottom borders.
......@@ -1742,8 +1883,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
"""
img = gpu_contiguous(img)
desc = GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode, pad=pad)()
return GpuDnnPool()(img, desc)
return GpuDnnPool(mode=mode)(img, ws, stride, pad)
class GpuDnnSoftmaxBase(DnnBase):
......@@ -2222,11 +2362,10 @@ if True:
inp, out, inp_grad = node.inputs
ds = node.op.ds
desc = GpuDnnPoolDesc(ws=ds, stride=ds, mode="max")()
return [GpuDnnPoolGrad()(gpu_contiguous(inp),
gpu_contiguous(out),
gpu_contiguous(inp_grad),
desc)]
return [GpuDnnPoolGrad(mode='max')(gpu_contiguous(inp),
gpu_contiguous(out),
gpu_contiguous(inp_grad),
ds, ds, (0, 0))]
@register_opt('cudnn')
@local_optimizer([MaxPoolGrad])
......@@ -2246,11 +2385,11 @@ if True:
(out.owner and isinstance(out.owner.op, HostFromGpu)) or
(inp_grad.owner and isinstance(inp_grad.owner.op,
HostFromGpu))):
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode=mode, pad=pad)()
ret = GpuDnnPoolGrad()(gpu_contiguous(inp),
gpu_contiguous(out),
gpu_contiguous(inp_grad),
desc)
ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
gpu_contiguous(out),
gpu_contiguous(inp_grad),
ds, st, pad)
return [host_from_gpu(ret)]
@register_opt('cudnn')
......@@ -2270,12 +2409,11 @@ if True:
if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
(inp_grad.owner and isinstance(inp_grad.owner.op,
HostFromGpu))):
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode=mode, pad=pad)()
contiguous_inp_grad = gpu_contiguous(inp_grad)
ret = GpuDnnPoolGrad()(gpu_contiguous(inp),
contiguous_inp_grad,
contiguous_inp_grad,
desc)
ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
contiguous_inp_grad,
contiguous_inp_grad,
ds, st, pad)
return [host_from_gpu(ret)]
@register_opt('cudnn')
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -3,6 +3,8 @@ import logging
from nose.plugins.skip import SkipTest
import numpy
from itertools import chain, product
import six.moves.cPickle as pickle
import os
import theano
from six import StringIO
......@@ -70,19 +72,6 @@ def test_dnn_conv_desc_merge():
assert d1 == d2
def test_dnn_pool_desc_merge():
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
x = theano.tensor.ftensor4('x')
y = dnn.dnn_pool(x, (2, 2))
z = dnn.dnn_pool(x, (2, 2))
f = theano.function([x], [y, z])
descs = [n for n in f.maker.fgraph.apply_nodes
if isinstance(n.op, dnn.GpuDnnPoolDesc)]
assert len(descs) == 1, f.maker.fgraph
def test_dnn_conv_merge():
"""This test that we merge correctly multiple dnn_conv.
......@@ -346,6 +335,64 @@ def test_pooling():
utt.assert_allclose(c_out, g_out)
def test_pooling_with_tensor_vars():
# Check that dnn_pool accepts window size / stride / pad as symbolic
# tensor variables (shared int32 vectors) instead of Python tuples,
# and that it matches a reference CPU pooling implementation.
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
x = T.ftensor4()
# Pooling parameters supplied as shared variables, not constants.
ws = theano.shared(numpy.array([2, 2], dtype='int32'))
st = theano.shared(numpy.array([1, 1], dtype='int32'))
pad = theano.shared(numpy.array([0, 0], dtype='int32'))
mode = 'max'
def fn(x):
dnn_op = cuda.dnn.dnn_pool(
x, ws=ws,
stride=st,
pad=pad,
mode=mode)
return dnn_op
# Verify the gradient numerically on small inputs.
for shp in [(1, 1, 2, 2),
(1, 1, 3, 3)]:
data = numpy.random.normal(0, 1, shp).astype("float32") * 10
theano.tests.unittest_tools.verify_grad(
fn, [data],
cast_to_output_type=False,
mode=mode_with_gpu)
# Reference CPU implementation with the same (2, 2)/(1, 1)/(0, 0)
# parameters for a forward-pass comparison.
out2 = pool_2d_i2n(x, ds=(2, 2), strides=(1, 1),
pad=(0, 0),
pool_function=T.max)
mode_without_gpu2 = mode_without_gpu.including()
mode_without_gpu2.check_isfinite = False
# f1 must use the cuDNN op; f2 must not.
f1 = theano.function([x], fn(x), mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes])
f2 = theano.function([x], out2, mode=mode_without_gpu2)
assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f2.maker.fgraph.apply_nodes])
# Compare GPU and CPU outputs on a range of shapes.
for shp in [(1, 10, 100, 100),
(1, 3, 99, 99),
(32, 1, 147, 197),
]:
data = numpy.random.normal(0, 1, shp).astype("float32")
a = f1(data).__array__()
b = f2(data).__array__()
utt.assert_allclose(a, b)
def test_old_pool_interface():
# Regression test: a graph pickled with the old GpuDnnPool interface
# (pooling-descriptor input) must still unpickle without error.
# prepare_node() is responsible for converting such nodes on load.
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
testfile_dir = os.path.dirname(os.path.realpath(__file__))
fname = 'old_pool_interface.pkl'
with open(os.path.join(testfile_dir, fname), 'rb') as fp:
# The loaded object is intentionally discarded: only successful
# deserialization is being tested here.
pickle.load(fp)
def test_pooling3d():
# CuDNN 3d pooling requires CuDNN v3. Don't test if the CuDNN version is
# too old.
......@@ -607,8 +654,9 @@ class test_DnnSoftMax(test_nnet.test_SoftMax):
input_val = numpy.random.normal(0, 1, inp_shape).astype("float32")
out = f(input_val)
expected_out = numpy.log(numpy.exp(input_val) /
numpy.exp(input_val).sum(1)[:, None, :, :])
expected_out = numpy.log(
numpy.exp(input_val) /
numpy.exp(input_val).sum(1)[:, None, :, :])
utt.assert_allclose(out, expected_out)
......@@ -999,14 +1047,10 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(1, 1), (2, 2), (3, 3)],
modes
):
desc = dnn.GpuDnnPoolDesc(
ws=params[0],
stride=params[1],
mode=params[2]
)()
self._compile_and_check(
[img],
[dnn.GpuDnnPool()(img, desc)],
[dnn.GpuDnnPool(mode=params[2])
(img, params[0], params[1], (0, 0))],
[img_val],
dnn.GpuDnnPool
)
......@@ -1035,16 +1079,13 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(1, 1), (2, 2), (3, 3)],
['max', 'average_inc_pad']
):
desc = dnn.GpuDnnPoolDesc(
ws=params[0],
stride=params[1],
mode=params[2]
)()
pool_grad = dnn.GpuDnnPoolGrad()(
img,
out,
img_grad,
desc
params[0],
params[1],
(0, 0)
)
self._compile_and_check(
[img, img_grad, out],
......
......@@ -792,7 +792,7 @@ class Elemwise(OpenMPOp):
return ret
def prepare_node(self, node):
def prepare_node(self, node, storage_map, compute_map):
# Postpone the ufunc building to the last minutes
# NumPy ufunc support only up to 31 inputs.
# But our c code support more.
......
Markdown is supported
0%
You are about to add 0 people to this discussion. Proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment