提交 62c81c9c 作者: Arnaud Bergeron

Add v3 features for softmax.

上级 2198fc07
......@@ -1333,15 +1333,17 @@ class GpuDnnSoftmaxBase(DnnBase):
DnnBase.__init__(self)
self.tensor_format = tensor_format
assert(algo in ('fast', 'accurate'))
assert(algo in ('fast', 'accurate', 'log'))
if algo == 'log' and version() < 3000:
raise RuntimeError("Need CuDNN v3 for log-softmax")
self.algo = algo
assert(mode in ('instance', 'channel'))
self.mode = mode
self.tensor_4d_descs = [softmax_input
for softmax_input in self.softmax_inputs]
self.tensor_4d_descs.append('softmax_output')
self.tensor_descs = [softmax_input
for softmax_input in self.softmax_inputs]
self.tensor_descs.append('softmax_output')
def infer_shape(self, node, shape):
if self.direction == 'forward':
......@@ -1349,22 +1351,22 @@ class GpuDnnSoftmaxBase(DnnBase):
else:
return [shape[1]]
def _define_tensor4d_desc(self, name, id):
def _define_tensor_desc(self, name, id):
return """
cudnnTensorDescriptor_t %(id)s_%(name)s;
""" % dict(name=name, id=id)
def _init_tensor4d_desc(self, name, id, fail):
def _init_tensor_desc(self, name, id, fail):
return """
%(id)s_%(name)s = NULL;
if ((err%(name)s = cudnnCreateTensorDescriptor(&%(id)s_%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
": %%s", cudnnGetErrorString(err%(name)s));
PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor : %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s
}
""" % dict(name=name, id=id, fail=fail)
def _clean_tensor4d_desc(self, name, id):
def _clean_tensor_desc(self, name, id):
return """
if(%(id)s_%(name)s!= NULL)
cudnnDestroyTensorDescriptor(%(id)s_%(name)s);
......@@ -1372,8 +1374,8 @@ if(%(id)s_%(name)s!= NULL)
def c_support_code_struct(self, node, name):
result = ''
for id in self.tensor_4d_descs:
result += self._define_tensor4d_desc(name, id)
for id in self.tensor_descs:
result += self._define_tensor_desc(name, id)
return result
def c_init_code_struct(self, node, name, sub):
......@@ -1381,14 +1383,14 @@ if(%(id)s_%(name)s!= NULL)
cudnnStatus_t err%(name)s;
""" % dict(name=name)
for id in self.tensor_4d_descs:
result += self._init_tensor4d_desc(name, id, sub['fail'])
for id in self.tensor_descs:
result += self._init_tensor_desc(name, id, sub['fail'])
return result
def c_cleanup_code_struct(self, node, name):
result = ''
for id in self.tensor_4d_descs:
result += self._clean_tensor4d_desc(name, id)
for id in self.tensor_descs:
result += self._clean_tensor_desc(name, id)
return result
def c_code(self, node, name, inputs, outputs, sub):
......@@ -1396,43 +1398,31 @@ cudnnStatus_t err%(name)s;
outs, = outputs
if self.tensor_format == 'b01c':
tensor_format = 1
tensor_format = "CUDNN_TENSOR_NHWC"
else:
tensor_format = 0
tensor_format = "CUDNN_TENSOR_NCHW"
if self.mode == 'instance':
mode = 1
mode = "CUDNN_SOFTMAX_MODE_INSTANCE"
else:
mode = 0
mode = "CUDNN_SOFTMAX_MODE_CHANNEL"
if self.algo == 'fast':
algo = 1
algo = "CUDNN_SOFTMAX_FAST"
elif self.algo == 'log':
algo = "CUDNN_SOFTMAX_LOG"
else:
algo = 0
# Setup configuration variables.
result = """
cudnnStatus_t err%(name)s;
cudnnTensorFormat_t format%(name)s = CUDNN_TENSOR_NCHW;
if (%(tensor_format)d == 1)
format%(name)s = CUDNN_TENSOR_NHWC;
cudnnSoftmaxAlgorithm_t algo%(name)s = CUDNN_SOFTMAX_ACCURATE;
if (%(algo)d == 1)
algo%(name)s = CUDNN_SOFTMAX_FAST;
cudnnSoftmaxMode_t mode%(name)s = CUDNN_SOFTMAX_MODE_CHANNEL;
if (%(mode)d == 1)
mode%(name)s = CUDNN_SOFTMAX_MODE_INSTANCE;
""" % dict(name=name, tensor_format=tensor_format, mode=mode, algo=algo)
algo = "CUDNN_SOFTMAX_ACCURATE"
# Validate the input and build the input variables.
for input_idx, input_name in enumerate(self.softmax_inputs):
result += c_set_tensor4d(ins[input_idx], input_name + "_" + name,
"err" + name, sub['fail'])
result += """
if (c_set_tensorNd(%(t)s, %(desc)s) != 0)
%(fail)s
""" % dict(t=ins[input_idx], desc=input_name + "_" + name, fail=sub['fail'])
subs = dict(ins=ins[-1], outs=outs, fail=sub['fail'],
name=name)
name=name, algo=algo, mode=mode)
for idx, softmax_input in enumerate(self.softmax_inputs):
subs['name%d' % idx] = softmax_input
......@@ -1446,10 +1436,9 @@ if (theano_prep_output(&%(outs)s, PyGpuArray_NDIM(%(ins)s),
{
%(fail)s
}
if (c_set_tensorNd(%(outs)s, softmax_output_%(name)s) != 0)
%(fail)s
""" % subs
result += c_set_tensor4d(outs,
"softmax_output_" + name,
"err" + name, sub['fail'])
# Add on a call to the method that does the actual work.
result += self.method() % subs
......@@ -1457,7 +1446,7 @@ if (theano_prep_output(&%(outs)s, PyGpuArray_NDIM(%(ins)s),
return result
def c_code_cache_version(self):
return (0, 7, version())
return (0.1, version())
def method(self):
raise NotImplementedError('GpuDnnSoftmaxBase::method')
......@@ -1489,24 +1478,13 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
def method(self):
return """
#ifndef CUDNN_VERSION
err%(name)s = cudnnSoftmaxForward(
_handle,
algo%(name)s,
mode%(name)s,
softmax_input_%(name)s,
PyGpuArray_DEV_DATA(%(ins)s),
softmax_output_%(name)s,
PyGpuArray_DEV_DATA(%(outs)s)
);
#else
{
const float alpha = 1.;
const float beta = 0.;
err%(name)s = cudnnSoftmaxForward(
_handle,
algo%(name)s,
mode%(name)s,
%(algo)s,
%(mode)s,
(void*) &alpha,
softmax_input_%(name)s,
PyGpuArray_DEV_DATA(%(ins)s),
......@@ -1515,7 +1493,6 @@ err%(name)s = cudnnSoftmaxForward(
PyGpuArray_DEV_DATA(%(outs)s)
);
}
#endif
"""
def grad(self, inp, grads):
......@@ -1558,26 +1535,13 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
def method(self):
return """
#ifndef CUDNN_VERSION
err%(name)s = cudnnSoftmaxBackward(
_handle,
algo%(name)s,
mode%(name)s,
%(name1)s_%(name)s,
PyGpuArray_DEV_DATA(%(ins1)s),
%(name0)s_%(name)s,
PyGpuArray_DEV_DATA(%(ins0)s),
softmax_output_%(name)s,
PyGpuArray_DEV_DATA(%(outs)s)
);
#else
{
const float alpha = 1.;
const float beta = 0.;
err%(name)s = cudnnSoftmaxBackward(
_handle,
algo%(name)s,
mode%(name)s,
%(algo)s,
%(mode)s,
(void*) &alpha,
%(name1)s_%(name)s,
PyGpuArray_DEV_DATA(%(ins1)s),
......@@ -1588,8 +1552,7 @@ err%(name)s = cudnnSoftmaxBackward(
PyGpuArray_DEV_DATA(%(outs)s)
);
}
#endif
"""
"""
# @register_opt('cudnn') # this optimizer is registered in opt.py instead.
......
......@@ -175,8 +175,6 @@ def test_pooling():
func = T.max
else:
func = T.mean
if pad != (0, 0) and dnn.version() == -1:
continue
if pad != (0, 0) and func is T.mean:
continue
......@@ -611,15 +609,9 @@ def test_dnn_conv_alpha_output_merge():
lr = numpy.asarray(0.05, dtype='float32')
if dnn.version() == -1:
# Can't merge alpha with cudnn v1
fr = conv + out
wr = kern + gw
ir = img + gi
else:
fr = lr * (conv + out)
wr = kern + lr * gw
ir = img + lr * gi
fr = lr * (conv + out)
wr = kern + lr * gw
ir = img + lr * gi
f1 = theano.function([img, kern, out], [fr, wr, ir], mode=mode_with_gpu)
assert isinstance(f1.maker.fgraph.outputs[0].owner.inputs[0].owner.op,
......@@ -656,9 +648,6 @@ def test_dnn_conv_alpha_output_merge():
def test_dnn_conv_grad():
if not dnn.dnn_available() or dnn.version() == -1:
raise SkipTest('alpha != 1.0 not supported in cudnn v1')
b = 1
c = 4
f = 3
......@@ -696,7 +685,7 @@ def test_dnn_conv_grad():
def test_version():
if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg)
assert isinstance(dnn.version(), (int, tuple))
assert isinstance(dnn.version(), int)
class test_SoftMax(test_nnet.test_SoftMax):
......@@ -705,7 +694,7 @@ class test_SoftMax(test_nnet.test_SoftMax):
mode = mode_with_gpu
def test_softmax_shape_0(self):
raise SkipTest("Cudnn do not suport 0 shapes")
raise SkipTest("Cudnn doesn't suport 0 shapes")
def test_softmax_grad(self):
def cmp(n, m, f, f_gpu):
......@@ -758,18 +747,20 @@ class test_SoftMax(test_nnet.test_SoftMax):
mode=mode_with_gpu
)
sorted_f = f.maker.fgraph.toposort()
assert(len([i
for i in sorted_f
if isinstance(
i.op,
self.gpu_grad_op
)]) == 1)
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.tensor.nnet.SoftmaxGrad
)]) == 0)
# Optimization is disabled for cudnn v3 rc1
if dnn.version() == 2000:
assert(len([i
for i in sorted_f
if isinstance(
i.op,
self.gpu_grad_op
)]) == 1)
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.tensor.nnet.SoftmaxGrad
)]) == 0)
# Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
# optimization is not applied when cudnn is excluded or not
......@@ -801,15 +792,17 @@ class test_SoftMax(test_nnet.test_SoftMax):
o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
f = theano.function([y], o, mode=mode_with_gpu)
sorted_f = f.maker.fgraph.toposort()
assert(len([i
for i in sorted_f
if isinstance(
i.op,
self.gpu_grad_op
)]) == 1)
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.tensor.nnet.SoftmaxGrad
)]) == 0)
if dnn.version() == 2000:
# opt disabled for cudnn v3 rc1
assert(len([i
for i in sorted_f
if isinstance(
i.op,
self.gpu_grad_op
)]) == 1)
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.tensor.nnet.SoftmaxGrad
)]) == 0)
......@@ -346,7 +346,6 @@ class test_SoftMax(unittest.TestCase):
return f, f_gpu
def _cmp(self, n, m, f, f_gpu):
# print "test_softmax",n,m
data = numpy.arange(n * m, dtype='float32').reshape(n, m)
out = f(data)
gout = f_gpu(data)
......@@ -369,8 +368,6 @@ class test_SoftMax(unittest.TestCase):
self._cmp
)
# cuDNN R1 cannot handle these test cases but the Theano softmax can so
# we test them only for the Theano softmax.
self._cmp(2 << 15, 5, f, f_gpu)
def test_softmax_shape_0(self):
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论