Commit b9d546a9 authored by Dustin Webb

Added an optimization that converts SoftmaxGrad to GpuDnnSoftmaxGrad, and an associated test to make sure it is applied correctly.
Parent 1625e634
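For context (not part of the commit itself): a minimal sketch of how the new rewrite shows up from user code. It assumes a CUDA-enabled Theano build with cuDNN available, and borrows the 'gpu'/'cudnn' optimizer tags used by the test suite's mode_with_gpu; the names here are illustrative only.

    import theano
    import theano.tensor as T
    from theano.sandbox.cuda import dnn

    # Gradient of a softmax: the unoptimized graph contains SoftmaxGrad.
    x = T.fmatrix('x')
    g = theano.grad(T.nnet.softmax(x).mean(), x)

    # Compiling with the GPU/cuDNN optimizer tags should let the rewrite
    # replace SoftmaxGrad with GpuDnnSoftmaxGrad.
    mode = theano.compile.get_default_mode().including('gpu', 'cudnn')
    f = theano.function([x], g, mode=mode)
    ops = [node.op for node in f.maker.fgraph.toposort()]
    print(any(isinstance(op, dnn.GpuDnnSoftmaxGrad) for op in ops))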
@@ -5,11 +5,12 @@ from theano import Apply, gof, tensor
 from theano.gof import Optimizer
 from theano.gof.type import CDataType
 from theano.compat import PY3
+from theano.tensor.nnet import SoftmaxGrad
 from theano.sandbox.cuda.type import CudaNdarrayType
 from theano.sandbox.cuda import (GpuOp, cuda_available, active_device_number,
                                  device_properties)
 from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
-                                           gpu_contiguous)
+                                           gpu_contiguous, HostFromGpu)
 from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
                                       GpuDownsampleFactorMaxGrad)
 from theano.sandbox.cuda.nnet import GpuSoftmax
@@ -887,7 +888,7 @@ cudnnTensor4dDescriptor_t %(name)s_%(id)d;
 %(name)s_%(id)d = NULL;
 if ((err%(id)d = cudnnCreateTensor4dDescriptor(&%(name)s_%(id)d)) != CUDNN_STATUS_SUCCESS) {
   PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
-               "%%s", cudnnGetErrorString(err%(id)d));
+               ": %%s", cudnnGetErrorString(err%(id)d));
   %(fail)s
 }
 """ % dict(name=name, id=id, fail=fail)
@@ -1152,3 +1153,27 @@ if cuda_available:
                        " to use it. We got this error: \n" +
                        dnn_available.msg)
     gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
+
+
+@register_opt('cudnn')
+@local_optimizer([SoftmaxGrad])
+def local_softmax_dnn_grad(node):
+    if (
+        isinstance(node.op, SoftmaxGrad)
+        and (isinstance(node.inputs[0].owner.op, HostFromGpu)
+             or isinstance(node.inputs[1].owner.op, HostFromGpu))
+    ):
+        ins = []
+        for n in node.inputs:
+            if isinstance(n.owner.op, HostFromGpu):
+                n = n.owner.inputs[0]
+            ins.append(n.dimshuffle(0, 1, 'x', 'x'))
+        out = GpuDnnSoftmaxGrad(
+            'bc01',
+            'accurate',
+            'channel'
+        )(
+            ins[0],
+            gpu_contiguous(ins[1])
+        )
+        return [out.dimshuffle(0, 1)]
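A note on the dimshuffle round-trip in the rewrite above: cuDNN's softmax kernels operate on 4D tensors in 'bc01' layout, so the 2D (batch, class) inputs are padded with two broadcastable axes before GpuDnnSoftmaxGrad is applied, and the result is collapsed back to 2D at the end. A standalone sketch of that reshaping (illustrative only, not from the commit; runs on CPU):

    import numpy
    import theano
    import theano.tensor as T

    m = T.fmatrix('m')                 # 2D input, shape (batch, class)
    m4 = m.dimshuffle(0, 1, 'x', 'x')  # pad to 'bc01': (batch, class, 1, 1)
    m2 = m4.dimshuffle(0, 1)           # drop the broadcastable axes again

    f = theano.function([m], [m4, m2])
    a = numpy.arange(6, dtype='float32').reshape(2, 3)
    out4, out2 = f(a)
    assert out4.shape == (2, 3, 1, 1)
    assert (out2 == a).all()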
@@ -276,7 +276,7 @@ class test_SoftMax(unittest.TestCase):
         x = T.fmatrix('x')
         z = T.nnet.softmax
 
-        def check_types(graph, graph_gpu):
+        def check_types_without_cudnn(graph, graph_gpu):
             self._check_types(
                 graph,
                 graph_gpu,
@@ -285,14 +285,15 @@ class test_SoftMax(unittest.TestCase):
                 cuda.nnet.GpuSoftmax
             )
 
+        mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
         f, f_gpu = self._test_softmax(
             x,
             x,
             z,
             z,
             self._cmp,
-            mode_with_gpu,
-            check_types
+            mode_wo_cudnn,
+            check_types_without_cudnn
         )
 
         # cuDNN R1 cannot handle these test cases but the Theano softmax can so
@@ -300,6 +301,25 @@ class test_SoftMax(unittest.TestCase):
         self._cmp(2 << 15, 5, f, f_gpu)
         self._cmp(0, 10, f, f_gpu)
 
+        def check_types_with_cudnn(graph, graph_gpu):
+            self._check_types(
+                graph,
+                graph_gpu,
+                -3,
+                type(z),
+                theano.sandbox.cuda.dnn.GpuDnnSoftmax
+            )
+
+        f, f_gpu = self._test_softmax(
+            x,
+            x,
+            z,
+            z,
+            self._cmp,
+            mode_with_gpu,
+            check_types_with_cudnn
+        )
+
     def test_cudnn_softmax(self):
         if not cuda.dnn.dnn_available():
             raise SkipTest(cuda.dnn.dnn_available.msg)
@@ -357,5 +377,29 @@ class test_SoftMax(unittest.TestCase):
             check_types
         )
 
-        mode = mode_with_gpu.including("cudnn")
-        self._test_softmax(x, x, f_z, f_z, self._cmp, mode, check_types_opt)
+        mode_w_cudnn = mode_with_gpu.including("cudnn")
+        self._test_softmax(
+            x, x, f_z, f_z, self._cmp,
+            mode_w_cudnn, check_types_opt
+        )
+
+        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is applied.
+        y = T.vector('y')
+        f = theano.function(
+            [y],
+            T.grad(T.nnet.softmax(y).mean(), y),
+            mode=mode_with_gpu
+        )
+        sorted_f = f.maker.fgraph.toposort()
+        assert(len([i
+                    for i in sorted_f
+                    if isinstance(
+                        i.op,
+                        theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad
+                    )]) == 1)
+        assert(len([i
+                    for i in sorted_f
+                    if isinstance(
+                        i.op,
+                        theano.tensor.nnet.SoftmaxGrad
+                    )]) == 0)