提交 21e14ca4 作者: Pascal Lamblin

Merge pull request #4532 from nouiz/dnn_softmax_speed

Speed up GpuDnnSoftmaxGrad by using the best mapping.
......@@ -1634,8 +1634,8 @@ def local_softmax_dnn_grad(node, ctx_name):
n = as_gpuarray_variable(n, ctx_name)
if n.ndim != 2:
return
-ins.append(n.dimshuffle(0, 1, 'x', 'x'))
+ins.append(n.dimshuffle(0, 'x', 1, 'x'))
-out = GpuDnnSoftmaxGrad('accurate', 'channel')(
+out = GpuDnnSoftmaxGrad('accurate', 'instance')(
gpu_contiguous(ins[0]), gpu_contiguous(ins[1]))
-return [out.dimshuffle(0, 1)]
+return [out.dimshuffle(0, 2)]
......@@ -836,6 +836,8 @@ class test_SoftMax(test_nnet.test_SoftMax):
mode=mode_with_gpu
)
sorted_f = f.maker.fgraph.toposort()
+val = numpy.random.rand(5).astype('float32')
+out_dnn = f(val)
assert(len([i
for i in sorted_f
if isinstance(
......@@ -860,6 +862,8 @@ class test_SoftMax(test_nnet.test_SoftMax):
mode=mode_wo_cudnn
)
sorted_f = f.maker.fgraph.toposort()
+out_cpu = f(val)
+utt.assert_allclose(out_dnn, out_cpu)
assert(len([i
for i in sorted_f
if isinstance(
......
......@@ -2590,17 +2590,17 @@ if True:
n = n.owner.inputs[0]
if n.ndim != 2:
return
-ins.append(n.dimshuffle(0, 1, 'x', 'x'))
+ins.append(n.dimshuffle(0, 'x', 1, 'x'))
out = GpuDnnSoftmaxGrad(
'bc01',
'accurate',
-'channel'
+'instance',
)(
gpu_contiguous(ins[0]),
gpu_contiguous(ins[1])
)
-return [out.dimshuffle(0, 1)]
+return [out.dimshuffle(0, 2)]
# AbstractConv Optimizations
......
......@@ -585,6 +585,8 @@ class test_DnnSoftMax(test_nnet.test_SoftMax):
mode=mode_with_gpu
)
sorted_f = f.maker.fgraph.toposort()
val = numpy.random.rand(5).astype('float32')
out_dnn = f(val)
assert(len([i
for i in sorted_f
if isinstance(
......@@ -608,6 +610,8 @@ class test_DnnSoftMax(test_nnet.test_SoftMax):
mode=mode_wo_cudnn
)
sorted_f = f.maker.fgraph.toposort()
out_cpu = f(val)
utt.assert_allclose(out_dnn, out_cpu)
assert(len([i
for i in sorted_f
if isinstance(
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论