提交 b62e6ad4 作者: Frederic Bastien

Speed up GpuDnnSoftmaxGrad by using the best mapping.

上级 80efda3f
...@@ -1634,8 +1634,8 @@ def local_softmax_dnn_grad(node, ctx_name): ...@@ -1634,8 +1634,8 @@ def local_softmax_dnn_grad(node, ctx_name):
n = as_gpuarray_variable(n, ctx_name) n = as_gpuarray_variable(n, ctx_name)
if n.ndim != 2: if n.ndim != 2:
return return
ins.append(n.dimshuffle(0, 1, 'x', 'x')) ins.append(n.dimshuffle(0, 'x', 1, 'x'))
out = GpuDnnSoftmaxGrad('accurate', 'channel')( out = GpuDnnSoftmaxGrad('accurate', 'instance')(
gpu_contiguous(ins[0]), gpu_contiguous(ins[1])) gpu_contiguous(ins[0]), gpu_contiguous(ins[1]))
return [out.dimshuffle(0, 1)] return [out.dimshuffle(0, 2)]
...@@ -836,6 +836,8 @@ class test_SoftMax(test_nnet.test_SoftMax): ...@@ -836,6 +836,8 @@ class test_SoftMax(test_nnet.test_SoftMax):
mode=mode_with_gpu mode=mode_with_gpu
) )
sorted_f = f.maker.fgraph.toposort() sorted_f = f.maker.fgraph.toposort()
val = numpy.random.rand(5).astype('float32')
out_dnn = f(val)
assert(len([i assert(len([i
for i in sorted_f for i in sorted_f
if isinstance( if isinstance(
...@@ -860,6 +862,8 @@ class test_SoftMax(test_nnet.test_SoftMax): ...@@ -860,6 +862,8 @@ class test_SoftMax(test_nnet.test_SoftMax):
mode=mode_wo_cudnn mode=mode_wo_cudnn
) )
sorted_f = f.maker.fgraph.toposort() sorted_f = f.maker.fgraph.toposort()
out_cpu = f(val)
utt.assert_allclose(out_dnn, out_cpu)
assert(len([i assert(len([i
for i in sorted_f for i in sorted_f
if isinstance( if isinstance(
......
...@@ -2590,17 +2590,17 @@ if True: ...@@ -2590,17 +2590,17 @@ if True:
n = n.owner.inputs[0] n = n.owner.inputs[0]
if n.ndim != 2: if n.ndim != 2:
return return
ins.append(n.dimshuffle(0, 1, 'x', 'x')) ins.append(n.dimshuffle(0, 'x', 1, 'x'))
out = GpuDnnSoftmaxGrad( out = GpuDnnSoftmaxGrad(
'bc01', 'bc01',
'accurate', 'accurate',
'channel' 'instance',
)( )(
gpu_contiguous(ins[0]), gpu_contiguous(ins[0]),
gpu_contiguous(ins[1]) gpu_contiguous(ins[1])
) )
return [out.dimshuffle(0, 1)] return [out.dimshuffle(0, 2)]
# AbstractConv Optimizations # AbstractConv Optimizations
......
...@@ -585,6 +585,8 @@ class test_DnnSoftMax(test_nnet.test_SoftMax): ...@@ -585,6 +585,8 @@ class test_DnnSoftMax(test_nnet.test_SoftMax):
mode=mode_with_gpu mode=mode_with_gpu
) )
sorted_f = f.maker.fgraph.toposort() sorted_f = f.maker.fgraph.toposort()
val = numpy.random.rand(5).astype('float32')
out_dnn = f(val)
assert(len([i assert(len([i
for i in sorted_f for i in sorted_f
if isinstance( if isinstance(
...@@ -608,6 +610,8 @@ class test_DnnSoftMax(test_nnet.test_SoftMax): ...@@ -608,6 +610,8 @@ class test_DnnSoftMax(test_nnet.test_SoftMax):
mode=mode_wo_cudnn mode=mode_wo_cudnn
) )
sorted_f = f.maker.fgraph.toposort() sorted_f = f.maker.fgraph.toposort()
out_cpu = f(val)
utt.assert_allclose(out_dnn, out_cpu)
assert(len([i assert(len([i
for i in sorted_f for i in sorted_f
if isinstance( if isinstance(
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论