提交 bdcb8d26 作者: carriepl

Add an optimization to convert LogSoftmax to GpuDnnSoftmax (log mode) in the CUDA backend

上级 9a0ab543
...@@ -10,7 +10,7 @@ from theano.gof import Optimizer, local_optimizer, COp ...@@ -10,7 +10,7 @@ from theano.gof import Optimizer, local_optimizer, COp
from theano.gof.type import CDataType, Generic from theano.gof.type import CDataType, Generic
from theano.compile import optdb from theano.compile import optdb
from theano.compile.ops import shape_i from theano.compile.ops import shape_i
from theano.tensor.nnet import SoftmaxGrad from theano.tensor.nnet import LogSoftmax, SoftmaxGrad
from theano.tensor.nnet.abstract_conv import get_conv_output_shape from theano.tensor.nnet.abstract_conv import get_conv_output_shape
from theano.tensor.signal.pool import ( from theano.tensor.signal.pool import (
Pool, MaxPoolGrad, AveragePoolGrad) Pool, MaxPoolGrad, AveragePoolGrad)
...@@ -2400,11 +2400,12 @@ if True: ...@@ -2400,11 +2400,12 @@ if True:
return [out] return [out]
@register_opt('cudnn') @register_opt('cudnn')
@local_optimizer([GpuElemwise]) @local_optimizer([GpuElemwise, LogSoftmax])
def local_log_softmax_dnn(node): def local_log_softmax_dnn(node):
# The log-softmax implementation is only available starting at CuDNN V3 # The log-softmax implementation is only available starting at CuDNN V3
if not dnn_available() or version() < (3000, 3000): if not dnn_available() or version() < (3000, 3000):
return return
if (isinstance(node.op, GpuElemwise) and if (isinstance(node.op, GpuElemwise) and
isinstance(node.op.scalar_op, Log) and isinstance(node.op.scalar_op, Log) and
node.inputs[0].owner and node.inputs[0].owner and
...@@ -2419,6 +2420,21 @@ if True: ...@@ -2419,6 +2420,21 @@ if True:
new_log_softmax = new_softmax_node(softmax_node.inputs[0]) new_log_softmax = new_softmax_node(softmax_node.inputs[0])
return [new_log_softmax] return [new_log_softmax]
elif (isinstance(node.op, LogSoftmax) and node.inputs[0].owner and
isinstance(node.inputs[0].owner.op, HostFromGpu)):
if not dnn_available():
return
# Transform the input in the format expected by GpuDnnSoftmax
inp = node.inputs[0].owner.inputs[0]
if inp.ndim != 2:
return
inp = inp.dimshuffle(0, 1, 'x', 'x')
# Apply GpuDnnSoftmax and return the result
out = GpuDnnSoftmax('bc01', 'log', 'channel')(gpu_contiguous(inp))
return [out.dimshuffle(0, 1)]
class NoCuDNNRaise(Optimizer): class NoCuDNNRaise(Optimizer):
def apply(self, fgraph): def apply(self, fgraph):
""" Raise a RuntimeError if cudnn can't be used""" """ Raise a RuntimeError if cudnn can't be used"""
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论