Commit c4ac9362, authored by abergeron

Merge pull request #2617 from nouiz/cleanup_grad

Cleanup dnn conv stuff.
@@ -3516,6 +3516,9 @@ class GpuContiguous(GpuOp):
         assert i.is_c_contiguous()
         out[0][0] = i
 
+    def infer_shape(self, node, xshp):
+        return xshp
+
     def c_code(self, node, name, inp, out, sub):
         input, = inp
         z, = out
...
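Note: the added `infer_shape` tells Theano's shape-inference pass that `GpuContiguous` preserves its input's shape, so a graph that only needs the output shape never has to execute the contiguous copy. A minimal sketch of the effect, assuming a CUDA-enabled Theano build (`theano.sandbox.cuda` must be usable):

```python
# Sketch only, assuming a CUDA-enabled Theano install.
import theano
import theano.tensor as T
from theano.sandbox.cuda.basic_ops import gpu_contiguous

x = T.tensor4('x')
y = gpu_contiguous(x)

# Because infer_shape returns the input shape unchanged, a function
# that only asks for y.shape can be compiled down to Shape_i nodes on
# x, with no GpuContiguous node left in the final graph.
f = theano.function([x], y.shape)
theano.printing.debugprint(f)
```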
@@ -8,6 +8,7 @@ from theano.gradient import DisconnectedType, grad_not_implemented
 from theano.gof import Optimizer, local_optimizer, COp
 from theano.gof.type import CDataType, Generic
 from theano.compat import PY3
+from theano.compile import optdb
 from theano.compile.ops import shape_i
 from theano.configparser import AddConfigVar, EnumStr
 from theano.tensor.nnet import SoftmaxGrad
@@ -1523,27 +1524,31 @@ if True:
                 rval, node.outputs[0].type.broadcastable)
             return [rval]
 
-    @register_opt('cudnn')
     @local_optimizer([GpuDnnConv], inplace=True)
     def local_dnn_conv_inplace(node):
         if type(node.op) != GpuDnnConv or node.op.inplace == True:
             return
         return [GpuDnnConv(workmem=node.op.workmem, inplace=True)(*node.inputs)]
 
-    @register_opt('cudnn')
     @local_optimizer([GpuDnnConvGradW], inplace=True)
     def local_dnn_convgw_inplace(node):
         if type(node.op) != GpuDnnConvGradW or node.op.inplace == True:
             return
         return [GpuDnnConvGradW(inplace=True)(*node.inputs)]
 
-    @register_opt('cudnn')
     @local_optimizer([GpuDnnConvGradI], inplace=True)
     def local_dnn_convgi_inplace(node):
         if type(node.op) != GpuDnnConvGradI or node.op.inplace == True:
             return
         return [GpuDnnConvGradI(inplace=True)(*node.inputs)]
 
+    optdb.register('local_dnn_conv_inplace',
+                   tensor.opt.in2out(local_dnn_conv_inplace,
+                                     local_dnn_convgw_inplace,
+                                     local_dnn_convgi_inplace,
+                                     name="local_dnn_conv_inplace"),
+                   70.0, 'fast_run', 'inplace', 'gpu', 'cudnn')
+
     @register_opt('cudnn')
     @alpha_merge(GpuDnnConv, alpha_in=4, nd=4)
     def local_dnn_conv_alpha_merge(node, *inputs):
...
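Note: the three inplace rewrites lose their individual `@register_opt('cudnn')` registrations and are instead wrapped by `tensor.opt.in2out` into a single pass registered directly in `optdb` at position 70.0. They now run late, alongside Theano's other 'inplace' optimizations, rather than during the main cudnn substitution phase, where an early inplace substitution can block other pattern rewrites. A rough sketch of that registration pattern with a do-nothing pass; `NoopInplace` is hypothetical, not part of Theano:

```python
# Hedged sketch of the optdb registration pattern; NoopInplace is a
# made-up stand-in for the pass that in2out() builds from a set of
# local optimizers.
from theano.compile import optdb
from theano.gof.opt import Optimizer

class NoopInplace(Optimizer):
    """A global optimizer that does nothing, shown only to
    illustrate the registration API."""
    def apply(self, fgraph):
        pass  # a real pass would walk fgraph and substitute nodes

# Position 70.0 places the pass after the main rewrite phases
# (canonicalize, specialize, ...), with the other 'inplace' passes;
# the tags control which compilation modes enable it.
optdb.register('noop_inplace', NoopInplace(), 70.0,
               'fast_run', 'inplace')
```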
@@ -87,6 +87,7 @@ gpu_optimizer.register('gpu_merge', theano.gof.opt.merge_optimizer,
 register_opt()(theano.tensor.opt.local_track_shape_i)
 register_opt(name='gpu_constant_folding')(
     tensor.opt.constant_folding)
+register_opt()(theano.tensor.opt.local_subtensor_make_vector)
 
 # This is a partial list of CPU ops that can be in some circonstance
...
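Note: `local_subtensor_make_vector` simplifies a constant index into a `MakeVector` (e.g. `make_vector(a, b, c)[1] -> b`), a pattern that shape graphs produce constantly; registering it with `register_opt()` makes the same simplification fire during the GPU optimization phase. A small CPU-side illustration of the rewrite it provides:

```python
# Illustration of the rewrite on the CPU side; the diff above merely
# registers the same local optimizer for the GPU phase as well.
import theano
import theano.tensor as T

x = T.matrix('x')
# x.shape is rebuilt internally as MakeVector(Shape_i{0}(x),
# Shape_i{1}(x)); indexing it with a constant lets the optimizer pick
# out the single Shape_i node instead of building the whole vector.
f = theano.function([x], x.shape[1])
theano.printing.debugprint(f)  # expect Shape_i{1}(x), no MakeVector
```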