提交 3c1025e4 authored 作者: Frederic's avatar Frederic

Reimplement shape_i to not disable shape optimization in some cases

上级 ae0797b3
......@@ -387,17 +387,34 @@ class Shape_i(gof.Op):
return [None]
def shape_i(var, i, fgraph=None):
    """Equivalent of var.shape[i], but apply if possible the shape
    feature optimization.

    This is useful in optimizations that need to get the shape. It
    removes the need for the shape_feature optimization that would
    otherwise convert it, so it speeds up optimization and avoids
    Equilibrium max-iteration problems.

    :param var: the variable we want to take the shape of
    :param i: the shape dimension we want
    :param fgraph: optional. If var.fgraph does not exist, the fgraph
        that has the shape_feature in which to introduce var to get
        the optimized shape.
    """
    # Prefer the fgraph the variable already belongs to over the
    # explicitly-passed one.
    if fgraph is None and hasattr(var, 'fgraph'):
        fgraph = var.fgraph
    if fgraph and hasattr(fgraph, 'shape_feature'):
        shape_feature = fgraph.shape_feature
        if var not in shape_feature.shape_of:
            # If var isn't tracked by the ShapeFeature yet, add it.
            shape_feature.on_import(fgraph, var.owner,
                                    'gof.ops.shape_i')
        return shape_feature.shape_of[var][i]
    # If we are not able to use the shape feature, we should not put
    # Shape_i in the graph. Otherwise, the shape feature optimization
    # won't get applied.
    return var.shape[i]
def register_shape_i_c_code(typ, code, check_input, version=()):
......
......@@ -576,6 +576,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
capability of 3.0 or higher. This means that older GPU will not
work with this Op.
"""
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
if (border_mode == 'valid' and subsample == (1,1) and
direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
......@@ -602,13 +603,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape = theano.tensor.stack(shape_i(img, 0), shape_i(kerns, 1),
shape_i(img, 2) + shape_i(kerns, 2) - 1,
shape_i(img, 3) + shape_i(kerns, 3)- 1)
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
shape = theano.tensor.stack(shape_i(img, 0, fgraph),
shape_i(kerns, 1, fgraph),
shape2, shape3)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode=conv_mode)(shape, kerns.shape)
return GpuDnnConvGradI()(kerns, img, desc, shape[2], shape[3])
return GpuDnnConvGradI()(kerns, img, desc, shape2, shape3)
# Standard case: We use GpuDnnConv with suitable padding.
img = gpu_contiguous(img)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论