提交 3c1025e4 · 作者:Frederic

Reimplement shape_i to not disable shape optimization in some cases

上级 ae0797b3
...@@ -387,17 +387,34 @@ class Shape_i(gof.Op): ...@@ -387,17 +387,34 @@ class Shape_i(gof.Op):
return [None] return [None]
def shape_i(var, i, fgraph=None):
    """Equivalent of ``var.shape[i]``, but use the shape feature
    optimization when possible.

    This is useful in optimizations that need to get the shape of a
    variable: it removes the need for the subsequent shape_feature
    optimization that would otherwise convert it. This speeds up
    optimization and avoids Equilibrium max-iteration problems.

    :param var: the variable we want to take the shape of
    :param i: the shape dimension we want
    :param fgraph: optional. If ``var.fgraph`` does not exist, the
        fgraph whose shape_feature should be used (``var`` is
        introduced into it to get the optimized shape).
    """
    # Prefer the fgraph the variable already belongs to.
    if fgraph is None and hasattr(var, 'fgraph'):
        fgraph = var.fgraph
    if fgraph and hasattr(fgraph, 'shape_feature'):
        if var not in fgraph.shape_feature.shape_of:
            # If var isn't known to the ShapeFeature yet, add it.
            fgraph.shape_feature.on_import(fgraph, var.owner,
                                           'gof.ops.shape_i')
        return fgraph.shape_feature.shape_of[var][i]
    # If we are not able to use the shape feature, we should not put
    # Shape_i in the graph. Otherwise, the shape feature optimization
    # won't get applied.
    return var.shape[i]
def register_shape_i_c_code(typ, code, check_input, version=()): def register_shape_i_c_code(typ, code, check_input, version=()):
......
...@@ -576,6 +576,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -576,6 +576,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
capability of 3.0 or higer. This means that older GPU will not capability of 3.0 or higer. This means that older GPU will not
work with this Op. work with this Op.
""" """
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
if (border_mode == 'valid' and subsample == (1,1) and if (border_mode == 'valid' and subsample == (1,1) and
direction_hint == 'bprop weights'): direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set # Special case: We are asked to use GpuDnnConvGradW. We need to set
...@@ -602,13 +603,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -602,13 +603,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3)) kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv' conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
shape = theano.tensor.stack(shape_i(img, 0), shape_i(kerns, 1), shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
shape_i(img, 2) + shape_i(kerns, 2) - 1, shape = theano.tensor.stack(shape_i(img, 0, fgraph),
shape_i(img, 3) + shape_i(kerns, 3)- 1) shape_i(kerns, 1, fgraph),
shape2, shape3)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode=conv_mode)(shape, kerns.shape) conv_mode=conv_mode)(shape, kerns.shape)
return GpuDnnConvGradI()(kerns, img, desc, shape[2], shape[3]) return GpuDnnConvGradI()(kerns, img, desc, shape2, shape3)
# Standard case: We use GpuDnnConv with suitable padding. # Standard case: We use GpuDnnConv with suitable padding.
img = gpu_contiguous(img) img = gpu_contiguous(img)
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论