提交 f292f7e2 authored 作者: Frederic Bastien's avatar Frederic Bastien

Merge branch 'shape_feature' of github.com:nouiz/Theano into opt

...@@ -710,18 +710,14 @@ def local_gpua_careduce(node, context_name): ...@@ -710,18 +710,14 @@ def local_gpua_careduce(node, context_name):
assert reduce_mask[a] == 0 assert reduce_mask[a] == 0
reduce_mask[a] = 1 reduce_mask[a] = 1
shape_of = node.fgraph.shape_feature.shape_of new_in_shp = [shape_i(x, 0)]
x_shape = shape_of[x]
new_in_shp = [x_shape[0]]
new_mask = [reduce_mask[0]] new_mask = [reduce_mask[0]]
for i in xrange(1, x.type.ndim): for i in xrange(1, x.type.ndim):
if reduce_mask[i] == reduce_mask[i - 1]: if reduce_mask[i] == reduce_mask[i - 1]:
new_in_shp[-1] *= x_shape[i] new_in_shp[-1] *= shape_i(x, i)
else: else:
new_mask.append(reduce_mask[i]) new_mask.append(reduce_mask[i])
new_in_shp.append(x_shape[i]) new_in_shp.append(shape_i(x, i))
new_axis = [] new_axis = []
for idx, m in enumerate(new_mask): for idx, m in enumerate(new_mask):
if m == 1: if m == 1:
...@@ -743,8 +739,12 @@ def local_gpua_careduce(node, context_name): ...@@ -743,8 +739,12 @@ def local_gpua_careduce(node, context_name):
greduce(gpu_reshaped_x)) greduce(gpu_reshaped_x))
if reduce_reshaped_x.ndim != node.outputs[0].ndim: if reduce_reshaped_x.ndim != node.outputs[0].ndim:
out_shp = []
for i in range(x.ndim):
if i not in node.op.axis:
out_shp.append(shape_i(x, i))
unreshaped_reduce = reduce_reshaped_x.reshape( unreshaped_reduce = reduce_reshaped_x.reshape(
tensor.stack(shape_of[node.outputs[0]])) tensor.stack(out_shp))
else: else:
unreshaped_reduce = reduce_reshaped_x unreshaped_reduce = reduce_reshaped_x
return [unreshaped_reduce] return [unreshaped_reduce]
......
...@@ -14,6 +14,7 @@ from . import dnn ...@@ -14,6 +14,7 @@ from . import dnn
import theano import theano
from theano import scalar as scal from theano import scalar as scal
from theano import config, tensor, gof from theano import config, tensor, gof
from theano.compile.ops import shape_i
import theano.ifelse import theano.ifelse
import theano.tensor.signal.pool import theano.tensor.signal.pool
import theano.tensor.nnet import theano.tensor.nnet
...@@ -899,18 +900,14 @@ def local_gpu_careduce(node): ...@@ -899,18 +900,14 @@ def local_gpu_careduce(node):
# to make them a single dimension, do the reduction, and # to make them a single dimension, do the reduction, and
# then reshape to get them back. # then reshape to get them back.
shape_of = node.fgraph.shape_feature.shape_of new_in_shp = [shape_i(x, 0)]
x_shape = shape_of[x]
new_in_shp = [x_shape[0]]
new_mask = [reduce_mask[0]] new_mask = [reduce_mask[0]]
for i in xrange(1, x.type.ndim): for i in xrange(1, x.type.ndim):
if reduce_mask[i] == reduce_mask[i - 1]: if reduce_mask[i] == reduce_mask[i - 1]:
new_in_shp[-1] *= x_shape[i] new_in_shp[-1] *= shape_i(x, i)
else: else:
new_mask.append(reduce_mask[i]) new_mask.append(reduce_mask[i])
new_in_shp.append(x_shape[i]) new_in_shp.append(shape_i(x, i))
new_greduce = GpuCAReduce(new_mask, scalar_op) new_greduce = GpuCAReduce(new_mask, scalar_op)
new_x = x.reshape(tensor.stack(new_in_shp)) new_x = x.reshape(tensor.stack(new_in_shp))
...@@ -935,8 +932,11 @@ def local_gpu_careduce(node): ...@@ -935,8 +932,11 @@ def local_gpu_careduce(node):
# Restore the expected shape of the output # Restore the expected shape of the output
if rval.ndim != out.ndim: if rval.ndim != out.ndim:
rval = rval.reshape( out_shp = []
tensor.stack(shape_of[out])) for i in range(x.ndim):
if i not in node.op.axis:
out_shp.append(shape_i(x, i))
rval = rval.reshape(tensor.stack(out_shp))
if rval.type == out.type: if rval.type == out.type:
return [rval] return [rval]
......
...@@ -1260,6 +1260,12 @@ class ShapeFeature(object): ...@@ -1260,6 +1260,12 @@ class ShapeFeature(object):
for node in fgraph.toposort(): for node in fgraph.toposort():
self.on_import(fgraph, node, reason='on_attach') self.on_import(fgraph, node, reason='on_attach')
def on_detach(self, fgraph):
    """Drop all shape bookkeeping and unregister from ``fgraph``.

    Resets ``shape_of``, ``scheduled`` and ``shape_of_reverse_index`` to
    fresh empty dicts, then removes the ``shape_feature`` attribute from
    ``fgraph``.
    """
    # Each attribute gets its own new dict; they must not share one object.
    for attr_name in ("shape_of", "scheduled", "shape_of_reverse_index"):
        setattr(self, attr_name, {})
    delattr(fgraph, "shape_feature")
def on_import(self, fgraph, node, reason): def on_import(self, fgraph, node, reason):
if node.outputs[0] in self.shape_of: if node.outputs[0] in self.shape_of:
# this is a revert, not really an import # this is a revert, not really an import
...@@ -1430,19 +1436,28 @@ class ShapeFeature(object): ...@@ -1430,19 +1436,28 @@ class ShapeFeature(object):
class ShapeOptimizer(Optimizer): class ShapeOptimizer(Optimizer):
"""Optimizer that serves to add ShapeFeature as an fgraph feature.""" """Optimizer that serves to add ShapeFeature as an fgraph feature."""
def __init__(self):
Optimizer.__init__(self)
def add_requirements(self, fgraph): def add_requirements(self, fgraph):
fgraph.attach_feature(ShapeFeature()) fgraph.attach_feature(ShapeFeature())
def apply(self, fgraph): def apply(self, fgraph):
pass pass
class UnShapeOptimizer(Optimizer):
    """Optimizer that removes ShapeFeature as an fgraph feature."""

    def apply(self, fgraph):
        """Detach every ``ShapeFeature`` instance registered on ``fgraph``.

        Parameters
        ----------
        fgraph
            The function graph whose ``_features`` collection is scanned.
        """
        # Iterate over a snapshot: remove_feature is expected to mutate
        # fgraph._features (TODO confirm against FunctionGraph), and
        # removing from a list while iterating over it skips elements.
        for feature in list(fgraph._features):
            if isinstance(feature, ShapeFeature):
                fgraph.remove_feature(feature)
# Register it after merge1 optimization at 0. We don't want to track # Register it after merge1 optimization at 0. We don't want to track
# the shape of merged node. # the shape of merged node.
theano.compile.mode.optdb.register('ShapeOpt', ShapeOptimizer(), theano.compile.mode.optdb.register('ShapeOpt', ShapeOptimizer(),
0.1, 'fast_run', 'fast_compile') 0.1, 'fast_run', 'fast_compile')
# Not enabled by default for now. Some crossentropy opts use the
# shape_feature. They are at step 2.01. uncanonicalize is at step
# 3. After that comes step 48.5, which moves to the gpu. So 10 seems reasonable.
theano.compile.mode.optdb.register('UnShapeOpt', UnShapeOptimizer(),
10)
def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP): def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论