提交 3b18fef4 authored 作者: Frederic Bastien

added an optimization phase: specialize_device and put the new POW optimization into that phase.

上级 b91efd13
......@@ -137,6 +137,8 @@ optdb.register('stabilize', gof.EquilibriumDB(), # replace unstable s
1.5, 'fast_run')
optdb.register('specialize', gof.EquilibriumDB(), # misc special cases for speed
2, 'fast_run')
optdb.register('specialize_device', gof.EquilibriumDB(), # misc special cases for speed that are dependent on the device.
48.6, 'fast_run')#must be after gpu stuff at 48.5
optdb.register('merge2', gof.MergeOptimizer(), # especially constant merge
49, 'fast_run')
optdb.register('add_destroy_handler', AddDestroyHandler(),
......
......@@ -144,6 +144,11 @@ def register_specialize(lopt, *tags, **kwargs):
compile.optdb['specialize'].register(name, lopt, 'fast_run', *tags)
return lopt
def register_specialize_device(lopt, *tags, **kwargs):
    """Register `lopt` in the 'specialize_device' optimization phase.

    The optimizer is registered with the 'fast_run' tag plus any extra
    `tags`.  An optional ``name`` keyword overrides the default name
    (the optimizer's ``__name__``).  Returns `lopt` unchanged so this
    function can be used as a decorator.
    """
    if kwargs:
        # NOTE: pop('name') raises KeyError if other kwargs are passed
        # without 'name' -- same contract as register_specialize above.
        opt_name = kwargs.pop('name') or lopt.__name__
    else:
        opt_name = lopt.__name__
    compile.optdb['specialize_device'].register(opt_name, lopt, 'fast_run', *tags)
    return lopt
def register_stabilize(lopt, *tags, **kwargs):
name = (kwargs and kwargs.pop('name')) or lopt.__name__
compile.optdb['stabilize'].register(name, lopt, 'fast_run', *tags)
......@@ -1829,9 +1834,31 @@ def local_pow_specialize(node):
rval = [T.inv(xsym)]
if N.all(y == -2):
rval = [T.inv(T.sqr(xsym))]
if rval:
rval[0] = T.cast(rval[0], odtype)
assert rval[0].type == node.outputs[0].type, (rval, node.outputs)
return rval
else:
return False
register_specialize(local_pow_specialize)
# Optimize all integral powers in [-RANGE, RANGE]
if config.experimental.pow and rval is None and abs(y)==int(abs(y)) and abs(y) <= 512:# 512 is too small for the cpu and too big for some gpu!
@register_specialize_device
@gof.local_optimizer([T.pow])
def local_pow_specialize_device(node):
"""
This optimization is not the same on all device. We do it only on cpu here.
"""
if node.op == T.pow:
#the idea here is that we have pow(x, y)
odtype = node.outputs[0].dtype
xsym = node.inputs[0]
ysym = node.inputs[1]
y = local_mul_canonizer.get_constant(ysym)
if (y is not None) \
and encompasses_broadcastable(xsym.type.broadcastable, ysym.type.broadcastable):
rval = None
# 512 is too small for the cpu and too big for some gpu!
if abs(y)==int(abs(y)) and abs(y) <= 512:
pow2 = [xsym]
pow2_scal = [theano.scalar.Scalar(xsym.dtype)()]
y_to_do = abs(y)
......@@ -1861,14 +1888,7 @@ def local_pow_specialize(node):
rval[0] = T.cast(rval[0], odtype)
assert rval[0].type == node.outputs[0].type, (rval, node.outputs)
return rval
else:
return False
register_specialize(local_pow_specialize)
# Config flag gating the experimental pow -> repeated-multiplication rewrite.
# Defaults to False; the rewrite is fast on cpu but more work is needed for gpu.
theano.configparser.AddConfigVar('experimental.pow',
        "Transform pow with a constant integer exponent into a graph of "
        "multiplications. Fast on cpu, but more work needed for gpu.",
        theano.configparser.BoolParam(False),
        )
@gof.local_optimizer([T.mul])
def local_mul_specialize(node):
"""Remove special-case constants from mul arguments
......
......@@ -1259,6 +1259,7 @@ def test_local_pow_specialize():
v = T.vector()
val = numpy.arange(10,dtype=theano.config.floatX)
val_no0 = numpy.arange(1,10,dtype=theano.config.floatX)
f = function([v], v**0, mode=mode)
nodes = [node.op for node in f.maker.env.toposort()]
assert nodes == [Shape_i(0), T.alloc]
......@@ -1300,33 +1301,44 @@ def test_local_pow_specialize():
# assert nodes == [T.sqrt,T.inv]#Why this don't work?
assert numpy.allclose(f(val_no0),val_no0**(-.5))
if config.experimental.pow:
print "Test experimental.pow=True"
f = function([v], v**(15), mode=mode)
nodes = [node.op for node in f.maker.env.toposort()]
assert len(nodes)==1
assert isinstance(nodes[0].scalar_op,theano.scalar.Composite)
assert numpy.allclose(f(val),val**15)
f = function([v], v**(-15), mode=mode)
nodes = [node.op for node in f.maker.env.toposort()]
assert len(nodes)==2
assert isinstance(nodes[0].scalar_op,theano.scalar.Composite)
assert isinstance(nodes[-1].scalar_op,theano.scalar.basic.Inv)
assert numpy.allclose(f(val_no0),val_no0**(-15))
f = function([v], v**(16), mode=mode)
nodes = [node.op for node in f.maker.env.toposort()]
assert len(nodes) == 1
assert isinstance(nodes[0].scalar_op,theano.scalar.Composite)
assert numpy.allclose(f(val),val**16)
f = function([v], v**(-16), mode=mode)
nodes = [node.op for node in f.maker.env.toposort()]
assert len(nodes) == 2
assert isinstance(nodes[0].scalar_op,theano.scalar.Composite)
assert isinstance(nodes[-1].scalar_op,theano.scalar.basic.Inv)
assert numpy.allclose(f(val_no0),val_no0**(-16))
def test_local_pow_specialize_device():
    """Check that on cpu the more aggressive pow optimization is applied.

    Positive integer exponents must collapse to a single Composite node;
    negative ones to a Composite followed by an Inv.
    """
    mode = theano.config.mode
    if mode == 'FAST_COMPILE':
        mode = 'FAST_RUN'
    mode = compile.mode.get_mode(mode).excluding('fusion').excluding('gpu')

    v = T.vector()
    val = numpy.arange(10, dtype=theano.config.floatX)
    val_no0 = numpy.arange(1, 10, dtype=theano.config.floatX)

    # Same exponents (and order) as the original hand-unrolled checks.
    for exponent in (15, -15, 16, -16):
        f = function([v], v ** exponent, mode=mode)
        ops = [node.op for node in f.maker.env.toposort()]
        if exponent > 0:
            # One fused Composite of multiplications.
            assert len(ops) == 1
            assert isinstance(ops[0].scalar_op, theano.scalar.Composite)
            assert numpy.allclose(f(val), val ** exponent)
        else:
            # Composite for |exponent|, then an elementwise inverse.
            assert len(ops) == 2
            assert isinstance(ops[0].scalar_op, theano.scalar.Composite)
            assert isinstance(ops[-1].scalar_op, theano.scalar.basic.Inv)
            # val_no0 avoids division by zero in the inverse.
            assert numpy.allclose(f(val_no0), val_no0 ** exponent)
class T_Rebroadcast(unittest.TestCase):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论