Commit e08c57b3 authored by Frederic

Create fast_compile_gpu. Reusing the gpu tag for the nnet optimizations doesn't work.

This is because if we exclude the 'gpu' tag, we don't want those optimizations to also be enabled.
Parent c6e55759
......@@ -93,7 +93,13 @@ OPT_NONE = gof.Query(include=[], exclude=exclude)
OPT_MERGE = gof.Query(include=['merge'], exclude=exclude)
OPT_FAST_RUN = gof.Query(include=['fast_run'], exclude=exclude)
OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable')
OPT_FAST_COMPILE = gof.Query(include=['fast_compile'], exclude=exclude)
# We need fast_compile_gpu here. Since the GPU does not have all the
# operations that exist in fast_compile, but has some that get
# introduced in fast_run, we want those optimizations to also run in
# fast_compile+gpu. We can't tag them just as 'gpu', as this would
# exclude them if we exclude 'gpu'.
OPT_FAST_COMPILE = gof.Query(include=['fast_compile', 'fast_compile_gpu'],
exclude=exclude)
OPT_STABILIZE = gof.Query(include=['fast_run'], exclude=exclude)
OPT_STABILIZE.position_cutoff = 1.5000001
OPT_NONE.name = 'OPT_NONE'
......@@ -191,7 +197,7 @@ optdb.register('Print1.51', PrintCurrentFunctionGraph('Post-stabilize'),
# misc special cases for speed
optdb.register('specialize', gof.EquilibriumDB(),
2, 'fast_run')
2, 'fast_run', 'fast_compile_gpu')
# misc special cases for speed that break canonicalization
optdb.register('uncanonicalize', gof.EquilibriumDB(),
......
......@@ -577,7 +577,7 @@ class Softmax(gof.Op):
softmax = Softmax()
@opt.register_specialize('gpu')
@opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax])
def local_softmax_with_bias(node):
"""Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias)
......@@ -1330,8 +1330,8 @@ class CrossentropyCategorical1Hot(gof.Op):
crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
@opt.register_stabilize('gpu')
@opt.register_specialize('gpu')
@opt.register_stabilize('fast_compile_gpu')
@opt.register_specialize('fast_compile_gpu')
@gof.optimizer
def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
"""This is a stabilization optimization
......@@ -1404,10 +1404,10 @@ def crossentropy_to_crossentropy_with_softmax(fgraph):
optdb.register('crossentropy_to_crossentropy_with_softmax',
crossentropy_to_crossentropy_with_softmax, 2.01,
'fast_run', 'xent', 'gpu')
'fast_run', 'xent', 'fast_compile_gpu')
@opt.register_specialize('gpu')
@opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_grad])
def local_crossentropy_to_crossentropy_with_softmax_grad(node):
if node.op == softmax_grad:
......@@ -1420,7 +1420,7 @@ def local_crossentropy_to_crossentropy_with_softmax_grad(node):
return [dx]
@opt.register_specialize('gpu')
@opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([tensor._max_and_argmax])
def local_argmax_pushdown(node):
if node.op == tensor._max_and_argmax and node.inputs[0].owner and \
......@@ -1506,7 +1506,7 @@ def _is_const(z, val, approx=False):
return numpy.all(maybe == val)
@opt.register_specialize('gpu')
@opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([subtensor.AdvancedSubtensor, tensor.log])
def local_advanced_indexing_crossentropy_onehot(node):
log = None
......@@ -1547,7 +1547,7 @@ def local_advanced_indexing_crossentropy_onehot(node):
labels)[0]]
@opt.register_specialize('gpu')
@opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_grad])
def local_advanced_indexing_crossentropy_onehot_grad(node):
if not (node.op == softmax_grad):
......@@ -1770,7 +1770,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
return
@opt.register_specialize('gpu')
@opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_with_bias])
def graph_merge_softmax_with_crossentropy_softmax(node):
if node.op == softmax_with_bias:
......@@ -1976,4 +1976,4 @@ local_log_softmax = gof.PatternSub(in_pattern=(tensor.log, (softmax, 'x')),
#don't do register_stabilize, this is to make local_log_softmax run
#only after another more specific optimization that stabilizes cross entropy
#opt.register_stabilize(local_log_softmax, name = 'local_log_softmax')
opt.register_specialize(local_log_softmax, 'gpu', name='local_log_softmax')
opt.register_specialize(local_log_softmax, 'fast_compile_gpu', name='local_log_softmax')
......@@ -338,7 +338,8 @@ def register_specialize(lopt, *tags, **kwargs):
return register
else:
name = (kwargs and kwargs.pop('name')) or lopt.__name__
compile.optdb['specialize'].register(name, lopt, 'fast_run', *tags)
compile.optdb['specialize'].register(name, lopt, 'fast_run',
'fast_compile_gpu', *tags)
return lopt
......@@ -1319,7 +1320,7 @@ def local_track_shape_i(node):
@register_specialize
@register_canonicalize('gpu')
@register_canonicalize('fast_compile_gpu')
@gof.local_optimizer([Subtensor])
def local_subtensor_make_vector(node):
# replace all subtensor(make_vector) like:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论