Register to the gpu tag more opt, as we don't have all the other case on the GPU…

Register to the gpu tag more opt, as we don't have all the other case on the GPU otherwise in fast_compile.

Register to the gpu tag more opt, as we don't have all the other case on the GPU…
705806f4 · Frederic · 9de8a09d · 705806f4
--- a/theano/tensor/nnet/nnet.py
+++ b/theano/tensor/nnet/nnet.py
 """Provides neural-network specific Ops.
 :note: TODO: factor this out into a neural-network toolbox.
+:note: We register all optimization with the gpu tag as we don't
+    implement all the intermediate case on the GPU (in particular
+    AdvancedSubtensor). So to make sure it run well on the gpu with
+    fast_compile, we register them as needed for the GPU. This can be
+    revisited later when all the intermediate part are on the GPU.
 """
 import logging
 import numpy
@@ -570,7 +577,6 @@ class Softmax(gof.Op):
 softmax = Softmax()
-@opt.register_specialize
 @gof.local_optimizer([softmax])
 def local_softmax_with_bias(node):
    """Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias)
@@ -629,6 +635,7 @@ def local_softmax_with_bias(node):
                    #This condition is not always true. See the test
                    #nnet/tests/test_nnet.py:T_SoftmaxWithBias.test_broadcast
                    return [sm_bias]
+opt.register_specialize(local_softmax_with_bias, 'gpu')
 def softmax_simplifier(numerators, denominators):
@@ -1323,8 +1330,6 @@ class CrossentropyCategorical1Hot(gof.Op):
 crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
-@opt.register_stabilize
-@opt.register_specialize
 @gof.optimizer
 def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
    """This is a stabilization optimization
@@ -1352,6 +1357,10 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
    while search_make_one_sub():
        pass
    return
+opt.register_stabilize(crossentropy_to_crossentropy_with_softmax_with_bias,
+                       'gpu')
+opt.register_specialize(crossentropy_to_crossentropy_with_softmax_with_bias,
+                        'gpu')
 @gof.optimizer
@@ -1397,7 +1406,7 @@ def crossentropy_to_crossentropy_with_softmax(fgraph):
 optdb.register('crossentropy_to_crossentropy_with_softmax',
               crossentropy_to_crossentropy_with_softmax, 2.01,
-               'fast_run', 'xent')
+               'fast_run', 'xent', 'gpu')
 @gof.local_optimizer([softmax_grad])
@@ -1410,10 +1419,10 @@ def local_crossentropy_to_crossentropy_with_softmax_grad(node):
            dx = crossentropy_softmax_1hot_with_bias_dx(g_nll,
                 coding_dist, true_one_of_n)
            return [dx]
-opt.register_specialize(local_crossentropy_to_crossentropy_with_softmax_grad)
+opt.register_specialize(local_crossentropy_to_crossentropy_with_softmax_grad,
+                        'gpu')
-@opt.register_specialize
 @gof.local_optimizer([tensor._max_and_argmax])
 def local_argmax_pushdown(node):
    if node.op == tensor._max_and_argmax and node.inputs[0].owner and \
@@ -1444,6 +1453,7 @@ def local_argmax_pushdown(node):
                                          tensor.DimShuffle(
                                              pre_bias.broadcastable,
                                              ('x', 0))(pre_bias), axis)
+opt.register_specialize(local_argmax_pushdown, 'gpu')
 # Utility function used by the two next optimizations
@@ -1499,7 +1509,6 @@ def _is_const(z, val, approx=False):
        return numpy.all(maybe == val)
-@opt.register_specialize
 @gof.local_optimizer([subtensor.AdvancedSubtensor, tensor.log])
 def local_advanced_indexing_crossentropy_onehot(node):
    log = None
@@ -1538,9 +1547,9 @@ def local_advanced_indexing_crossentropy_onehot(node):
                return [-crossentropy_softmax_argmax_1hot_with_bias(x_var,
                                                                    b_var,
                                                                    labels)[0]]
+opt.register_specialize(local_advanced_indexing_crossentropy_onehot, 'gpu')
-@opt.register_specialize
 @gof.local_optimizer([softmax_grad])
 def local_advanced_indexing_crossentropy_onehot_grad(node):
    if not (node.op == softmax_grad):
@@ -1761,9 +1770,10 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
        return [crossentropy_softmax_1hot_with_bias_dx(out_grad, sm, labels)]
    else:
        return
+opt.register_specialize(local_advanced_indexing_crossentropy_onehot_grad,
+                        'gpu')
-@opt.register_specialize
 @gof.local_optimizer([softmax_with_bias])
 def graph_merge_softmax_with_crossentropy_softmax(node):
    if node.op == softmax_with_bias:
@@ -1775,6 +1785,8 @@ def graph_merge_softmax_with_crossentropy_softmax(node):
                    xx, bb, ll = big_client.inputs
                    mergeable_client = big_client.op(x, b, ll)
                    return [mergeable_client[1]]
+opt.register_specialize(graph_merge_softmax_with_crossentropy_softmax,
+                        'gpu')
 def binary_crossentropy(output, target):
@@ -1969,4 +1981,4 @@ local_log_softmax = gof.PatternSub(in_pattern=(tensor.log, (softmax, 'x')),
 #don't do register_stabilize, this is to make local_log_softmax run
 #only after another more specific optimization that stabilizes cross entropy
 #opt.register_stabilize(local_log_softmax, name = 'local_log_softmax')
-opt.register_specialize(local_log_softmax, name='local_log_softmax')
+opt.register_specialize(local_log_softmax, name='local_log_softmax', 'gpu')