提交 bb899b3e authored 作者: abergeron's avatar abergeron

Merge pull request #2140 from nouiz/mixed

[ENH] Add SliceConstant, better stack trace, fast_compile_gpu
...@@ -93,7 +93,13 @@ OPT_NONE = gof.Query(include=[], exclude=exclude) ...@@ -93,7 +93,13 @@ OPT_NONE = gof.Query(include=[], exclude=exclude)
OPT_MERGE = gof.Query(include=['merge'], exclude=exclude) OPT_MERGE = gof.Query(include=['merge'], exclude=exclude)
OPT_FAST_RUN = gof.Query(include=['fast_run'], exclude=exclude) OPT_FAST_RUN = gof.Query(include=['fast_run'], exclude=exclude)
OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable') OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable')
OPT_FAST_COMPILE = gof.Query(include=['fast_compile'], exclude=exclude) # We need fast_compile_gpu here. As on the GPU, we don't have all
# operation that exist in fast_compile, but have some that get
# introduced in fast_run, we want those optimization to also run in
# fast_compile+gpu. We can't tag them just as 'gpu', as this would
# exclude them if we exclude 'gpu'.
OPT_FAST_COMPILE = gof.Query(include=['fast_compile', 'fast_compile_gpu'],
exclude=exclude)
OPT_STABILIZE = gof.Query(include=['fast_run'], exclude=exclude) OPT_STABILIZE = gof.Query(include=['fast_run'], exclude=exclude)
OPT_STABILIZE.position_cutoff = 1.5000001 OPT_STABILIZE.position_cutoff = 1.5000001
OPT_NONE.name = 'OPT_NONE' OPT_NONE.name = 'OPT_NONE'
...@@ -191,7 +197,7 @@ optdb.register('Print1.51', PrintCurrentFunctionGraph('Post-stabilize'), ...@@ -191,7 +197,7 @@ optdb.register('Print1.51', PrintCurrentFunctionGraph('Post-stabilize'),
# misc special cases for speed # misc special cases for speed
optdb.register('specialize', gof.EquilibriumDB(), optdb.register('specialize', gof.EquilibriumDB(),
2, 'fast_run') 2, 'fast_run', 'fast_compile_gpu')
# misc special cases for speed that break canonicalization # misc special cases for speed that break canonicalization
optdb.register('uncanonicalize', gof.EquilibriumDB(), optdb.register('uncanonicalize', gof.EquilibriumDB(),
......
...@@ -8,6 +8,7 @@ import logging ...@@ -8,6 +8,7 @@ import logging
import pdb import pdb
import sys import sys
import time import time
import warnings
import numpy import numpy
...@@ -731,7 +732,8 @@ def pre_constant_merge(vars): ...@@ -731,7 +732,8 @@ def pre_constant_merge(vars):
seen_var = set() seen_var = set()
# signature -> variable (for constants) # signature -> variable (for constants)
const_sig_inv = {} const_sig_inv = {}
if isinstance(vars, graph.Variable):
vars = [vars]
def recursive_merge(var): def recursive_merge(var):
if var in seen_var: if var in seen_var:
return var return var
...@@ -747,6 +749,10 @@ def pre_constant_merge(vars): ...@@ -747,6 +749,10 @@ def pre_constant_merge(vars):
return const_sig_inv[sig] return const_sig_inv[sig]
const_sig_inv[sig] = var const_sig_inv[sig] = var
except TypeError: # unhashable type except TypeError: # unhashable type
warnings.warn(
"We work around a problem, the following variable"
" signature isn't hashable. Please, report this to"
" theano-dev so that the better fix is done. %s" % var)
# Some python object like slice aren't hashable. So # Some python object like slice aren't hashable. So
# don't merge them here. # don't merge them here.
pass pass
......
...@@ -409,3 +409,20 @@ class TestEquilibrium(object): ...@@ -409,3 +409,20 @@ class TestEquilibrium(object):
_logger.setLevel(oldlevel) _logger.setLevel(oldlevel)
#print 'after', g #print 'after', g
assert str(g) == '[Op1(x, y)]' assert str(g) == '[Op1(x, y)]'
def test_pre_constant_merge_slice():
    """Check that pre_constant_merge and constant folding cope with slices."""
    type_other = theano.tensor.type_other

    # A symbolic slice built by MakeSlice must go through pre_constant_merge
    # without error (its signature contains a slice object).
    made_slice = type_other.MakeSlice()(1)
    pre_constant_merge([made_slice])

    # pre_constant_merge also accepts a single variable instead of a list.
    slice_cst = type_other.SliceConstant(type=type_other.slicetype,
                                         data=slice(1, None, 2))
    indexed = theano.tensor.subtensor.AdvancedSubtensor()(
        theano.tensor.matrix(), [2, 3], slice_cst)
    pre_constant_merge(indexed)

    # Constant folding of MakeSlice should produce a SliceConstant ...
    folded = pre_greedy_local_optimizer([theano.tensor.opt.constant_folding],
                                        made_slice)
    assert isinstance(folded, type_other.SliceConstant)
    # ... whose signature is hashable (required by the merge optimizer).
    hash(folded.signature())
...@@ -18,6 +18,18 @@ def add_tag_trace(thing): ...@@ -18,6 +18,18 @@ def add_tag_trace(thing):
# rid of it. We also want to get rid of the add_tag_trace call. # rid of it. We also want to get rid of the add_tag_trace call.
if tr and "add_tag_trace" in tr[-1][-1]: if tr and "add_tag_trace" in tr[-1][-1]:
tr = tr[:-1] tr = tr[:-1]
while tr:
file_path = tr[-1][0]
rm = False
for p in ["theano/tensor/",
"theano/gof/"]:
if p in file_path:
tr = tr[:-1]
rm = True
break
if not rm:
break
thing.tag.trace = tr thing.tag.trace = tr
return thing return thing
......
...@@ -577,7 +577,7 @@ class Softmax(gof.Op): ...@@ -577,7 +577,7 @@ class Softmax(gof.Op):
softmax = Softmax() softmax = Softmax()
@opt.register_specialize('gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax]) @gof.local_optimizer([softmax])
def local_softmax_with_bias(node): def local_softmax_with_bias(node):
"""Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias) """Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias)
...@@ -1330,8 +1330,8 @@ class CrossentropyCategorical1Hot(gof.Op): ...@@ -1330,8 +1330,8 @@ class CrossentropyCategorical1Hot(gof.Op):
crossentropy_categorical_1hot = CrossentropyCategorical1Hot() crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
@opt.register_stabilize('gpu') @opt.register_stabilize('fast_compile_gpu')
@opt.register_specialize('gpu') @opt.register_specialize('fast_compile_gpu')
@gof.optimizer @gof.optimizer
def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph): def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
"""This is a stabilization optimization """This is a stabilization optimization
...@@ -1404,10 +1404,10 @@ def crossentropy_to_crossentropy_with_softmax(fgraph): ...@@ -1404,10 +1404,10 @@ def crossentropy_to_crossentropy_with_softmax(fgraph):
optdb.register('crossentropy_to_crossentropy_with_softmax', optdb.register('crossentropy_to_crossentropy_with_softmax',
crossentropy_to_crossentropy_with_softmax, 2.01, crossentropy_to_crossentropy_with_softmax, 2.01,
'fast_run', 'xent', 'gpu') 'fast_run', 'xent', 'fast_compile_gpu')
@opt.register_specialize('gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_grad]) @gof.local_optimizer([softmax_grad])
def local_crossentropy_to_crossentropy_with_softmax_grad(node): def local_crossentropy_to_crossentropy_with_softmax_grad(node):
if node.op == softmax_grad: if node.op == softmax_grad:
...@@ -1420,7 +1420,7 @@ def local_crossentropy_to_crossentropy_with_softmax_grad(node): ...@@ -1420,7 +1420,7 @@ def local_crossentropy_to_crossentropy_with_softmax_grad(node):
return [dx] return [dx]
@opt.register_specialize('gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([tensor._max_and_argmax]) @gof.local_optimizer([tensor._max_and_argmax])
def local_argmax_pushdown(node): def local_argmax_pushdown(node):
if node.op == tensor._max_and_argmax and node.inputs[0].owner and \ if node.op == tensor._max_and_argmax and node.inputs[0].owner and \
...@@ -1506,7 +1506,7 @@ def _is_const(z, val, approx=False): ...@@ -1506,7 +1506,7 @@ def _is_const(z, val, approx=False):
return numpy.all(maybe == val) return numpy.all(maybe == val)
@opt.register_specialize('gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([subtensor.AdvancedSubtensor, tensor.log]) @gof.local_optimizer([subtensor.AdvancedSubtensor, tensor.log])
def local_advanced_indexing_crossentropy_onehot(node): def local_advanced_indexing_crossentropy_onehot(node):
log = None log = None
...@@ -1547,7 +1547,7 @@ def local_advanced_indexing_crossentropy_onehot(node): ...@@ -1547,7 +1547,7 @@ def local_advanced_indexing_crossentropy_onehot(node):
labels)[0]] labels)[0]]
@opt.register_specialize('gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_grad]) @gof.local_optimizer([softmax_grad])
def local_advanced_indexing_crossentropy_onehot_grad(node): def local_advanced_indexing_crossentropy_onehot_grad(node):
if not (node.op == softmax_grad): if not (node.op == softmax_grad):
...@@ -1770,7 +1770,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node): ...@@ -1770,7 +1770,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
return return
@opt.register_specialize('gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_with_bias]) @gof.local_optimizer([softmax_with_bias])
def graph_merge_softmax_with_crossentropy_softmax(node): def graph_merge_softmax_with_crossentropy_softmax(node):
if node.op == softmax_with_bias: if node.op == softmax_with_bias:
...@@ -1976,4 +1976,4 @@ local_log_softmax = gof.PatternSub(in_pattern=(tensor.log, (softmax, 'x')), ...@@ -1976,4 +1976,4 @@ local_log_softmax = gof.PatternSub(in_pattern=(tensor.log, (softmax, 'x')),
#don't do register_stabilize, this is to make local_log_softmax run #don't do register_stabilize, this is to make local_log_softmax run
#only after another more specific optimization that stabilizes cross entropy #only after another more specific optimization that stabilizes cross entropy
#opt.register_stabilize(local_log_softmax, name = 'local_log_softmax') #opt.register_stabilize(local_log_softmax, name = 'local_log_softmax')
opt.register_specialize(local_log_softmax, 'gpu', name='local_log_softmax') opt.register_specialize(local_log_softmax, 'fast_compile_gpu', name='local_log_softmax')
...@@ -338,7 +338,8 @@ def register_specialize(lopt, *tags, **kwargs): ...@@ -338,7 +338,8 @@ def register_specialize(lopt, *tags, **kwargs):
return register return register
else: else:
name = (kwargs and kwargs.pop('name')) or lopt.__name__ name = (kwargs and kwargs.pop('name')) or lopt.__name__
compile.optdb['specialize'].register(name, lopt, 'fast_run', *tags) compile.optdb['specialize'].register(name, lopt, 'fast_run',
'fast_compile_gpu', *tags)
return lopt return lopt
...@@ -1319,7 +1320,7 @@ def local_track_shape_i(node): ...@@ -1319,7 +1320,7 @@ def local_track_shape_i(node):
@register_specialize @register_specialize
@register_canonicalize('gpu') @register_canonicalize('fast_compile_gpu')
@gof.local_optimizer([Subtensor]) @gof.local_optimizer([Subtensor])
def local_subtensor_make_vector(node): def local_subtensor_make_vector(node):
# replace all subtensor(make_vector) like: # replace all subtensor(make_vector) like:
......
...@@ -1807,6 +1807,8 @@ def as_index_variable(idx): ...@@ -1807,6 +1807,8 @@ def as_index_variable(idx):
return NoneConst.clone() return NoneConst.clone()
if isinstance(idx, slice): if isinstance(idx, slice):
return make_slice(idx) return make_slice(idx)
if isinstance(idx, gof.Variable) and isinstance(idx.type, SliceType):
return idx
idx = theano.tensor.as_tensor_variable(idx) idx = theano.tensor.as_tensor_variable(idx)
if idx.type.dtype[:3] not in ('int', 'uin'): if idx.type.dtype[:3] not in ('int', 'uin'):
raise TypeError('index must be integers') raise TypeError('index must be integers')
......
# #
# Slice type and Op. None Type and NoneConst. # Slice type and Op. None Type and NoneConst.
# #
import numpy
import theano import theano
from theano.gof import Apply, Constant, Generic, Op, Type, hashtype from theano.gof import Apply, Constant, Generic, Op, Type, hashtype
from theano.gradient import DisconnectedType from theano.gradient import DisconnectedType
...@@ -76,6 +79,35 @@ class SliceType(Type): ...@@ -76,6 +79,35 @@ class SliceType(Type):
slicetype = SliceType() slicetype = SliceType()
class SliceConstant(Constant):
    """Constant whose data is a Python ``slice``.

    The start/stop/step components are normalized from 0-d integer
    ndarrays to plain Python ints, because ndarrays are not hashable
    and ``signature()`` must be hashable for the merge optimizer.
    """
    def __init__(self, type, data, name=None):
        assert isinstance(data, slice)
        # Check each component independently: any combination of
        # start/stop/step may be a 0-d integer ndarray.  (The previous
        # elif chain normalized at most one of them, and its step branch
        # converted `stop` instead of `step`, leaving an unhashable
        # ndarray in the slice.)
        start, stop, step = data.start, data.stop, data.step
        if isinstance(start, numpy.ndarray):
            assert start.ndim == 0
            assert "int" in str(start.dtype)
            start = int(start)
        if isinstance(stop, numpy.ndarray):
            assert stop.ndim == 0
            assert "int" in str(stop.dtype)
            stop = int(stop)
        if isinstance(step, numpy.ndarray):
            assert step.ndim == 0
            assert "int" in str(step.dtype)
            step = int(step)
        Constant.__init__(self, type, slice(start, stop, step), name)

    def signature(self):
        # Hashable signature used to detect duplicate constants.
        return (SliceConstant, self.data.start, self.data.stop,
                self.data.step)

    def __str__(self):
        return "%s{%s, %s, %s}" % (self.__class__.__name__,
                                   self.data.start,
                                   self.data.stop,
                                   self.data.step)
SliceType.Constant = SliceConstant
class NoneTypeT(Generic): class NoneTypeT(Generic):
""" """
Inherit from Generic to have c code working. Inherit from Generic to have c code working.
......
...@@ -600,6 +600,7 @@ class TensorVariable(_tensor_py_operators, Variable): ...@@ -600,6 +600,7 @@ class TensorVariable(_tensor_py_operators, Variable):
x = x[:-1] x = x[:-1]
nb_rm += 1 nb_rm += 1
rm = True rm = True
break
if not rm: if not rm:
break break
warnings.warn(msg, stacklevel=1 + nb_rm) warnings.warn(msg, stacklevel=1 + nb_rm)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论