Merge pull request #4747 from nouiz/mixed_opt

Small mixed changes.

Merge pull request #4747 from nouiz/mixed_opt
884ed6be · abergeron · GitHub · d5944c96 · ad25509e · 884ed6be
--- a/doc/dev_start_guide.txt
+++ b/doc/dev_start_guide.txt
@@ -15,7 +15,10 @@ Looking for an idea for a first contribution? Check `github issue
 with a label ``easy fix``. They are good starter. It is recommanded
 that you write on the issue you want to work on it. This help make
 sure it is up to date and see if nobody else is working on it. Also,
-we can sometimes provides more information about it.
+we can sometimes provide more information about it.  There is also
+the label `NeedSomeoneToFinish
+<https://github.com/Theano/Theano/labels/NeedSomeoneToFinish>`_ that is
+interesting to check. The difficulty level is variable.
 Resources
 =========

--- a/theano/d3viz/formatting.py
+++ b/theano/d3viz/formatting.py
@@ -25,8 +25,12 @@ except ImportError:
    try:
        # fall back on pydot if necessary
        import pydot as pd
+        if hasattr(pd, 'find_graphviz'):
            if pd.find_graphviz():
                pydot_imported = True
+        else:
+            pd.Dot.create(pd.Dot())
+            pydot_imported = True
    except ImportError:
        pass  # tests should not fail on optional dependency

--- a/theano/gpuarray/dnn.py
+++ b/theano/gpuarray/dnn.py
@@ -32,7 +32,7 @@ from .elemwise import GpuElemwise
 # These don't exist in gpuarray
 # GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
 from .nnet import GpuSoftmax
-from .opt import (gpu_seqopt, register_opt, conv_groupopt,
+from .opt import (gpu_seqopt, register_opt,
                  op_lifter, register_opt2)
 from .opt_util import alpha_merge, output_merge, inplace_allocempty
@@ -1472,18 +1472,31 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
    return [rval]
-@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
+@register_opt('fast_compile', 'conv_dnn', 'cudnn')
-                  AbstractConv2d_gradInputs])
+@local_optimizer([AbstractConv2d])
 def local_abstractconv_cudnn(node):
    ctx = infer_context_name(*node.inputs)
    if not isinstance(node.inputs[0].type, GpuArrayType):
        return
    return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
-conv_groupopt.register('local_abstractconv_cudnn',
-                       local_abstractconv_cudnn, 20,
+@register_opt('fast_compile', 'conv_dnn', 'cudnn')
-                       'fast_compile', 'fast_run',
+@local_optimizer([AbstractConv2d_gradWeights])
-                       'gpuarray', 'conv_dnn', 'cudnn')
+def local_abstractconv_gw_cudnn(node):
+    ctx = infer_context_name(*node.inputs)
+    if not isinstance(node.inputs[0].type, GpuArrayType):
+        return
+    return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
+@register_opt('fast_compile', 'conv_dnn', 'cudnn')
+@local_optimizer([AbstractConv2d_gradInputs])
+def local_abstractconv_gi_cudnn(node):
+    ctx = infer_context_name(*node.inputs)
+    if not isinstance(node.inputs[0].type, GpuArrayType):
+        return
+    return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
 @inplace_allocempty(GpuDnnConv, 2)

--- a/theano/gpuarray/elemwise.py
+++ b/theano/gpuarray/elemwise.py
@@ -51,13 +51,15 @@ class GpuElemwise(HideC, Elemwise):
    def make_node(self, *inputs):
        ctx_name = infer_context_name(*inputs)
-        res = Elemwise.make_node(self, *inputs)
+        inputs = [as_gpuarray_variable(i, ctx_name) for i in inputs]
-        outputs = [GpuArrayType(broadcastable=o.type.broadcastable,
+        out_info = Elemwise.get_output_info(self, GpuDimShuffle, *inputs)
+        inputs = out_info[2]
+        outputs = [GpuArrayType(broadcastable=br,
                                context_name=ctx_name,
-                                dtype=o.type.dtype)() for o in res.outputs]
+                                dtype=dtype)() for dtype, br in
+                   zip(out_info[0], out_info[1])]
        if len(outputs) > 1:
            raise NotImplementedError()
-        inputs = [as_gpuarray_variable(i, ctx_name) for i in inputs]
        node = Apply(self, inputs, outputs)
        # Try to generate the kernel to catch SupportCodeErrors

--- a/theano/gpuarray/opt.py
+++ b/theano/gpuarray/opt.py
@@ -14,7 +14,6 @@ from theano.compile.ops import shape_i
 from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
                        SequenceDB, Optimizer, DB, toolbox, graph)
 from theano.gof.opt import NavigatorOptimizer
-from theano.gof.optdb import LocalGroupDB
 from theano.ifelse import IfElse
 from theano.misc.ordered_set import OrderedSet
@@ -79,10 +78,6 @@ class GraphToGPUDB(DB):
 gpu_seqopt = SequenceDB()
-# Don't register this right now
-conv_groupopt = LocalGroupDB()
-conv_groupopt.__name__ = "gpua_conv_opts"
 gpu_seqopt.register('gpuarray_graph_optimization', GraphToGPUDB(), -0.5,
                    'fast_compile', 'fast_run', 'gpuarray')
@@ -1297,9 +1292,6 @@ def local_gpua_lift_abstractconv2d_graph(op, context_name, inputs, outputs):
                                   context_name=context_name)
    return [op(*inps)]
-# Register this here so that it goes after the abstract lifting
-register_opt('fast_compile')(conv_groupopt)
 @register_opt("low_memory")
 @local_optimizer([GpuCAReduceCuda])

--- a/theano/printing.py
+++ b/theano/printing.py
@@ -35,10 +35,14 @@ except ImportError:
    try:
        # fall back on pydot if necessary
        import pydot as pd
+        if hasattr(pd, 'find_graphviz'):
            if pd.find_graphviz():
                pydot_imported = True
            else:
                pydot_imported_msg = "pydot can't find graphviz"
+        else:
+            pd.Dot.create(pd.Dot())
+            pydot_imported = True
    except ImportError:
        # tests should not fail on optional dependency
        pydot_imported_msg = "Install the python package pydot or pydot-ng."

--- a/theano/tensor/elemwise.py
+++ b/theano/tensor/elemwise.py
@@ -544,13 +544,11 @@ second dimension
                                          self.scalar_op.nout)
        self._rehash()
-    def make_node(self, *inputs):
+    def get_output_info(self, dim_shuffle, *inputs):
-        """
+        """Return the outputs' dtypes and broadcastable patterns and the
-        If the inputs have different number of dimensions, their shape
+        dimshuffled inputs.
-        is left-completed to the greatest number of dimensions with 1s
-        using DimShuffle.
        """
-        inputs = list(map(as_tensor_variable, inputs))
        shadow = self.scalar_op.make_node(
            *[get_scalar_type(dtype=i.type.dtype).make_variable()
              for i in inputs])
@@ -565,7 +563,7 @@ second dimension
                args.append(input)
            else:
                # TODO: use LComplete instead
-                args.append(DimShuffle(
+                args.append(dim_shuffle(
                    input.type.broadcastable,
                    ['x'] * difference + list(range(length)),
                    inplace=False)(input))
@@ -601,7 +599,18 @@ second dimension
            raise TypeError((
                "Cannot do an inplace operation on incompatible data types.",
                ([i.type.dtype for i in inputs], out_dtypes, inplace_pattern)))
+        assert len(out_dtypes) == len(out_broadcastables)
+        return out_dtypes, out_broadcastables, inputs
+    def make_node(self, *inputs):
+        """
+        If the inputs have different number of dimensions, their shape
+        is left-completed to the greatest number of dimensions with 1s
+        using DimShuffle.
+        """
+        inputs = list(map(as_tensor_variable, inputs))
+        out_dtypes, out_broadcastables, inputs = self.get_output_info(
+            DimShuffle, *inputs)
        outputs = [TensorType(dtype=dtype, broadcastable=broadcastable)()
                   for dtype, broadcastable in izip(out_dtypes,
                                                    out_broadcastables)]

--- a/theano/tensor/nlinalg.py
+++ b/theano/tensor/nlinalg.py
@@ -685,6 +685,14 @@ class lstsq(Op):
 def matrix_power(M, n):
+    """
+    Raise a square matrix to the (integer) power n.
+    Parameters
+    ----------
+    M : Tensor variable
+    n : Python int
+    """
    result = 1
    for i in xrange(n):
        result = theano.dot(result, M)