Merge pull request #3205 from nouiz/mixed2

Mixed stuff

Merge pull request #3205 from nouiz/mixed2
514c7de4 · carriepl · 6f4542f8 · bbcc6970 · 514c7de4 · 514c7de4
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,18 +31,19 @@ install:
  - pip install . --no-deps
 # command to run tests
-env:
-  - PART="sandbox sparse scalar tensor/nnet scan_module"
-  - PART="tensor/tests/test_basic.py tensor/signal compile gof misc tests compat"
-  - PART="-e test_basic.py tensor/tests"
 matrix:
+  fast_finish: true
  include:
    - python: "3.3"
      env: PART="." THEANO_FLAGS="mode=FAST_COMPILE"
    - python: "2.6"
      env: PART="." THEANO_FLAGS="mode=FAST_COMPILE,floatX=float32"
+env:
+  - PART="sandbox sparse scalar tensor/nnet scan_module"
+  - PART="tensor/tests/test_basic.py tensor/signal compile gof misc tests compat"
+  - PART="-e test_basic.py tensor/tests"
 script:
  - export THEANO_FLAGS=$THEANO_FLAGS,warn.ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise,gcc.cxxflags=-pipe
  - python --version

--- a/theano/compile/function.py
+++ b/theano/compile/function.py
@@ -40,6 +40,12 @@ def function_dump(filename, inputs, outputs=None, mode=None, updates=None,
    that, you can set to replace shared variables values by zeros by
    calling set_value(...) on them before calling `function_dump`.
+    To load such a dump and do the compilation:
+    >>> import cPickle, theano
+    >>> d=cPickle.load(open("func_dump.bin", "rb"))
+    >>> f=theano.function(**d)
    """
    assert isinstance(filename, string_types)
    d = dict(inputs=inputs, outputs=outputs, mode=mode, updates=updates,

--- a/theano/gof/graph.py
+++ b/theano/gof/graph.py
@@ -263,7 +263,7 @@ class Variable(Node):
    - `TensorVariable` subclass of Variable that represents a numpy.ndarray object
-    - `SharedTensorVariable` Shared version of TensorVariable
+    - `TensorSharedVariable` Shared version of TensorVariable
    - `SparseVariable` subclass of Variable that represents a scipy.sparse.{csc,csr}_matrix object

--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
@@ -1803,6 +1803,12 @@ class GpuConv(GpuOp):
            self.max_threads_dim0 = None
        if not hasattr(self, "direction_hint"):
            self.direction_hint = None
+        if not hasattr(self, "nkern"):
+            self.nkern = None
+        if not hasattr(self, "bsize"):
+            self.bsize = None
+        if not hasattr(self, "fft_opt"):
+            self.fft_opt = True
    def __hash__(self):
        # don't use hash(self.version) as hash(-1)==-2 and

--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -231,6 +231,9 @@ class GpuDnnConvDesc(GpuOp):
    def c_compiler(self):
        return NVCC_compiler
+    def do_constant_folding(self, node):
+        return False
    def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv'):
        if isinstance(border_mode, int):
            border_mode = (border_mode, border_mode)

--- a/theano/sandbox/cuda/tests/test_dnn.py
+++ b/theano/sandbox/cuda/tests/test_dnn.py
@@ -42,13 +42,30 @@ def test_dnn_conv_desc_merge():
    # CDataType is not DeepCopyable so this will crash if we don't use
    # borrow=True
    f = theano.function([], [theano.Out(desc1, borrow=True),
-                             theano.Out(desc2, borrow=True)])
+                             theano.Out(desc2, borrow=True)],
+                        mode=mode_with_gpu)
    d1, d2 = f()
    # This will be the case if they are merged, which would be bad.
    assert d1 != d2
+    desc1v2 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2),
+                                 conv_mode='conv')(img_shp, kern_shp)
+    f = theano.function([], [theano.Out(desc1, borrow=True),
+                             theano.Out(desc1v2, borrow=True)],
+                        mode=mode_with_gpu)
+    assert len([n for n in f.maker.fgraph.apply_nodes
+                if isinstance(n.op, dnn.GpuDnnConvDesc)]) == 1
+    # CDataType values don't compare equal even if they represent the same
+    # object, so we can't use DebugMode with them.
+    if theano.config.mode not in ["DebugMode", "DEBUG_MODE"]:
+        d1, d2 = f()
+        # They won't be equal if they aren't merged.
+        assert d1 == d2
 def test_dnn_conv_merge():
    """This test that we merge correctly multiple dnn_conv.

--- a/theano/sandbox/gpuarray/conv.py
+++ b/theano/sandbox/gpuarray/conv.py
@@ -27,15 +27,23 @@ class GpuConv(gof.Op):
            logical_kern_hw=None,
            logical_kern_align_top=True,
            version=-1,
+            direction_hint=None,
            verbose=0,
            kshp=None,
            imshp=None,
-            max_threads_dim0=None):
+            max_threads_dim0=None,
+            nkern=None,
+            bsize=None,
+            fft_opt=True):
        """
        :param version: each version of c_code implements many kernels for the
                        convolution. By default we try to guess the best one.
                        You can force one version with this parameter. This
                        parameter is used by the tests.
+        :param direction_hint: 'forward', 'bprop weights' or 'bprop inputs'.
+                        Serves as a hint for graph optimizers replacing
+                        GpuConv by other implementations. If the GpuConv is
+                        inserted automatically, we take its value from ConvOp.
        :param verbose: for value of 1,2 and 3. Print more information during
                        the execution of the convolution. Mostly used for
                        optimization or debugging.
@@ -49,6 +57,19 @@ class GpuConv(gof.Op):
        :param max_threads_dim0: The maximum number of threads for the
                        block size dimensions 0 (blockDim.x) used by the
                        GPU function.
+        :param nkern:   The number of kernels. Not used for this op, but can be
+                        used by graph optimizers to select a more optimal
+                        convolution implementation. If the GpuConv op is inserted
+                        automatically, we take its value from the Conv op.
+        :param bsize:   The batch size. Not used for this op, but can be
+                        used by graph optimizers to select a more optimal
+                        convolution implementation. If the GpuConv op is inserted
+                        automatically, we take its value from the Conv op.
+        :param fft_opt: disables the fft optimization at the op level when
+                        set to False. Note that fft optimizations aren't
+                        enabled by default. See
+                        :ref:`convolution documentation <libdoc_tensor_nnet_conv>`
+                        to enable them.
        """
        self.border_mode = border_mode
@@ -69,10 +90,14 @@ class GpuConv(gof.Op):
        self.logical_kern_hw = logical_kern_hw
        self.logical_kern_align_top = logical_kern_align_top
        self.version = version
+        self.direction_hint = direction_hint
        self.verbose = verbose
        self.kshp = kshp
        self.imshp = imshp
        self.max_threads_dim0 = max_threads_dim0
+        self.nkern = nkern
+        self.bsize = bsize
+        self.fft_opt = fft_opt
    def __eq__(self, other):
        return type(self) == type(other) \
@@ -93,6 +118,14 @@ class GpuConv(gof.Op):
            self.imshp = None
        if not hasattr(self, "max_threads_dim0"):
            self.max_threads_dim0 = None
+        if not hasattr(self, "direction_hint"):
+            self.direction_hint = None
+        if not hasattr(self, "nkern"):
+            self.nkern = None
+        if not hasattr(self, "bsize"):
+            self.bsize = None
+        if not hasattr(self, "fft_opt"):
+            self.fft_opt = True
    def __hash__(self):
        # don't use hash(self.version) as hash(-1)==-2 and

--- a/theano/sandbox/gpuarray/opt.py
+++ b/theano/sandbox/gpuarray/opt.py
@@ -669,8 +669,12 @@ def local_gpu_conv(node):
                      logical_kern_align_top=op.kshp_logical_top_aligned,
                      kshp=op.kshp,
                      version=op.version,
+                      direction_hint=op.direction_hint,
                      verbose=op.verbose,
                      imshp=op.imshp,
+                      nkern=op.nkern,
+                      bsize=op.bsize,
+                      fft_opt=op.fft_opt
                      )
        if op.imshp_logical is not None:
            logical_img_hw = op.imshp_logical[1:3]

--- a/theano/sandbox/gpuarray/tests/test_conv_cuda_ndarray.py
+++ b/theano/sandbox/gpuarray/tests/test_conv_cuda_ndarray.py
@@ -27,6 +27,8 @@ from .test_basic_ops import (mode_with_gpu,
                             mode_without_gpu)
 from ..type import GpuArrayType
 from ..conv import GpuConv
+from theano.sandbox.gpuarray import dnn
 import pygpu
 gftensor4 = GpuArrayType('float32', [False] * 4)
@@ -501,6 +503,9 @@ def test_subsample():
 class TestConv2DGPU(unittest.TestCase):
+    conv_ops = (GpuConv,
+                dnn.DnnBase)
    def test_logical_shapes(self):
        seed_rng()
        for stride in range(1, 4):
@@ -527,7 +532,7 @@ class TestConv2DGPU(unittest.TestCase):
            func = theano.function([a, A], image_estimate, mode=mode_with_gpu)
            # theano.printing.debugprint(func,)
-            assert any([isinstance(node.op, GpuConv)
+            assert any([isinstance(node.op, self.conv_ops)
                        for node in func.maker.fgraph.toposort()])
            a_in = numpy.random.randn(*featshp).astype("float32")

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -1239,7 +1239,7 @@ class ShapeFeature(object):
                    continue
                if shpnode.outputs[0] in theano.gof.graph.ancestors([repl]):
-                    raise AssertionError(
+                    raise InconsistencyError(
                        "This substitution would insert a cycle in the graph:"
                        "node: %s, i: %i, r: %s, new_r: %s"
                        % (node, i, r, new_r))