提交 7efe043e authored 作者: bbabeshkin's avatar bbabeshkin

Merge remote-tracking branch 'upstream/master'

......@@ -15,9 +15,7 @@ before_install:
- conda update --yes conda
install:
# We support scipy 0.7.2, but it is not available on conda.
# So we test with 0.11. Our internal buildbot have 0.7.2.
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then conda create --yes -q -n pyenv mkl python=2.6 numpy=1.6 scipy=0.11 nose=1.1 pyparsing=1.5 pip; fi
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then conda create --yes -q -n pyenv mkl python=2.6 numpy=1.6.2 scipy=0.11 nose=1.1 pyparsing=1.5 pip; fi
- if [[ $TRAVIS_PYTHON_VERSION == '3.3' ]]; then conda create --yes -q -n pyenv mkl python=3.3 numpy=1.9.1 scipy=0.14.0 nose=1.3.4 pip; fi
- source activate pyenv
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; fi
......
......@@ -23,7 +23,8 @@ instructions below for detailed installation steps):
Python_ >= 2.6
The development package (``python-dev`` or ``python-devel``
on most Linux distributions) is recommended (see just below).
Python 2.4 was supported up to and including the release 0.6.
Python 2.4 was supported up to and including the release 0.6.
Python 3 is supported via 2to3 only, starting from 3.3.
``g++``, ``python-dev``
Not technically required but *highly* recommended, in order to compile
......@@ -32,16 +33,13 @@ instructions below for detailed installation steps):
g++ >= 4.2 (for openmp that is currently always used)
more recent version recommended!
`NumPy <http://numpy.scipy.org/>`_ >= 1.5.0
`NumPy <http://numpy.scipy.org/>`_ >= 1.6.2
Earlier versions could work, but we do not test them.
`SciPy <http://scipy.org>`_
`SciPy <http://scipy.org>`_ >= 0.11
Only currently required for sparse matrix and special functions
support, but *highly* recommended. We recommend SciPy
>=0.8 if you are using sparse matrices, because ``scipy.sparse``
is buggy in 0.6 (the ``scipy.csc_matrix`` version of ``dot()`` has a
bug with singleton dimensions, there may be more bugs) and we do not
run tests with 0.7.
support, but *highly* recommended. SciPy >=0.8 could work,
but earlier versions have known bugs with sparse matrices.
A `BLAS`_ installation (with Level 3 functionality)
Including the development headers (``-dev``, ``-devel``, depending on
......
......@@ -43,12 +43,11 @@ Operating System :: POSIX
Operating System :: Unix
Operating System :: MacOS
Programming Language :: Python :: 2
Programming Language :: Python :: 2.4
Programming Language :: Python :: 2.5
Programming Language :: Python :: 2.6
Programming Language :: Python :: 2.7
Programming Language :: Python :: 3
Programming Language :: Python :: 3.3
Programming Language :: Python :: 3.4
"""
NAME = 'Theano'
MAINTAINER = "LISA laboratory, University of Montreal"
......@@ -175,7 +174,7 @@ def do_setup():
license=LICENSE,
platforms=PLATFORMS,
packages=find_packages(),
install_requires=['numpy>=1.5.0', 'scipy>=0.7.2'],
install_requires=['numpy>=1.6.2', 'scipy>=0.11'],
package_data={
'': ['*.txt', '*.rst', '*.cu', '*.cuh', '*.c', '*.sh', '*.pkl',
'*.h', 'ChangeLog'],
......
......@@ -3434,6 +3434,9 @@ class CopyOnNegativeStrides(GpuOp):
i = i.copy()
out[0][0] = i
def infer_shape(self, node, xshp):
    # The op returns the input (possibly copied), so the output shape
    # is exactly the input shape; pass the shape list through unchanged.
    return xshp
def c_code(self, node, name, inp, out, sub):
input, = inp
z, = out
......
......@@ -649,12 +649,19 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
capability of 3.0 or higher. This means that older GPUs will not
work with this Op.
"""
def contig_version(var):
    # With cudnn v1 (version() == -1) the data must be fully
    # contiguous; from v2 on, a copy is only needed when the
    # variable has negative strides.
    if version() == -1:
        return gpu_contiguous(var)
    return cp_on_negative_strides(var)
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
if (border_mode == 'valid' and subsample == (1,1) and
direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for.
img = cp_on_negative_strides(img.dimshuffle(1, 0, 2, 3))
img = contig_version(img.dimshuffle(1, 0, 2, 3))
if conv_mode == 'conv':
# We need to flip manually. These 'kerns' are not the kernels
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
......@@ -674,7 +681,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# Special case: We can be faster by using GpuDnnConvGradI to compute
# the full convolution as the backward pass of a valid convolution.
# We just need to set up a suitable 'fake' valid convolution.
img = cp_on_negative_strides(img)
img = gpu_contiguous(img) # cudnn v1 and v2 rc3 need contiguous data
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
......@@ -686,9 +693,9 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
return GpuDnnConvGradI()(kerns, img, out, desc)
# Standard case: We use GpuDnnConv with suitable padding.
# cp_on_negative_strides will return a gpu_contiguous copy
# contig_version will return a gpu_contiguous copy
# if the img contains negative strides
img = cp_on_negative_strides(img)
img = contig_version(img)
kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(img.shape, kerns.shape)
......@@ -746,7 +753,7 @@ class GpuDnnPoolDesc(GpuOp):
self.stride = stride
assert len(stride) == 2
self.pad = pad
if (pad[0] != 0 or pad[1] != 0) and version() < 20:
if (pad[0] != 0 or pad[1] != 0) and version() == -1:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2")
def __setstate__(self, d):
......@@ -755,7 +762,7 @@ class GpuDnnPoolDesc(GpuOp):
self.pad = (0, 0)
def make_node(self):
if self.pad != (0, 0) and version() < 20:
if self.pad != (0, 0) and version() == -1:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2")
return Apply(self, [],
......
......@@ -1763,7 +1763,7 @@ def get_device_type_sizes():
del t
except Exception, e:
_logger.warning(("Optimization Warning: "
"Got the following error, but we can ignore it. "
"Got the following error, but you can ignore it. "
"This could cause less GpuElemwise fused together.\n"
"%s") % e)
......
......@@ -70,7 +70,7 @@ def test_pooling():
x = T.ftensor4()
for func, pad in product((T.max, T.mean),
((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))):
if pad != (0, 0) and cuda.dnn.version() < 20:
if pad != (0, 0) and cuda.dnn.version() == -1:
continue
if pad != (0, 0) and func is T.mean:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论