Merge pull request #556 from lamblin/fix_cudandarray_python24

Fix a number of tests for Python 2.4

Merge pull request #556 from lamblin/fix_cudandarray_python24
36a01bb6 · nouiz · 9c9fe6cc · f7c304cd · 36a01bb6 · 36a01bb6
--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -5,6 +5,8 @@ import theano
 from theano import Op, Type, Apply, Variable, Constant
 from theano import tensor, scalar, config
+from theano.gof.python25 import all, any
 from theano.sandbox.cuda import GpuOp
 from theano.sandbox.cuda.type import CudaNdarrayType
 from theano.sandbox.cuda import filter as type_support_filter
@@ -1754,7 +1756,17 @@ class GpuSubtensor(tensor.Subtensor, GpuOp):
        def convert(entry):
            if isinstance(entry, Type):
-                return indices.pop()
+                rval = indices.pop()
+                if sys.version_info < (2, 5):
+                    # Before Python 2.5, PySlice_GetIndicesEx requires
+                    # a Python int to be passed.
+                    rval_ = int(rval)
+                    if rval_ != rval:
+                        raise IndexError((
+                            "Invalid value for indexing: %s. "
+                            "That value may be too big.") % rval)
+                    return rval_
+                return rval
            elif isinstance(entry, slice):
                return slice(convert(entry.start),
                             convert(entry.stop),

--- a/theano/sandbox/cuda/cuda_ndarray.cu
+++ b/theano/sandbox/cuda/cuda_ndarray.cu
@@ -1521,6 +1521,7 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
    }
    if (PySlice_Check(key)) //INDEXING BY SLICE
    {
+        if (verbose) fprintf(stderr, "by slice\n");
        if (self->nd == 0)
        {
            PyErr_SetString(PyExc_ValueError, "cannot slice a 0-d array");
@@ -1531,6 +1532,8 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
        Py_ssize_t start, stop, step, slen;
        if (PySlice_GetIndicesEx((PySliceObject*)key, d_dim, &start, &stop, &step, &slen))
        {
+            if (verbose)
+                fprintf(stderr, "PySlice_GetIndicesEx failed\n");
            return NULL;
        }
        if (verbose)
@@ -1569,6 +1572,7 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
    }
    if (PyTuple_Check(key)) //INDEXING BY TUPLE
    {
+        if (verbose) fprintf(stderr, "by tuple\n");
        //elements of the tuple can be either integers or slices
        //the dimensionality of the view we will return is diminished for each slice in the tuple
@@ -2047,7 +2051,9 @@ GetDeviceMemInfo(PyObject* _unused, PyObject* dummy)
    cudaError_t err = cudaMemGetInfo(&free, &total);
    if (err != cudaSuccess){
-        PyErr_Format(PyExc_RuntimeError, "Error while getting memory info about the gpu %d");
+        PyErr_Format(PyExc_RuntimeError,
+                     "Error while getting memory info about the gpu: %s",
+                     cudaGetErrorString(err));
        return NULL;
    }
    return PyTuple_Pack(2, PyLong_FromLong(free), PyLong_FromLong(total));
@@ -2932,6 +2938,8 @@ int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B,
    // If matrix A or B has non-unit size and non-unit stride in both
    // dimensions, we can make a copy.
+    CudaNdarray * A_new = NULL;
+    CudaNdarray * B_new = NULL;
    if (((CudaNdarray_HOST_DIMS(A)[0] > 1)
         && (CudaNdarray_HOST_STRIDES(A)[0] != 1)
         && (CudaNdarray_HOST_DIMS(A)[1] > 1)
@@ -2939,18 +2947,11 @@ int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B,
        || (CudaNdarray_HOST_STRIDES(A)[0] < 0)
        || (CudaNdarray_HOST_STRIDES(A)[1] < 0))
    {
-        const CudaNdarray* A_new = (CudaNdarray*) CudaNdarray_Copy(A);
+        A_new = (CudaNdarray*) CudaNdarray_Copy(A);
        if (!A_new)
            return -1;
        A = A_new;
    }
-    else
-    {
-        // In the case above, we will need to decref A_new at the end.
-        // To make things simpler, we incref A here, so we can always
-        // decref A.
-        Py_INCREF(A);
-    }
    if (((CudaNdarray_HOST_DIMS(B)[0] > 1)
         && (CudaNdarray_HOST_STRIDES(B)[0] != 1)
@@ -2959,21 +2960,15 @@ int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B,
        || (CudaNdarray_HOST_STRIDES(B)[0] < 0)
        || (CudaNdarray_HOST_STRIDES(B)[1] < 0))
    {
-        const CudaNdarray* B_new = (CudaNdarray*) CudaNdarray_Copy(B);
+        B_new = (CudaNdarray*) CudaNdarray_Copy(B);
        if (!B_new)
        {
-            Py_XDECREF(A);
+            // If A_new is NULL (A was not copied), Py_XDECREF does nothing.
+            Py_XDECREF(A_new);
            return -1;
        }
        B = B_new;
    }
-    else
-    {
-        // In the case above, we will need to decref B_new at the end.
-        // To make things simpler, we incref B here, so we can always
-        // decref B.
-        Py_INCREF(B);
-    }
    // If matrix C has non-unit size and non-unit stride in both
    // dimensions, or negative strides, we can't operate. We cannot copy
@@ -2992,8 +2987,8 @@ int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B,
                     CudaNdarray_HOST_STRIDES(C)[1],
                     CudaNdarray_HOST_DIMS(C)[0],
                     CudaNdarray_HOST_DIMS(C)[1]);
-        Py_XDECREF(A);
+        Py_XDECREF(A_new);
-        Py_XDECREF(B);
+        Py_XDECREF(B_new);
        return -1;
    }
@@ -3057,8 +3052,8 @@ int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B,
        cublasSgemm(T0, T1, D0, D1, D2, a, x, sx, y, sy, b, z, sz); \
    } else { \
        PyErr_SetString(PyExc_AssertionError, "negative stride to sGemm");\
-        Py_XDECREF(A);\
+        Py_XDECREF(A_new);\
-        Py_XDECREF(B);\
+        Py_XDECREF(B_new);\
        return -1; \
    }
@@ -3076,8 +3071,8 @@ int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B,
                 return -1;
    };
    CNDA_THREAD_SYNC;
-    Py_XDECREF(A);
+    Py_XDECREF(A_new);
-    Py_XDECREF(B);
+    Py_XDECREF(B_new);
    cublasStatus err = cublasGetError();
    if (CUBLAS_STATUS_SUCCESS != err)
@@ -3117,6 +3112,8 @@ int CudaNdarray_sgemv(float alpha, const CudaNdarray * A, const CudaNdarray * B,
    // If matrix A has non-unit size and non-unit stride in both
    // dimensions, or negative strides, we cannot operate, but we can
    // make a copy.
+    CudaNdarray * A_new = NULL;
+    CudaNdarray * B_new = NULL;
    if (((CudaNdarray_HOST_DIMS(A)[0] > 1)
         && (CudaNdarray_HOST_STRIDES(A)[0] != 1)
         && (CudaNdarray_HOST_DIMS(A)[1] > 1)
@@ -3124,33 +3121,25 @@ int CudaNdarray_sgemv(float alpha, const CudaNdarray * A, const CudaNdarray * B,
        || (CudaNdarray_HOST_STRIDES(A)[0] < 0)
        || (CudaNdarray_HOST_STRIDES(A)[1] < 0))
    {
-        const CudaNdarray* A_new = (CudaNdarray*) CudaNdarray_Copy(A);
+        A_new = (CudaNdarray*) CudaNdarray_Copy(A);
        if (!A_new)
            return -1;
        A = A_new;
    }
-    else
-    {
-        // Incref A, so we can decref it at the end in all cases
-        Py_INCREF(A);
-    }
    // If vector B as a negative stride, we also have to make a copy.
    if (CudaNdarray_HOST_STRIDES(B)[0] < 0)
    {
-        const CudaNdarray* B_new = (CudaNdarray*) CudaNdarray_Copy(B);
+        B_new = (CudaNdarray*) CudaNdarray_Copy(B);
        if (!B_new)
        {
-            Py_XDECREF(A);
+            // If A was not copied, A_new is NULL, and Py_XDECREF does not
+            // do anything
+            Py_XDECREF(A_new);
            return -1;
        }
        B = B_new;
    }
-    else
-    {
-        // Incref B, so we can decref it at the end in all cases
-        Py_INCREF(B);
-    }
    // cudablas does not handle negative strides as expected
    if (   (CudaNdarray_HOST_STRIDES(A)[0] < 0)
@@ -3159,8 +3148,8 @@ int CudaNdarray_sgemv(float alpha, const CudaNdarray * A, const CudaNdarray * B,
        PyErr_Format(PyExc_ValueError, "illegal strides in args to gemv (%i,%i)",
                CudaNdarray_HOST_STRIDES(A)[0],
                CudaNdarray_HOST_STRIDES(A)[1]);
-        Py_XDECREF(A);
+        Py_XDECREF(A_new);
-        Py_XDECREF(B);
+        Py_XDECREF(B_new);
        return -1;
    }
@@ -3215,15 +3204,15 @@ int CudaNdarray_sgemv(float alpha, const CudaNdarray * A, const CudaNdarray * B,
                         CudaNdarray_HOST_DIMS(A)[1],
                         CudaNdarray_HOST_DIMS(B)[0],
                         CudaNdarray_HOST_DIMS(C)[0]);
-            Py_XDECREF(A);
+            Py_XDECREF(A_new);
-            Py_XDECREF(B);
+            Py_XDECREF(B_new);
            return -1;
        }
    }
    CNDA_THREAD_SYNC;
-    Py_XDECREF(A);
+    Py_XDECREF(A_new);
-    Py_XDECREF(B);
+    Py_XDECREF(B_new);
    cublasStatus err = cublasGetError();
    if (CUBLAS_STATUS_SUCCESS != err)
@@ -3253,7 +3242,7 @@ int CudaNdarray_sger(float alpha, const CudaNdarray * x, const CudaNdarray * y,
    }
    int x_strides = CudaNdarray_HOST_STRIDES(x)[0];
-    const CudaNdarray * x_ = x;
+    CudaNdarray * x_new = NULL;
    if(x_strides == 0){
        if(CudaNdarray_HOST_DIMS(x)[0] != 1){
            PyErr_Format(PyExc_RuntimeError,
@@ -3264,24 +3253,27 @@ int CudaNdarray_sger(float alpha, const CudaNdarray * x, const CudaNdarray * y,
        }
        x_strides = 1;
    } else if(x_strides < 0){
-        x_ = (CudaNdarray*)CudaNdarray_Copy(x);
+        x_new = (CudaNdarray*) CudaNdarray_Copy(x);
-        x_strides = CudaNdarray_HOST_STRIDES(x_)[0];
+        x = x_new;
+        x_strides = CudaNdarray_HOST_STRIDES(x)[0];
    }
    int y_strides = CudaNdarray_HOST_STRIDES(y)[0];
-    const CudaNdarray * y_ = y;
+    CudaNdarray * y_new = NULL;
    if(y_strides == 0){
        if(CudaNdarray_HOST_DIMS(y)[0] != 1){
            PyErr_Format(PyExc_RuntimeError,
                         "CudaNdarray_sger: Invalid input y (should not happen)."
                         " We received a CudaNdarray vector with a stride of 0"
                         " that has more than 1 elements!");
+            Py_XDECREF(x_new);
            return -1;
        }
        y_strides = 1;
    } else if(y_strides < 0){
-        y_ = (CudaNdarray*)CudaNdarray_Copy(y);
+        y_new = (CudaNdarray*) CudaNdarray_Copy(y);
-        y_strides = CudaNdarray_HOST_STRIDES(y_)[0];
+        y = y_new;
+        y_strides = CudaNdarray_HOST_STRIDES(y)[0];
    }
    // Create appropriate strides if A is a row or column vector
@@ -3297,8 +3289,8 @@ int CudaNdarray_sger(float alpha, const CudaNdarray * x, const CudaNdarray * y,
                && (CudaNdarray_HOST_STRIDES(A)[1] > 0)))
        {
            cublasSger(CudaNdarray_HOST_DIMS(x)[0], CudaNdarray_HOST_DIMS(y)[0], alpha,
-                       CudaNdarray_DEV_DATA(x_), x_strides,
+                       CudaNdarray_DEV_DATA(x), x_strides,
-                       CudaNdarray_DEV_DATA(y_), y_strides,
+                       CudaNdarray_DEV_DATA(y), y_strides,
                       CudaNdarray_DEV_DATA(A), sa_1);
        }
        // Since Sger expects A in col-major, we invert x and y to fake this.
@@ -3307,8 +3299,8 @@ int CudaNdarray_sger(float alpha, const CudaNdarray * x, const CudaNdarray * y,
                    && (CudaNdarray_HOST_STRIDES(A)[0] > 0)))
        {
            cublasSger(CudaNdarray_HOST_DIMS(y)[0], CudaNdarray_HOST_DIMS(x)[0], alpha,
-                       CudaNdarray_DEV_DATA(y_), y_strides,
+                       CudaNdarray_DEV_DATA(y), y_strides,
-                       CudaNdarray_DEV_DATA(x_), x_strides,
+                       CudaNdarray_DEV_DATA(x), x_strides,
                       CudaNdarray_DEV_DATA(A), sa_0);
        }
        // A has to be either c- or f-contiguous, with no negative strides
@@ -3316,14 +3308,14 @@ int CudaNdarray_sger(float alpha, const CudaNdarray * x, const CudaNdarray * y,
        {
            PyErr_SetString(PyExc_NotImplementedError,
                            "non-contiguous A, or negative strides, in sger");
+            Py_XDECREF(x_new);
+            Py_XDECREF(y_new);
            return -1;
        }
    }
    CNDA_THREAD_SYNC;
-    if(x_ != x)
+    Py_XDECREF(x_new);
-        Py_DECREF(x_);
+    Py_XDECREF(y_new);
-    if(y_ != y)
-        Py_DECREF(y_);
    cublasStatus err = cublasGetError();
    if (CUBLAS_STATUS_SUCCESS != err)

--- a/theano/sandbox/cuda/elemwise.py
+++ b/theano/sandbox/cuda/elemwise.py
@@ -868,11 +868,15 @@ nd_collapse_[i]=0;
        #check that all inputs have valid dimensions
        emitted_inames = {}
-        for id,iname in enumerate(inputs):
+        for id, iname in enumerate(inputs):
            if iname in emitted_inames:
                assert emitted_inames[iname] is node.inputs[id]
                continue
-            broadcasts = ', '.join(map(str,map(int,node.inputs[id].broadcastable)))
+            # with python 2.4 (at least), if a broadcastable pattern is made of
+            # numpy.bool_ instead of bool, calling int() once is not enough.
+            broadcasts = map(int, map(int, node.inputs[id].broadcastable))
+            broadcasts = ', '.join(map(str, broadcasts))
            nd = node.inputs[id].ndim
            if nd > 0:
                print >> sio, """
@@ -883,9 +887,10 @@ nd_collapse_[i]=0;
                int *broadcasts_%(iname)s = NULL;
                """ % locals()
            emitted_inames[iname] = node.inputs[id]
        #check that all inputs have valid dimensions
        emitted_inames = {}
-        for id,iname in enumerate(inputs):
+        for id, iname in enumerate(inputs):
            if iname in emitted_inames:
                continue
            print >> sio, """

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -2,16 +2,18 @@ import logging
 _logger = logging.getLogger('theano.sandbox.cuda.opt')
 import sys
-import theano
 import numpy
-from theano.scan_module import scan_utils, scan_op
+import theano
 from theano import scalar as scal
 from theano import tensor, compile, gof
+from theano.compile import optdb
 from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB,
                        Optimizer, toolbox, DestroyHandler,
                        EquilibriumOptimizer)
+from theano.gof.python25 import all, any
 from theano.sandbox.cuda.basic_ops import *
 from theano.sandbox.cuda.type import CudaNdarrayType
 from theano.sandbox.cuda.blas import (gpu_dot22, gpu_dot22scalar,
@@ -27,7 +29,7 @@ from theano.sandbox.cuda.nnet import (
        GpuCrossentropySoftmax1HotWithBiasDx,
        GpuSoftmax, GpuSoftmaxWithBias)
 from theano.sandbox.cuda.elemwise import SupportCodeError
-from theano.compile import optdb
+from theano.scan_module import scan_utils, scan_op
 from theano.tensor.blas import _is_real_vector, _is_real_matrix
 #optdb.print_summary()  # shows what is currently registered

--- a/theano/sandbox/cuda/rng_curand.py
+++ b/theano/sandbox/cuda/rng_curand.py
@@ -7,9 +7,9 @@ __copyright__ = "(c) 2011, University of Montreal"
 __license__ = "3-clause BSD License"
 __contact__ = "theano-dev@googlegroups.com"
-import sys
 import numpy
 import theano.gof
+from theano.gof.python25 import all
 from theano.sandbox.cuda import CudaNdarrayType, GpuOp
 from theano.tensor import (get_vector_length, cast, opt)
 from theano.compile import optdb

--- a/theano/sandbox/cuda/tests/test_basic_ops.py
+++ b/theano/sandbox/cuda/tests/test_basic_ops.py
@@ -13,6 +13,7 @@ import theano.sandbox.cuda as cuda_ndarray
 if cuda_ndarray.cuda_available == False:
    raise SkipTest('Optional package cuda disabled')
+from theano.gof.python25 import any
 import theano.sandbox.cuda as tcn
 import theano.sandbox.cuda as cuda
 import theano.sandbox.cuda.basic_ops as B

--- a/theano/sandbox/cuda/tests/test_blas.py
+++ b/theano/sandbox/cuda/tests/test_blas.py
@@ -16,6 +16,7 @@ import theano.sandbox.cuda as tcn
 from theano.tensor.signal.downsample import (DownsampleFactorMax,
        DownsampleFactorMaxGrad)
+from theano.gof.python25 import any
 import theano.compile.mode
 from theano.tensor.tests.test_blas import BaseGemv, TestBlasStrides, TestGer

--- a/theano/sandbox/cuda/tests/test_cuda_ndarray.py
+++ b/theano/sandbox/cuda/tests/test_cuda_ndarray.py
@@ -5,6 +5,7 @@ from nose.plugins.skip import SkipTest
 import numpy
 import theano
+from theano.gof.python25 import all
 import theano.sandbox.cuda as cuda_ndarray
 from theano.tensor.basic import _allclose
 from theano.tests import unittest_tools as utt

--- a/theano/sandbox/cuda/tests/test_driver.py
+++ b/theano/sandbox/cuda/tests/test_driver.py
@@ -7,6 +7,7 @@ import theano.sandbox.cuda as cuda_ndarray
 if cuda_ndarray.cuda_available == False:
    raise SkipTest('Optional package cuda disabled')
+from theano.gof.python25 import any
 import theano.sandbox.cuda as cuda
 import theano.sandbox.cuda.basic_ops as B

--- a/theano/sandbox/cuda/tests/test_nnet.py
+++ b/theano/sandbox/cuda/tests/test_nnet.py
@@ -2,6 +2,7 @@ from nose.plugins.skip import SkipTest
 import numpy
 import theano
+from theano.gof.python25 import any
 import theano.tensor as T
 import theano.tests.unittest_tools as utt

--- a/theano/sandbox/multinomial.py
+++ b/theano/sandbox/multinomial.py
@@ -2,6 +2,7 @@ import theano
 from theano import Op, Apply
 import theano.tensor as T
 from theano.gof import local_optimizer
+from theano.gof.python25 import any
 from theano.sandbox.cuda import cuda_available, GpuOp
 if cuda_available:

--- a/theano/sandbox/test_multinomial.py
+++ b/theano/sandbox/test_multinomial.py
@@ -6,6 +6,7 @@ import theano
 from theano import config, function, tensor
 import multinomial
 from theano.compile.mode import get_default_mode, predefined_linkers
+from theano.gof.python25 import any
 import theano.sandbox.cuda as cuda
 def get_mode(gpu):

--- a/theano/sandbox/test_neighbours.py
+++ b/theano/sandbox/test_neighbours.py
@@ -7,6 +7,7 @@ from neighbours import (images2neibs, neibs2images,
 # Skip test if cuda_ndarray is not available.
 from nose.plugins.skip import SkipTest
 import theano.sandbox.cuda as cuda
+from theano.gof.python25 import any
 from theano.tests import unittest_tools

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -4126,7 +4126,17 @@ class IncSubtensor(Op):
        def convert(entry):
            if isinstance(entry, gof.Type):
-                return indices.pop()
+                rval = indices.pop()
+                if sys.version_info < (2, 5):
+                    # Before Python 2.5, PySlice_GetIndicesEx requires
+                    # a Python int to be passed.
+                    rval_ = int(rval)
+                    if rval_ != rval:
+                        raise IndexError((
+                            "Invalid value for indexing: %s. "
+                            "That value may be too big.") % rval)
+                    return rval_
+                return rval
            elif isinstance(entry, slice):
                return slice(convert(entry.start),
                             convert(entry.stop),