Apply the same changes to all ops in `gpuarray/pool.py`.

Cast `ws`, `stride` and `pad` to 64-bit integers (`int64` in Python, `npy_int64` in C) in both the Python and C code for all these ops.

Apply the same changes to all ops in `gpuarray/pool.py`.
c66b296e · notoraptor · 49eb0c80 · c66b296e · c66b296e · c66b296e
--- a/theano/gpuarray/pool.c
+++ b/theano/gpuarray/pool.c
@@ -241,9 +241,9 @@ int APPLY_SPECIFIC(pool)(PyGpuArrayObject *x,
  size_t p[3]; z_dims[0] = x_dims[0]; z_dims[1] = x_dims[1];
  int nonzero_padding = 0;
  for (int i = 0; i < ndims; i++) {
-    w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i));
+    w[i] = *((npy_int64*)PyArray_GETPTR1(ws, i));
-    s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
+    s[i] = *((npy_int64*)PyArray_GETPTR1(stride, i));
-    p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i));
+    p[i] = *((npy_int64*)PyArray_GETPTR1(pad, i));
    z_dims[2 + i] = OUTPUT_DIMS(x_dims[2 + i] + 2*p[i], w[i], s[i]);
    if (p[i] > 0) {
      nonzero_padding = 1;

--- a/theano/gpuarray/pool.py
+++ b/theano/gpuarray/pool.py
@@ -2,7 +2,6 @@ from __future__ import absolute_import, print_function, division
 import os.path
 import theano
-import numpy as np
 from theano import Apply
 from theano.tensor.basic import as_tensor_variable
 from theano.tensor.signal.pool import Pool
@@ -16,6 +15,8 @@ except ImportError as e:
    # To make sure theano is importable
    pass
+dtype_name_for_casting = 'int64'
 class GpuPool(CGpuKernelBase):
    """
@@ -68,9 +69,7 @@ class GpuPool(CGpuKernelBase):
            raise TypeError('Stride parameters must be ints.')
        if pad.dtype not in theano.tensor.int_dtypes:
            raise TypeError('Padding parameters must be ints.')
-        # I can't assume that npy_intp is 64 bits, as it can be 32 bits on some computers (according to NumPy doc),
-        # so I prefer to use the "bit-width name for this data-type" for casting.
-        dtype_name_for_casting = np.dtype(np.intp).name
        ws = theano.tensor.cast(ws, dtype_name_for_casting)
        stride = theano.tensor.cast(stride, dtype_name_for_casting)
        pad = theano.tensor.cast(pad, dtype_name_for_casting)
@@ -191,7 +190,6 @@ class GpuMaxPoolGrad(CGpuKernelBase):
        if pad.dtype not in theano.tensor.int_dtypes:
            raise TypeError('Padding parameters must be ints.')
-        dtype_name_for_casting = np.dtype(np.intp).name
        ws = theano.tensor.cast(ws, dtype_name_for_casting)
        stride = theano.tensor.cast(stride, dtype_name_for_casting)
        pad = theano.tensor.cast(pad, dtype_name_for_casting)
@@ -271,7 +269,6 @@ class GpuAveragePoolGrad(CGpuKernelBase):
        if pad.dtype not in theano.tensor.int_dtypes:
            raise TypeError('Padding parameters must be ints.')
-        dtype_name_for_casting = np.dtype(np.intp).name
        ws = theano.tensor.cast(ws, dtype_name_for_casting)
        stride = theano.tensor.cast(stride, dtype_name_for_casting)
        pad = theano.tensor.cast(pad, dtype_name_for_casting)
@@ -353,6 +350,11 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase):
            raise TypeError('Stride parameters must be ints.')
        if pad.dtype not in theano.tensor.int_dtypes:
            raise TypeError('Padding parameters must be ints.')
+        ws = theano.tensor.cast(ws, dtype_name_for_casting)
+        stride = theano.tensor.cast(stride, dtype_name_for_casting)
+        pad = theano.tensor.cast(pad, dtype_name_for_casting)
        return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
    def get_params(self, node):
@@ -421,13 +423,17 @@ class GpuMaxPoolRop(CGpuKernelBase):
        pad = as_tensor_variable(pad)
        assert ws.ndim == stride.ndim and ws.ndim == pad.ndim
        assert ws.ndim == 1
-        if not ws.dtype.startswith('int'):
+        if ws.dtype not in theano.tensor.int_dtypes:
            raise TypeError('Window shape parameters must be ints.')
-        if not stride.dtype.startswith('int'):
+        if stride.dtype not in theano.tensor.int_dtypes:
            raise TypeError('Stride parameters must be ints.')
-        if not pad.dtype.startswith('int'):
+        if pad.dtype not in theano.tensor.int_dtypes:
            raise TypeError('Padding parameters must be ints.')
+        ws = theano.tensor.cast(ws, dtype_name_for_casting)
+        stride = theano.tensor.cast(stride, dtype_name_for_casting)
+        pad = theano.tensor.cast(pad, dtype_name_for_casting)
        return Apply(self, [inp, eval_point, ws, stride, pad], [eval_point.type()])
    def get_params(self, node):

--- a/theano/gpuarray/pool_ave_grad.c
+++ b/theano/gpuarray/pool_ave_grad.c
@@ -138,9 +138,9 @@ int APPLY_SPECIFIC(ave_pool_grad)(PyGpuArrayObject *x,
    size_t s[3];
    size_t p[3];
    for(int i = 0; i < ndims; i++) {
-      w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i));
+      w[i] = *((npy_int64*)PyArray_GETPTR1(ws, i));
-      s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
+      s[i] = *((npy_int64*)PyArray_GETPTR1(stride, i));
-      p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i));
+      p[i] = *((npy_int64*)PyArray_GETPTR1(pad, i));
    }
    int err;

--- a/theano/gpuarray/pool_grad_grad.c
+++ b/theano/gpuarray/pool_grad_grad.c
@@ -132,9 +132,9 @@ int APPLY_SPECIFIC(pool_grad_grad)(PyGpuArrayObject *x,
    size_t s[3];
    size_t p[3];
    for(int i = 0; i < ndims; i++) {
-      w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i));
+      w[i] = *((npy_int64*)PyArray_GETPTR1(ws, i));
-      s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
+      s[i] = *((npy_int64*)PyArray_GETPTR1(stride, i));
-      p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i));
+      p[i] = *((npy_int64*)PyArray_GETPTR1(pad, i));
    }
    int err;

--- a/theano/gpuarray/pool_max_grad.c
+++ b/theano/gpuarray/pool_max_grad.c
@@ -124,9 +124,9 @@ int APPLY_SPECIFIC(max_pool_grad)(PyGpuArrayObject *x,
    size_t s[3];
    size_t p[3];
    for(int i = 0; i < ndims; i++) {
-      w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i));
+      w[i] = *((npy_int64*)PyArray_GETPTR1(ws, i));
-      s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
+      s[i] = *((npy_int64*)PyArray_GETPTR1(stride, i));
-      p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i));
+      p[i] = *((npy_int64*)PyArray_GETPTR1(pad, i));
    }
    int err;

--- a/theano/gpuarray/pool_max_rop.c
+++ b/theano/gpuarray/pool_max_rop.c
@@ -137,9 +137,9 @@ int APPLY_SPECIFIC(max_pool_rop)(PyGpuArrayObject *x,
  size_t p[3]; z_dims[0] = x_dims[0]; z_dims[1] = x_dims[1];
  int nonzero_padding = 0;
  for (int i = 0; i < ndims; i++) {
-    w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i));
+    w[i] = *((npy_int64*)PyArray_GETPTR1(ws, i));
-    s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
+    s[i] = *((npy_int64*)PyArray_GETPTR1(stride, i));
-    p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i));
+    p[i] = *((npy_int64*)PyArray_GETPTR1(pad, i));
    z_dims[2 + i] = OUTPUT_DIMS(x_dims[2 + i] + 2*p[i], w[i], s[i]);
    if (p[i] > 0) {
      nonzero_padding = 1;