提交 c66b296e authored 作者: notoraptor's avatar notoraptor

Apply the same changes to all ops in `gpuarray/pool.py`.

Cast ws, strides and pads to 64-bit integers in both the Python and C code for all these ops.
上级 49eb0c80
...@@ -241,9 +241,9 @@ int APPLY_SPECIFIC(pool)(PyGpuArrayObject *x, ...@@ -241,9 +241,9 @@ int APPLY_SPECIFIC(pool)(PyGpuArrayObject *x,
size_t p[3]; z_dims[0] = x_dims[0]; z_dims[1] = x_dims[1]; size_t p[3]; z_dims[0] = x_dims[0]; z_dims[1] = x_dims[1];
int nonzero_padding = 0; int nonzero_padding = 0;
for (int i = 0; i < ndims; i++) { for (int i = 0; i < ndims; i++) {
w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i)); w[i] = *((npy_int64*)PyArray_GETPTR1(ws, i));
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_int64*)PyArray_GETPTR1(stride, i));
p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i)); p[i] = *((npy_int64*)PyArray_GETPTR1(pad, i));
z_dims[2 + i] = OUTPUT_DIMS(x_dims[2 + i] + 2*p[i], w[i], s[i]); z_dims[2 + i] = OUTPUT_DIMS(x_dims[2 + i] + 2*p[i], w[i], s[i]);
if (p[i] > 0) { if (p[i] > 0) {
nonzero_padding = 1; nonzero_padding = 1;
......
...@@ -2,7 +2,6 @@ from __future__ import absolute_import, print_function, division ...@@ -2,7 +2,6 @@ from __future__ import absolute_import, print_function, division
import os.path import os.path
import theano import theano
import numpy as np
from theano import Apply from theano import Apply
from theano.tensor.basic import as_tensor_variable from theano.tensor.basic import as_tensor_variable
from theano.tensor.signal.pool import Pool from theano.tensor.signal.pool import Pool
...@@ -16,6 +15,8 @@ except ImportError as e: ...@@ -16,6 +15,8 @@ except ImportError as e:
# To make sure theano is importable # To make sure theano is importable
pass pass
dtype_name_for_casting = 'int64'
class GpuPool(CGpuKernelBase): class GpuPool(CGpuKernelBase):
""" """
...@@ -68,9 +69,7 @@ class GpuPool(CGpuKernelBase): ...@@ -68,9 +69,7 @@ class GpuPool(CGpuKernelBase):
raise TypeError('Stride parameters must be ints.') raise TypeError('Stride parameters must be ints.')
if pad.dtype not in theano.tensor.int_dtypes: if pad.dtype not in theano.tensor.int_dtypes:
raise TypeError('Padding parameters must be ints.') raise TypeError('Padding parameters must be ints.')
# I can't assume that npy_intp is 64 bits, as it can be 32 bits on some computers (according to NumPy doc),
# so I prefer to use the "bit-width name for this data-type" for casting.
dtype_name_for_casting = np.dtype(np.intp).name
ws = theano.tensor.cast(ws, dtype_name_for_casting) ws = theano.tensor.cast(ws, dtype_name_for_casting)
stride = theano.tensor.cast(stride, dtype_name_for_casting) stride = theano.tensor.cast(stride, dtype_name_for_casting)
pad = theano.tensor.cast(pad, dtype_name_for_casting) pad = theano.tensor.cast(pad, dtype_name_for_casting)
...@@ -191,7 +190,6 @@ class GpuMaxPoolGrad(CGpuKernelBase): ...@@ -191,7 +190,6 @@ class GpuMaxPoolGrad(CGpuKernelBase):
if pad.dtype not in theano.tensor.int_dtypes: if pad.dtype not in theano.tensor.int_dtypes:
raise TypeError('Padding parameters must be ints.') raise TypeError('Padding parameters must be ints.')
dtype_name_for_casting = np.dtype(np.intp).name
ws = theano.tensor.cast(ws, dtype_name_for_casting) ws = theano.tensor.cast(ws, dtype_name_for_casting)
stride = theano.tensor.cast(stride, dtype_name_for_casting) stride = theano.tensor.cast(stride, dtype_name_for_casting)
pad = theano.tensor.cast(pad, dtype_name_for_casting) pad = theano.tensor.cast(pad, dtype_name_for_casting)
...@@ -271,7 +269,6 @@ class GpuAveragePoolGrad(CGpuKernelBase): ...@@ -271,7 +269,6 @@ class GpuAveragePoolGrad(CGpuKernelBase):
if pad.dtype not in theano.tensor.int_dtypes: if pad.dtype not in theano.tensor.int_dtypes:
raise TypeError('Padding parameters must be ints.') raise TypeError('Padding parameters must be ints.')
dtype_name_for_casting = np.dtype(np.intp).name
ws = theano.tensor.cast(ws, dtype_name_for_casting) ws = theano.tensor.cast(ws, dtype_name_for_casting)
stride = theano.tensor.cast(stride, dtype_name_for_casting) stride = theano.tensor.cast(stride, dtype_name_for_casting)
pad = theano.tensor.cast(pad, dtype_name_for_casting) pad = theano.tensor.cast(pad, dtype_name_for_casting)
...@@ -353,6 +350,11 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase): ...@@ -353,6 +350,11 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase):
raise TypeError('Stride parameters must be ints.') raise TypeError('Stride parameters must be ints.')
if pad.dtype not in theano.tensor.int_dtypes: if pad.dtype not in theano.tensor.int_dtypes:
raise TypeError('Padding parameters must be ints.') raise TypeError('Padding parameters must be ints.')
ws = theano.tensor.cast(ws, dtype_name_for_casting)
stride = theano.tensor.cast(stride, dtype_name_for_casting)
pad = theano.tensor.cast(pad, dtype_name_for_casting)
return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()]) return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
def get_params(self, node): def get_params(self, node):
...@@ -421,13 +423,17 @@ class GpuMaxPoolRop(CGpuKernelBase): ...@@ -421,13 +423,17 @@ class GpuMaxPoolRop(CGpuKernelBase):
pad = as_tensor_variable(pad) pad = as_tensor_variable(pad)
assert ws.ndim == stride.ndim and ws.ndim == pad.ndim assert ws.ndim == stride.ndim and ws.ndim == pad.ndim
assert ws.ndim == 1 assert ws.ndim == 1
if not ws.dtype.startswith('int'): if ws.dtype not in theano.tensor.int_dtypes:
raise TypeError('Window shape parameters must be ints.') raise TypeError('Window shape parameters must be ints.')
if not stride.dtype.startswith('int'): if stride.dtype not in theano.tensor.int_dtypes:
raise TypeError('Stride parameters must be ints.') raise TypeError('Stride parameters must be ints.')
if not pad.dtype.startswith('int'): if pad.dtype not in theano.tensor.int_dtypes:
raise TypeError('Padding parameters must be ints.') raise TypeError('Padding parameters must be ints.')
ws = theano.tensor.cast(ws, dtype_name_for_casting)
stride = theano.tensor.cast(stride, dtype_name_for_casting)
pad = theano.tensor.cast(pad, dtype_name_for_casting)
return Apply(self, [inp, eval_point, ws, stride, pad], [eval_point.type()]) return Apply(self, [inp, eval_point, ws, stride, pad], [eval_point.type()])
def get_params(self, node): def get_params(self, node):
......
...@@ -138,9 +138,9 @@ int APPLY_SPECIFIC(ave_pool_grad)(PyGpuArrayObject *x, ...@@ -138,9 +138,9 @@ int APPLY_SPECIFIC(ave_pool_grad)(PyGpuArrayObject *x,
size_t s[3]; size_t s[3];
size_t p[3]; size_t p[3];
for(int i = 0; i < ndims; i++) { for(int i = 0; i < ndims; i++) {
w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i)); w[i] = *((npy_int64*)PyArray_GETPTR1(ws, i));
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_int64*)PyArray_GETPTR1(stride, i));
p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i)); p[i] = *((npy_int64*)PyArray_GETPTR1(pad, i));
} }
int err; int err;
......
...@@ -132,9 +132,9 @@ int APPLY_SPECIFIC(pool_grad_grad)(PyGpuArrayObject *x, ...@@ -132,9 +132,9 @@ int APPLY_SPECIFIC(pool_grad_grad)(PyGpuArrayObject *x,
size_t s[3]; size_t s[3];
size_t p[3]; size_t p[3];
for(int i = 0; i < ndims; i++) { for(int i = 0; i < ndims; i++) {
w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i)); w[i] = *((npy_int64*)PyArray_GETPTR1(ws, i));
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_int64*)PyArray_GETPTR1(stride, i));
p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i)); p[i] = *((npy_int64*)PyArray_GETPTR1(pad, i));
} }
int err; int err;
......
...@@ -124,9 +124,9 @@ int APPLY_SPECIFIC(max_pool_grad)(PyGpuArrayObject *x, ...@@ -124,9 +124,9 @@ int APPLY_SPECIFIC(max_pool_grad)(PyGpuArrayObject *x,
size_t s[3]; size_t s[3];
size_t p[3]; size_t p[3];
for(int i = 0; i < ndims; i++) { for(int i = 0; i < ndims; i++) {
w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i)); w[i] = *((npy_int64*)PyArray_GETPTR1(ws, i));
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_int64*)PyArray_GETPTR1(stride, i));
p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i)); p[i] = *((npy_int64*)PyArray_GETPTR1(pad, i));
} }
int err; int err;
......
...@@ -137,9 +137,9 @@ int APPLY_SPECIFIC(max_pool_rop)(PyGpuArrayObject *x, ...@@ -137,9 +137,9 @@ int APPLY_SPECIFIC(max_pool_rop)(PyGpuArrayObject *x,
size_t p[3]; z_dims[0] = x_dims[0]; z_dims[1] = x_dims[1]; size_t p[3]; z_dims[0] = x_dims[0]; z_dims[1] = x_dims[1];
int nonzero_padding = 0; int nonzero_padding = 0;
for (int i = 0; i < ndims; i++) { for (int i = 0; i < ndims; i++) {
w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i)); w[i] = *((npy_int64*)PyArray_GETPTR1(ws, i));
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_int64*)PyArray_GETPTR1(stride, i));
p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i)); p[i] = *((npy_int64*)PyArray_GETPTR1(pad, i));
z_dims[2 + i] = OUTPUT_DIMS(x_dims[2 + i] + 2*p[i], w[i], s[i]); z_dims[2 + i] = OUTPUT_DIMS(x_dims[2 + i] + 2*p[i], w[i], s[i]);
if (p[i] > 0) { if (p[i] > 0) {
nonzero_padding = 1; nonzero_padding = 1;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论