Merge pull request #5802 from affanv14/macro

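Rename the per-variable dtype macros generated by CGpuKernelBase: DTYPE_i<N> becomes DTYPE_INPUT_<N> and DTYPE_o<N> becomes DTYPE_OUTPUT_<N>; update the GPU kernel sources that use them to match.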

Merge pull request #5802 from affanv14/macro
72823c46 · Frédéric Bastien · GitHub · 0ffd130e · e230e63f · 72823c46
--- a/theano/gpuarray/basic_ops.py
+++ b/theano/gpuarray/basic_ops.py
@@ -499,7 +499,7 @@ class CGpuKernelBase(COp, GpuKernelBase):
        undef_macros = []
        for i, v in enumerate(node.inputs):
            if isinstance(v.type, GpuArrayType):
-                macro_name = "DTYPE_i%d" % (i,)
+                macro_name = "DTYPE_INPUT_%d" % (i,)
                macro_value = pygpu.gpuarray.dtype_to_ctype(v.dtype)
                define_macros.append(
                    define_template %
@@ -507,7 +507,7 @@ class CGpuKernelBase(COp, GpuKernelBase):
                undef_macros.append(undef_template % macro_name)
        for i, v in enumerate(node.outputs):
            if isinstance(v.type, GpuArrayType):
-                macro_name = "DTYPE_o%d" % (i,)
+                macro_name = "DTYPE_OUTPUT_%d" % (i,)
                macro_value = pygpu.gpuarray.dtype_to_ctype(v.dtype)
                define_macros.append(
                    define_template %

--- a/theano/gpuarray/corr3d_gemm.c
+++ b/theano/gpuarray/corr3d_gemm.c
@@ -34,7 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 // Kernels for fast unfold + copy
 // GPU kernel for the case of dilation
 KERNEL void dilated_im3d2col_kernel(const ga_size n,
-    GLOBAL_MEM const DTYPE_i0 * data_im,
+    GLOBAL_MEM const DTYPE_INPUT_0 * data_im,
    const ga_size data_im_offset,
    const ga_size height, const ga_size width, const ga_size depth,
    const ga_size kernel_h, const ga_size kernel_w, const ga_size kernel_d,
@@ -42,7 +42,7 @@ KERNEL void dilated_im3d2col_kernel(const ga_size n,
    const ga_size pad_h, const ga_size pad_w, const ga_size pad_d,
    const ga_size stride_h, const ga_size stride_w, const ga_size stride_d,
    const ga_size height_col, const ga_size width_col, const ga_size depth_col,
-    GLOBAL_MEM DTYPE_i0 * data_col) {
+    GLOBAL_MEM DTYPE_INPUT_0 * data_col) {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
       index < (n); index += LDIM_0 * GDIM_0) {
@@ -56,10 +56,10 @@ KERNEL void dilated_im3d2col_kernel(const ga_size n,
    const ga_size h_offset = h_col * stride_h - pad_h;
    const ga_size w_offset = w_col * stride_w - pad_w;
    const ga_size d_offset = d_col * stride_d - pad_d;
-    DTYPE_i0 * data_col_ptr = data_col;
+    DTYPE_INPUT_0 * data_col_ptr = data_col;
    data_col_ptr += c_col * (height_col * width_col * depth_col) +
      h_col * (width_col * depth_col) + w_col * depth_col + d_col;
-    const DTYPE_i0 * data_im_ptr = data_im + data_im_offset;
+    const DTYPE_INPUT_0 * data_im_ptr = data_im + data_im_offset;
    data_im_ptr += c_im * (height * width * depth) +
      h_offset * (width * depth) + w_offset * depth + d_offset;
    for (ga_size i = 0; i < kernel_h; ++i) {
@@ -82,14 +82,14 @@ KERNEL void dilated_im3d2col_kernel(const ga_size n,
 #kernel im3d2col_kernel : size, *, size, size, size, size, size, size, size, size, size, size, size, size, size, size, size, size, * : 
 KERNEL void im3d2col_kernel(const ga_size n,
-    GLOBAL_MEM const DTYPE_i0 * data_im,
+    GLOBAL_MEM const DTYPE_INPUT_0 * data_im,
    const ga_size data_im_offset,
    const ga_size height, const ga_size width, const ga_size depth,
    const ga_size kernel_h, const ga_size kernel_w, const ga_size kernel_d,
    const ga_size pad_h, const ga_size pad_w, const ga_size pad_d,
    const ga_size stride_h, const ga_size stride_w, const ga_size stride_d,
    const ga_size height_col, const ga_size width_col, const ga_size depth_col,
-    GLOBAL_MEM DTYPE_i0 * data_col) {
+    GLOBAL_MEM DTYPE_INPUT_0 * data_col) {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
       index < (n); index += LDIM_0 * GDIM_0) {
@@ -103,10 +103,10 @@ KERNEL void im3d2col_kernel(const ga_size n,
    const ga_size h_offset = h_col * stride_h - pad_h;
    const ga_size w_offset = w_col * stride_w - pad_w;
    const ga_size d_offset = d_col * stride_d - pad_d;
-    DTYPE_i0 * data_col_ptr = data_col;
+    DTYPE_INPUT_0 * data_col_ptr = data_col;
    data_col_ptr += c_col * (height_col * width_col * depth_col) +
      h_col * (width_col * depth_col) + w_col * depth_col + d_col;
-    const DTYPE_i0 * data_im_ptr = data_im + data_im_offset;
+    const DTYPE_INPUT_0 * data_im_ptr = data_im + data_im_offset;
    data_im_ptr += c_im * (height * width * depth) +
      h_offset * (width * depth) + w_offset * depth + d_offset;
    for (ga_size i = 0; i < kernel_h; ++i) {
@@ -128,7 +128,7 @@ KERNEL void im3d2col_kernel(const ga_size n,
 // GPU kernel for the case of dilation
 #kernel dilated_col2im3d_kernel : size, *, size, size, size, size, size, size, size, size, size, size, size, size, size, size, size, size, size, size, size, *, size : 
 KERNEL void dilated_col2im3d_kernel(const ga_size n,
-    GLOBAL_MEM const DTYPE_i0 * data_col,
+    GLOBAL_MEM const DTYPE_INPUT_0 * data_col,
    const ga_size height, const ga_size width, const ga_size depth,
    const ga_size channels,
    const ga_size kernel_h, const ga_size kernel_w, const ga_size kernel_d,
@@ -136,12 +136,12 @@ KERNEL void dilated_col2im3d_kernel(const ga_size n,
    const ga_size pad_h, const ga_size pad_w, const ga_size pad_d,
    const ga_size stride_h, const ga_size stride_w, const ga_size stride_d,
    const ga_size height_col, const ga_size width_col, const ga_size depth_col,
-    GLOBAL_MEM DTYPE_i0 * data_im,
+    GLOBAL_MEM DTYPE_INPUT_0 * data_im,
    const ga_size data_im_offset) {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
       index < (n); index += LDIM_0 * GDIM_0) {
-    DTYPE_i0 val = 0;
+    DTYPE_INPUT_0 val = 0;
    const ga_size d_im = index % depth + pad_d;
    const ga_size w_index = index / depth;
    const ga_size w_im = w_index % width + pad_w;
@@ -190,19 +190,19 @@ KERNEL void dilated_col2im3d_kernel(const ga_size n,
 #kernel col2im3d_kernel : size, *, size, size, size, size, size, size, size, size, size, size, size, size, size, size, size, size, *, size : 
 KERNEL void col2im3d_kernel(const ga_size n,
-    GLOBAL_MEM const DTYPE_i0 * data_col,
+    GLOBAL_MEM const DTYPE_INPUT_0 * data_col,
    const ga_size height, const ga_size width, const ga_size depth,
    const ga_size channels,
    const ga_size kernel_h, const ga_size kernel_w, const ga_size kernel_d,
    const ga_size pad_h, const ga_size pad_w, const ga_size pad_d,
    const ga_size stride_h, const ga_size stride_w, const ga_size stride_d,
    const ga_size height_col, const ga_size width_col, const ga_size depth_col,
-    GLOBAL_MEM DTYPE_i0 * data_im,
+    GLOBAL_MEM DTYPE_INPUT_0 * data_im,
    const ga_size data_im_offset) {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
       index < (n); index += LDIM_0 * GDIM_0) {
-    DTYPE_i0 val = 0;
+    DTYPE_INPUT_0 val = 0;
    const ga_size d_im = index % depth + pad_d;
    const ga_size w_index = index / depth;
    const ga_size w_im = w_index % width + pad_w;

--- a/theano/gpuarray/corr_gemm.c
+++ b/theano/gpuarray/corr_gemm.c
@@ -34,7 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 // Kernels for fast unfold + copy
 // GPU kernel for the case of dilation
 KERNEL void dilated_im2col_kernel(const ga_size n,
-    GLOBAL_MEM const DTYPE_i0 * data_im,
+    GLOBAL_MEM const DTYPE_INPUT_0 * data_im,
    const ga_size data_im_offset,
    const ga_size height, const ga_size width,
    const ga_size kernel_h, const ga_size kernel_w,
@@ -42,7 +42,7 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
    const ga_size pad_h, const ga_size pad_w,
    const ga_size stride_h, const ga_size stride_w,
    const ga_size height_col, const ga_size width_col,
-    GLOBAL_MEM DTYPE_i0 * data_col) {
+    GLOBAL_MEM DTYPE_INPUT_0 * data_col) {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
       index < (n); index += LDIM_0 * GDIM_0) {
@@ -53,9 +53,9 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
    const ga_size c_col = c_im * kernel_h * kernel_w;
    const ga_size h_offset = h_col * stride_h - pad_h;
    const ga_size w_offset = w_col * stride_w - pad_w;
-    DTYPE_i0 * data_col_ptr = data_col;
+    DTYPE_INPUT_0 * data_col_ptr = data_col;
    data_col_ptr += (c_col * height_col + h_col) * width_col + w_col;
-    const DTYPE_i0 * data_im_ptr = data_im + data_im_offset;
+    const DTYPE_INPUT_0 * data_im_ptr = data_im + data_im_offset;
    data_im_ptr += (c_im * height + h_offset) * width + w_offset;
    for (ga_size i = 0; i < kernel_h; ++i) {
      for (ga_size j = 0; j < kernel_w; ++j) {
@@ -72,14 +72,14 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
 #kernel im2col_kernel : size, *, size, size, size, size, size, size, size, size, size, size, size, * : 
 KERNEL void im2col_kernel(const ga_size n,
-    GLOBAL_MEM const DTYPE_i0 * data_im,
+    GLOBAL_MEM const DTYPE_INPUT_0 * data_im,
    const ga_size data_im_offset,
    const ga_size height, const ga_size width,
    const ga_size kernel_h, const ga_size kernel_w,
    const ga_size pad_h, const ga_size pad_w,
    const ga_size stride_h, const ga_size stride_w,
    const ga_size height_col, const ga_size width_col,
-    GLOBAL_MEM DTYPE_i0 * data_col) {
+    GLOBAL_MEM DTYPE_INPUT_0 * data_col) {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
       index < (n); index += LDIM_0 * GDIM_0) {
@@ -90,9 +90,9 @@ KERNEL void im2col_kernel(const ga_size n,
    const ga_size c_col = c_im * kernel_h * kernel_w;
    const ga_size h_offset = h_col * stride_h - pad_h;
    const ga_size w_offset = w_col * stride_w - pad_w;
-    DTYPE_i0 * data_col_ptr = data_col;
+    DTYPE_INPUT_0 * data_col_ptr = data_col;
    data_col_ptr += (c_col * height_col + h_col) * width_col + w_col;
-    const DTYPE_i0 * data_im_ptr = data_im + data_im_offset;
+    const DTYPE_INPUT_0 * data_im_ptr = data_im + data_im_offset;
    data_im_ptr += (c_im * height + h_offset) * width + w_offset;
    for (ga_size i = 0; i < kernel_h; ++i) {
      for (ga_size j = 0; j < kernel_w; ++j) {
@@ -110,19 +110,19 @@ KERNEL void im2col_kernel(const ga_size n,
 // GPU kernel for the case of dilation
 #kernel dilated_col2im_kernel : size, *, size, size, size, size, size, size, size, size, size, size, size, size, size, *, size : 
 KERNEL void dilated_col2im_kernel(const ga_size n,
-    GLOBAL_MEM const DTYPE_i0 * data_col,
+    GLOBAL_MEM const DTYPE_INPUT_0 * data_col,
    const ga_size height, const ga_size width, const ga_size channels,
    const ga_size kernel_h, const ga_size kernel_w,
    const ga_size dilation_h, const ga_size dilation_w,
    const ga_size pad_h, const ga_size pad_w,
    const ga_size stride_h, const ga_size stride_w,
    const ga_size height_col, const ga_size width_col,
-    GLOBAL_MEM DTYPE_i0 * data_im,
+    GLOBAL_MEM DTYPE_INPUT_0 * data_im,
    const ga_size data_im_offset) {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
       index < (n); index += LDIM_0 * GDIM_0) {
-    DTYPE_i0 val = 0;
+    DTYPE_INPUT_0 val = 0;
    const ga_size w_im = index % width + pad_w;
    const ga_size h_im = (index / width) % height + pad_h;
    const ga_size c_im = index / (width * height);
@@ -155,18 +155,18 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
 #kernel col2im_kernel : size, *, size, size, size, size, size, size, size, size, size, size, size, *, size : 
 KERNEL void col2im_kernel(const ga_size n,
-    GLOBAL_MEM const DTYPE_i0 * data_col,
+    GLOBAL_MEM const DTYPE_INPUT_0 * data_col,
    const ga_size height, const ga_size width, const ga_size channels,
    const ga_size kernel_h, const ga_size kernel_w,
    const ga_size pad_h, const ga_size pad_w,
    const ga_size stride_h, const ga_size stride_w,
    const ga_size height_col, const ga_size width_col,
-    GLOBAL_MEM DTYPE_i0 * data_im,
+    GLOBAL_MEM DTYPE_INPUT_0 * data_im,
    const ga_size data_im_offset) {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
       index < (n); index += LDIM_0 * GDIM_0) {
-    DTYPE_i0 val = 0;
+    DTYPE_INPUT_0 val = 0;
    const ga_size w_im = index % width + pad_w;
    const ga_size h_im = (index / width) % height + pad_h;
    const ga_size c_im = index / (width * height);

--- a/theano/gpuarray/pool.c
+++ b/theano/gpuarray/pool.c
@@ -6,9 +6,9 @@
 KERNEL void max_pool2d_kernel(const ga_size nthreads,
   const ga_size num, const ga_size channels, const ga_size pooled_height,
   const ga_size pooled_width, const ga_size height, const ga_size width,
-   GLOBAL_MEM const DTYPE_i0 *x, const ga_size kernel_h, const ga_size kernel_w,
+   GLOBAL_MEM const DTYPE_INPUT_0 *x, const ga_size kernel_h, const ga_size kernel_w,
   const ga_size stride_h, const ga_size stride_w, const ga_size pad_h, const ga_size pad_w,
-   GLOBAL_MEM DTYPE_o0 *z)
+   GLOBAL_MEM DTYPE_OUTPUT_0 *z)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -26,8 +26,8 @@ KERNEL void max_pool2d_kernel(const ga_size nthreads,
    wstart = max(wstart, 0);
    const ga_size offset = (n*channels + c) * height * width;
-    const DTYPE_i0* x_slice = x + offset;
+    const DTYPE_INPUT_0* x_slice = x + offset;
-    DTYPE_o0 maxval = x_slice[hstart*width + wstart];
+    DTYPE_OUTPUT_0 maxval = x_slice[hstart*width + wstart];
    for (ga_size h=hstart; h < hend; ++h) {
      for (ga_size w=wstart; w < wend; ++w) {
@@ -48,10 +48,10 @@ KERNEL void max_pool3d_kernel(const ga_size nthreads,
   const ga_size num, const ga_size channels, const ga_size pooled_depth,
   const ga_size pooled_height, const ga_size pooled_width,
   const ga_size depth, const ga_size height, const ga_size width,
-   GLOBAL_MEM const DTYPE_i0 *x, const ga_size kernel_d, const ga_size kernel_h,
+   GLOBAL_MEM const DTYPE_INPUT_0 *x, const ga_size kernel_d, const ga_size kernel_h,
   const ga_size kernel_w, const ga_size stride_d, const ga_size stride_h,
   const ga_size stride_w, const ga_size pad_d, const ga_size pad_h, const ga_size pad_w,
-   GLOBAL_MEM DTYPE_o0 *z)
+   GLOBAL_MEM DTYPE_OUTPUT_0 *z)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -73,8 +73,8 @@ KERNEL void max_pool3d_kernel(const ga_size nthreads,
    wstart = max(wstart, 0);
    const ga_size offset = (n*channels + c) * depth * height * width;
-    const DTYPE_i0* x_slice = x + offset;
+    const DTYPE_INPUT_0* x_slice = x + offset;
-    DTYPE_o0 maxval = x_slice[(dstart*height + hstart)*width + wstart];
+    DTYPE_OUTPUT_0 maxval = x_slice[(dstart*height + hstart)*width + wstart];
    for (ga_size d=dstart; d < dend; ++d) {
      for (ga_size h=hstart; h < hend; ++h) {
@@ -96,10 +96,10 @@ KERNEL void max_pool3d_kernel(const ga_size nthreads,
 KERNEL void ave_pool2d_kernel(const ga_size nthreads,
   const ga_size num, const ga_size channels, const ga_size pooled_height,
   const ga_size pooled_width, const ga_size height, const ga_size width,
-   GLOBAL_MEM const DTYPE_i0 *x, const ga_size kernel_h, const ga_size kernel_w,
+   GLOBAL_MEM const DTYPE_INPUT_0 *x, const ga_size kernel_h, const ga_size kernel_w,
   const ga_size stride_h, const ga_size stride_w, const ga_size pad_h, const ga_size pad_w,
   const ga_bool inc_pad, const ga_bool sum_mode,
-   GLOBAL_MEM DTYPE_o0 *z)
+   GLOBAL_MEM DTYPE_OUTPUT_0 *z)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -126,8 +126,8 @@ KERNEL void ave_pool2d_kernel(const ga_size nthreads,
    }
    const ga_size offset = (n*channels + c) * height * width;
-    const DTYPE_i0* x_slice = x + offset;
+    const DTYPE_INPUT_0* x_slice = x + offset;
-    DTYPE_o0 collector = 0;
+    DTYPE_OUTPUT_0 collector = 0;
    for (ga_size h=hstart; h < hend; ++h) {
      for (ga_size w=wstart; w < wend; ++w) {
@@ -150,11 +150,11 @@ KERNEL void ave_pool3d_kernel(const ga_size nthreads,
                              const ga_size num, const ga_size channels, const ga_size pooled_depth,
                              const ga_size pooled_height, const ga_size pooled_width,
                              const ga_size depth, const ga_size height, const ga_size width,
-                              GLOBAL_MEM const DTYPE_i0 *x, const ga_size kernel_d, const ga_size kernel_h,
+                              GLOBAL_MEM const DTYPE_INPUT_0 *x, const ga_size kernel_d, const ga_size kernel_h,
                              const ga_size kernel_w, const ga_size stride_d, const ga_size stride_h,
                              const ga_size stride_w, const ga_size pad_d, const ga_size pad_h, const ga_size pad_w,
                              const ga_bool inc_pad, const ga_bool sum_mode,
-                              GLOBAL_MEM DTYPE_o0 *z)
+                              GLOBAL_MEM DTYPE_OUTPUT_0 *z)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -186,8 +186,8 @@ KERNEL void ave_pool3d_kernel(const ga_size nthreads,
    }
    const ga_size offset = (n*channels + c) * depth * height * width;
-    const DTYPE_i0* x_slice = x + offset;
+    const DTYPE_INPUT_0* x_slice = x + offset;
-    DTYPE_o0 collector = 0;
+    DTYPE_OUTPUT_0 collector = 0;
    for (ga_size d=dstart; d < dend; ++d) {
      for (ga_size h=hstart; h < hend; ++h) {

--- a/theano/gpuarray/pool_ave_grad.c
+++ b/theano/gpuarray/pool_ave_grad.c
@@ -6,10 +6,10 @@
 KERNEL void ave_pool2d_grad_kernel(const ga_size nthreads,
   const ga_size num, const ga_size channels, const ga_size height,
   const ga_size width, const ga_size pooled_height, const ga_size pooled_width,
-   GLOBAL_MEM const DTYPE_i0 *x, GLOBAL_MEM const DTYPE_i1 *gz,
+   GLOBAL_MEM const DTYPE_INPUT_0 *x, GLOBAL_MEM const DTYPE_INPUT_1 *gz,
   const ga_size kernel_h, const ga_size kernel_w, const ga_size stride_h, const ga_size stride_w,
   const ga_size pad_h, const ga_size pad_w, const ga_bool inc_pad, const ga_bool sum_mode,
-   GLOBAL_MEM DTYPE_o0 *gx)
+   GLOBAL_MEM DTYPE_OUTPUT_0 *gx)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -24,8 +24,8 @@ KERNEL void ave_pool2d_grad_kernel(const ga_size nthreads,
    const ga_size pwend = min((w + pad_w) / stride_w + 1, pooled_width);
    const ga_size offset = (n*channels + c) * pooled_height * pooled_width;
-    const DTYPE_i1* gz_slice = gz + offset;
+    const DTYPE_INPUT_1* gz_slice = gz + offset;
-    DTYPE_o0 collector = 0;
+    DTYPE_OUTPUT_0 collector = 0;
    for (ga_size ph=phstart; ph < phend; ++ph) {
      for (ga_size pw=pwstart; pw < pwend; ++pw) {
@@ -53,11 +53,11 @@ KERNEL void ave_pool3d_grad_kernel(const ga_size nthreads,
   const ga_size num, const ga_size channels, const ga_size depth,
   const ga_size height, const ga_size width, const ga_size pooled_depth,
   const ga_size pooled_height, const ga_size pooled_width,
-   GLOBAL_MEM const DTYPE_i0 *x, GLOBAL_MEM const DTYPE_i1 *gz,
+   GLOBAL_MEM const DTYPE_INPUT_0 *x, GLOBAL_MEM const DTYPE_INPUT_1 *gz,
   const ga_size kernel_d, const ga_size kernel_h, const ga_size kernel_w,
   const ga_size stride_d, const ga_size stride_h, const ga_size stride_w,
   const ga_size pad_d, const ga_size pad_h, const ga_size pad_w,
-   const ga_bool inc_pad, const ga_bool sum_mode, GLOBAL_MEM DTYPE_o0 *gx)
+   const ga_bool inc_pad, const ga_bool sum_mode, GLOBAL_MEM DTYPE_OUTPUT_0 *gx)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -75,8 +75,8 @@ KERNEL void ave_pool3d_grad_kernel(const ga_size nthreads,
    const ga_size pwend = min((w + pad_w) / stride_w + 1, pooled_width);
    const ga_size offset = (n*channels + c) * pooled_depth * pooled_height * pooled_width;
-    const DTYPE_i1* gz_slice = gz + offset;
+    const DTYPE_INPUT_1* gz_slice = gz + offset;
-    DTYPE_o0 collector = 0;
+    DTYPE_OUTPUT_0 collector = 0;
    for (ga_size pd=pdstart; pd < pdend; ++pd) {
      for (ga_size ph=phstart; ph < phend; ++ph) {

--- a/theano/gpuarray/pool_grad_grad.c
+++ b/theano/gpuarray/pool_grad_grad.c
@@ -5,10 +5,10 @@
 KERNEL void max_pool2d_grad_grad_kernel(const ga_size nthreads,
   const ga_size num, const ga_size channels, const ga_size pooled_height,
   const ga_size pooled_width, const ga_size height, const ga_size width,
-   GLOBAL_MEM const DTYPE_i0 *x, GLOBAL_MEM const DTYPE_i1 *z, GLOBAL_MEM const DTYPE_i2 *gx,
+   GLOBAL_MEM const DTYPE_INPUT_0 *x, GLOBAL_MEM const DTYPE_INPUT_1 *z, GLOBAL_MEM const DTYPE_INPUT_2 *gx,
   const ga_size kernel_h, const ga_size kernel_w, const ga_size stride_h, const ga_size stride_w,
   const ga_size pad_h, const ga_size pad_w,
-   GLOBAL_MEM DTYPE_o0 *gz)
+   GLOBAL_MEM DTYPE_OUTPUT_0 *gz)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -26,9 +26,9 @@ KERNEL void max_pool2d_grad_grad_kernel(const ga_size nthreads,
    const ga_size offset = (n*channels + c) * height * width;
-    const DTYPE_i0* x_slice = x + offset;
+    const DTYPE_INPUT_0* x_slice = x + offset;
-    const DTYPE_i2* gx_slice = gx + offset;
+    const DTYPE_INPUT_2* gx_slice = gx + offset;
-    DTYPE_o0 gradient = 0;
+    DTYPE_OUTPUT_0 gradient = 0;
    for (ga_size h=hstart; h < hend; ++h) {
      for (ga_size w=wstart; w < wend; ++w) {
@@ -48,11 +48,11 @@ KERNEL void max_pool3d_grad_grad_kernel(const ga_size nthreads,
   const ga_size num, const ga_size channels, const ga_size pooled_depth,
   const ga_size pooled_height, const ga_size pooled_width,
   const ga_size depth, const ga_size height, const ga_size width,
-   GLOBAL_MEM const DTYPE_i0 *x, GLOBAL_MEM const DTYPE_i1 *z, GLOBAL_MEM const DTYPE_i2 *gx,
+   GLOBAL_MEM const DTYPE_INPUT_0 *x, GLOBAL_MEM const DTYPE_INPUT_1 *z, GLOBAL_MEM const DTYPE_INPUT_2 *gx,
   const ga_size kernel_d, const ga_size kernel_h, const ga_size kernel_w,
   const ga_size stride_d, const ga_size stride_h, const ga_size stride_w,
   const ga_size pad_d, const ga_size pad_h, const ga_size pad_w,
-   GLOBAL_MEM DTYPE_o0 *gz)
+   GLOBAL_MEM DTYPE_OUTPUT_0 *gz)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -74,9 +74,9 @@ KERNEL void max_pool3d_grad_grad_kernel(const ga_size nthreads,
    const ga_size offset = (n*channels + c) * depth * height * width;
-    const DTYPE_i0* x_slice = x + offset;
+    const DTYPE_INPUT_0* x_slice = x + offset;
-    const DTYPE_i2* gx_slice = gx + offset;
+    const DTYPE_INPUT_2* gx_slice = gx + offset;
-    DTYPE_o0 gradient = 0;
+    DTYPE_OUTPUT_0 gradient = 0;
    for (ga_size d=dstart; d < dend; ++d) {
      for (ga_size h=hstart; h < hend; ++h) {

--- a/theano/gpuarray/pool_max_grad.c
+++ b/theano/gpuarray/pool_max_grad.c
@@ -6,9 +6,9 @@
 KERNEL void max_pool2d_grad_kernel(const ga_size nthreads,
   const ga_size num, const ga_size channels, const ga_size height,
   const ga_size width, const ga_size pooled_height, const ga_size pooled_width,
-   GLOBAL_MEM const DTYPE_i0 *x, GLOBAL_MEM const DTYPE_i1 *z, GLOBAL_MEM const DTYPE_i2 *gz,
+   GLOBAL_MEM const DTYPE_INPUT_0 *x, GLOBAL_MEM const DTYPE_INPUT_1 *z, GLOBAL_MEM const DTYPE_INPUT_2 *gz,
   const ga_size kernel_h, const ga_size kernel_w, const ga_size stride_h, const ga_size stride_w,
-   const ga_size pad_h, const ga_size pad_w, GLOBAL_MEM DTYPE_o0 *gx)
+   const ga_size pad_h, const ga_size pad_w, GLOBAL_MEM DTYPE_OUTPUT_0 *gx)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -23,9 +23,9 @@ KERNEL void max_pool2d_grad_kernel(const ga_size nthreads,
    const ga_size pwend = min((w + pad_w) / stride_w + 1, pooled_width);
    const ga_size offset = (n*channels + c) * pooled_height * pooled_width;
-    const DTYPE_i1* z_slice = z + offset;
+    const DTYPE_INPUT_1* z_slice = z + offset;
-    const DTYPE_i2* gz_slice = gz + offset;
+    const DTYPE_INPUT_2* gz_slice = gz + offset;
-    DTYPE_o0 gradient = 0;
+    DTYPE_OUTPUT_0 gradient = 0;
    for (ga_size ph=phstart; ph < phend; ++ph) {
      for (ga_size pw=pwstart; pw < pwend; ++pw) {
@@ -45,11 +45,11 @@ KERNEL void max_pool3d_grad_kernel(const ga_size nthreads,
   const ga_size num, const ga_size channels, const ga_size depth,
   const ga_size height, const ga_size width, const ga_size pooled_depth,
   const ga_size pooled_height, const ga_size pooled_width,
-   GLOBAL_MEM const DTYPE_i0 *x, GLOBAL_MEM const DTYPE_i1 *z, GLOBAL_MEM const DTYPE_i2 *gz,
+   GLOBAL_MEM const DTYPE_INPUT_0 *x, GLOBAL_MEM const DTYPE_INPUT_1 *z, GLOBAL_MEM const DTYPE_INPUT_2 *gz,
   const ga_size kernel_d, const ga_size kernel_h, const ga_size kernel_w,
   const ga_size stride_d, const ga_size stride_h, const ga_size stride_w,
   const ga_size pad_d, const ga_size pad_h, const ga_size pad_w,
-   GLOBAL_MEM DTYPE_o0 *gx)
+   GLOBAL_MEM DTYPE_OUTPUT_0 *gx)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -67,9 +67,9 @@ KERNEL void max_pool3d_grad_kernel(const ga_size nthreads,
    const ga_size pwend = min((w + pad_w) / stride_w + 1, pooled_width);
    const ga_size offset = (n*channels + c) * pooled_depth * pooled_height * pooled_width;
-    const DTYPE_i1* z_slice = z + offset;
+    const DTYPE_INPUT_1* z_slice = z + offset;
-    const DTYPE_i2* gz_slice = gz + offset;
+    const DTYPE_INPUT_2* gz_slice = gz + offset;
-    DTYPE_o0 gradient = 0;
+    DTYPE_OUTPUT_0 gradient = 0;
    for (ga_size pd=pdstart; pd < pdend; ++pd) {
      for (ga_size ph=phstart; ph < phend; ++ph) {

--- a/theano/gpuarray/pool_max_rop.c
+++ b/theano/gpuarray/pool_max_rop.c
@@ -6,11 +6,11 @@
 KERNEL void max_pool2d_rop_kernel(const ga_size nthreads,
   const ga_size num, const ga_size channels, const ga_size pooled_height,
   const ga_size pooled_width, const ga_size height, const ga_size width,
-   GLOBAL_MEM const DTYPE_i0 *x, GLOBAL_MEM const DTYPE_i1 *ex,
+   GLOBAL_MEM const DTYPE_INPUT_0 *x, GLOBAL_MEM const DTYPE_INPUT_1 *ex,
   const ga_size kernel_h, const ga_size kernel_w,
   const ga_size stride_h, const ga_size stride_w,
   const ga_size pad_h, const ga_size pad_w,
-   GLOBAL_MEM DTYPE_o0 *z)
+   GLOBAL_MEM DTYPE_OUTPUT_0 *z)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -28,10 +28,10 @@ KERNEL void max_pool2d_rop_kernel(const ga_size nthreads,
    wstart = max(wstart, 0);
    const ga_size offset = (n*channels + c) * height * width;
-    const DTYPE_i0* x_slice = x + offset;
+    const DTYPE_INPUT_0* x_slice = x + offset;
-    const DTYPE_i1* ex_slice = ex + offset;
+    const DTYPE_INPUT_1* ex_slice = ex + offset;
-    DTYPE_o0 maxval = x_slice[hstart*width + wstart];
+    DTYPE_OUTPUT_0 maxval = x_slice[hstart*width + wstart];
-    DTYPE_o0 collector = ex_slice[hstart*width + wstart];
+    DTYPE_OUTPUT_0 collector = ex_slice[hstart*width + wstart];
    for (ga_size h=hstart; h < hend; ++h) {
      for (ga_size w=wstart; w < wend; ++w) {
@@ -53,11 +53,11 @@ KERNEL void max_pool3d_rop_kernel(const ga_size nthreads,
   const ga_size num, const ga_size channels, const ga_size pooled_depth,
   const ga_size pooled_height, const ga_size pooled_width,
   const ga_size depth, const ga_size height, const ga_size width,
-   GLOBAL_MEM const DTYPE_i0 *x, GLOBAL_MEM const DTYPE_i1 *ex,
+   GLOBAL_MEM const DTYPE_INPUT_0 *x, GLOBAL_MEM const DTYPE_INPUT_1 *ex,
   const ga_size kernel_d, const ga_size kernel_h, const ga_size kernel_w,
   const ga_size stride_d, const ga_size stride_h, const ga_size stride_w,
   const ga_size pad_d, const ga_size pad_h, const ga_size pad_w,
-   GLOBAL_MEM DTYPE_o0 *z)
+   GLOBAL_MEM DTYPE_OUTPUT_0 *z)
 {
  // grid stride looping
  for (ga_size index = GID_0 * LDIM_0 + LID_0;
@@ -79,10 +79,10 @@ KERNEL void max_pool3d_rop_kernel(const ga_size nthreads,
    wstart = max(wstart, 0);
    const ga_size offset = (n*channels + c) * depth * height * width;
-    const DTYPE_i0* x_slice = x + offset;
+    const DTYPE_INPUT_0* x_slice = x + offset;
-    const DTYPE_i1* ex_slice = ex + offset;
+    const DTYPE_INPUT_1* ex_slice = ex + offset;
-    DTYPE_o0 maxval = x_slice[(dstart*height + hstart)*width + wstart];
+    DTYPE_OUTPUT_0 maxval = x_slice[(dstart*height + hstart)*width + wstart];
-    DTYPE_o0 collector = ex_slice[(dstart*height + hstart)*width + wstart];
+    DTYPE_OUTPUT_0 collector = ex_slice[(dstart*height + hstart)*width + wstart];
    for (ga_size d=dstart; d < dend; ++d) {
      for (ga_size h=hstart; h < hend; ++h) {

--- a/theano/gpuarray/tests/tstgpueye.c
+++ b/theano/gpuarray/tests/tstgpueye.c
@@ -7,7 +7,7 @@
   has to match the kernel function name below.
 */
-KERNEL void eye(GLOBAL_MEM DTYPE_o0 *a, ga_size n, ga_size m) {
+KERNEL void eye(GLOBAL_MEM DTYPE_OUTPUT_0 *a, ga_size n, ga_size m) {
  ga_size nb = n < m ? n : m;
  for (ga_size i = LID_0; i < nb; i += LDIM_0) {
    a[i*m + i] = 1;