Merge pull request #4835 from abergeron/fix_debugmode

Make the DLT mlp example learn in float16

Merge pull request #4835 from abergeron/fix_debugmode
786f2b43 · Pascal Lamblin · GitHub · 7caaa80f · 655fa228 · 786f2b43
--- a/theano/gpuarray/nnet.py
+++ b/theano/gpuarray/nnet.py
@@ -369,7 +369,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
        return node.inputs[0].type.context

    def c_code_cache_version(self):
-        return (11,)
+        return (12,)

    def c_headers(self):
        return ['<numpy_compat.h>', '<gpuarray/types.h>']
@@ -499,7 +499,8 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
        load_sm = load_w(dtype_sm)
        write_dx = write_w(dtype_dx)
        flags = Kernel.get_flags(dtype_dnll, dtype_sm, dtype_y_idx, dtype_dx)
-        type_dnll = gpuarray.dtype_to_ctype(work_dnll)
+        wtype_dnll = gpuarray.dtype_to_ctype(work_dnll)
+        type_dnll = gpuarray.dtype_to_ctype(dtype_dnll)
        type_sm = gpuarray.dtype_to_ctype(dtype_sm)
        type_y_idx = gpuarray.dtype_to_ctype(dtype_y_idx)
        type_dx = gpuarray.dtype_to_ctype(dtype_dx)
@@ -525,7 +526,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):

            for (int i = blockIdx.x; i < N; i += gridDim.x)
            {
-                %(type_dnll)s dnll_i = %(load_dnll)s(dnll[i * dnll_s0]);
+                %(wtype_dnll)s dnll_i = %(load_dnll)s(dnll[i * dnll_s0]);
                %(type_y_idx)s y_i = y_idx[i * y_idx_s0];

                for (int j = threadIdx.x; j < K; j += blockDim.x)

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -463,8 +463,8 @@ if int(config.tensor.cmp_sloppy) > 1:
    # When config.tensor.cmp_sloppy > 1 we are even more sloppy. This is
    # useful for testing on the GPU, as GPUs don't use extended precision
    # and this causes some differences bigger than the normal sloppy ones.
-    float16_atol = 5e-3
-    float16_rtol = 1e-2
+    float16_atol = 1e-2
+    float16_rtol = 5e-2

    float32_atol = 5e-4
    float32_rtol = 1e-3
@@ -472,8 +472,8 @@ if int(config.tensor.cmp_sloppy) > 1:
    float64_rtol = 1e-4
    float64_atol = 1e-3
 elif int(config.tensor.cmp_sloppy):
-    float16_atol = 1e-3
-    float16_rtol = 5e-3
+    float16_atol = 5e-3
+    float16_rtol = 1e-2

    float32_atol = 1e-4
    float32_rtol = 1e-3
@@ -483,8 +483,8 @@ elif int(config.tensor.cmp_sloppy):
 else:
    # If you change these values in a test, don't forget to restore them
    # when the test ends. Don't forget the case where the test fails.
-    float16_atol = 5e-4
-    float16_rtol = 5e-4
+    float16_atol = 1e-3
+    float16_rtol = 1e-3

    float32_atol = 1e-5
    float32_rtol = 1e-5