提交 786f2b43 authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #4835 from abergeron/fix_debugmode

Make the DLT mlp example learn in float16
......@@ -369,7 +369,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
return node.inputs[0].type.context
def c_code_cache_version(self):
return (11,)
return (12,)
def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/types.h>']
......@@ -499,7 +499,8 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
load_sm = load_w(dtype_sm)
write_dx = write_w(dtype_dx)
flags = Kernel.get_flags(dtype_dnll, dtype_sm, dtype_y_idx, dtype_dx)
type_dnll = gpuarray.dtype_to_ctype(work_dnll)
wtype_dnll = gpuarray.dtype_to_ctype(work_dnll)
type_dnll = gpuarray.dtype_to_ctype(dtype_dnll)
type_sm = gpuarray.dtype_to_ctype(dtype_sm)
type_y_idx = gpuarray.dtype_to_ctype(dtype_y_idx)
type_dx = gpuarray.dtype_to_ctype(dtype_dx)
......@@ -525,7 +526,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
for (int i = blockIdx.x; i < N; i += gridDim.x)
{
%(type_dnll)s dnll_i = %(load_dnll)s(dnll[i * dnll_s0]);
%(wtype_dnll)s dnll_i = %(load_dnll)s(dnll[i * dnll_s0]);
%(type_y_idx)s y_i = y_idx[i * y_idx_s0];
for (int j = threadIdx.x; j < K; j += blockDim.x)
......
......@@ -463,8 +463,8 @@ if int(config.tensor.cmp_sloppy) > 1:
# When config.tensor.cmp_sloppy > 1 we are even more sloppy. This is
# useful for testing on GPUs, as they don't use extended precision, which
# causes differences bigger than the normal sloppy tolerances allow.
float16_atol = 5e-3
float16_rtol = 1e-2
float16_atol = 1e-2
float16_rtol = 5e-2
float32_atol = 5e-4
float32_rtol = 1e-3
......@@ -472,8 +472,8 @@ if int(config.tensor.cmp_sloppy) > 1:
float64_rtol = 1e-4
float64_atol = 1e-3
elif int(config.tensor.cmp_sloppy):
float16_atol = 1e-3
float16_rtol = 5e-3
float16_atol = 5e-3
float16_rtol = 1e-2
float32_atol = 1e-4
float32_rtol = 1e-3
......@@ -483,8 +483,8 @@ elif int(config.tensor.cmp_sloppy):
else:
# If you change these values in a test, don't forget to put them back
# when the test ends. Don't forget the case where the test fails.
float16_atol = 5e-4
float16_rtol = 5e-4
float16_atol = 1e-3
float16_rtol = 1e-3
float32_atol = 1e-5
float32_rtol = 1e-5
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论