提交 5647b421 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Make sure MRG uniform in float16 does not return 0

Also update Python code to be consistent.
上级 9df6ce4e
......@@ -61,18 +61,21 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
otype = 'ga_half'
# limit the values of the state that we use.
mask = '& 0x7fff'
NORM = '3.0518e-05f' # numpy.float16(1.0/(2**15+8))
offset = '+ 1'
NORM = '3.0458e-05f' # numpy.float16(1.0/(2**15+33))
# this was determined by finding the biggest number such that
# numpy.float16(number * (M1 & 0x7fff)) < 1.0
# numpy.float16(number * ((M1 & 0x7fff) + 1)) < 1.0
elif self.output_type.dtype == 'float32':
otype = 'float'
mask = ''
offset = ''
NORM = '4.6566126e-10f' # numpy.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that
# numpy.float32(number * M1) < 1.0
elif self.output_type.dtype == 'float64':
otype = 'double'
mask = ''
offset = ''
NORM = '4.656612873077392578125e-10'
else:
raise ValueError('Unsupported data type for output',
......@@ -143,11 +146,11 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
x21 = y2;
if (x11 <= x21) {
sample_data[i] = %(write)s(((x11 - x21 + M1) %(mask)s) * %(NORM)s);
sample_data[i] = %(write)s((((x11 - x21 + M1) %(mask)s) %(offset)s) * %(NORM)s);
}
else
{
sample_data[i] = %(write)s(((x11 - x21) %(mask)s) * %(NORM)s);
sample_data[i] = %(write)s((((x11 - x21) %(mask)s) %(offset)s) * %(NORM)s);
}
}
......@@ -299,7 +302,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
""" % dict(fail=sub['fail']))
def c_code_cache_version(self):
return (15,)
return (16,)
@register_opt2([mrg_uniform], 'fast_compile')
......
......@@ -234,7 +234,7 @@ def ff_2p72(rstate):
return multMatVect(rstate, A1p72, M1, A2p72, M2)
def mrg_next_value(rstate, new_rstate):
def mrg_next_value(rstate, new_rstate, NORM, mask, offset):
# TODO : need description for method, parameter and return
x11, x12, x13, x21, x22, x23 = rstate
assert type(x11) == np.int32
......@@ -279,9 +279,9 @@ def mrg_next_value(rstate, new_rstate):
new_rstate[...] = [x11, x12, x13, x21, x22, x23]
assert new_rstate.dtype == np.int32
if (x11 <= x21):
return (x11 - x21 + M1) * NORM
return (((x11 - x21 + M1) & mask) + offset) * NORM
else:
return (x11 - x21) * NORM
return (((x11 - x21) & mask) + offset) * NORM
class mrg_uniform_base(Op):
......@@ -330,6 +330,7 @@ class mrg_uniform_base(Op):
class mrg_uniform(mrg_uniform_base):
# CPU VERSION
_f16_ok = True
def make_node(self, rstate, size):
# error checking slightly redundant here, since
......@@ -374,12 +375,25 @@ class mrg_uniform(mrg_uniform_base):
n_streams, _ = rstate.shape
rval = np.zeros(n_elements, dtype=self.output_type.dtype)
if rval.dtype == 'float16':
mask = 0x7fff
offset = 1
NORM = np.float16(3.0458e-05)
elif rval.dtype == 'float32':
mask = 0xffffffff
offset = 0
NORM = np.float32(4.6566126e-10)
elif rval.dtype == 'float64':
mask = 0xffffffff
offset = 0
NORM = 4.656612873077392578125e-10 # 1./2^31
err_orig = np.seterr(over='ignore')
try:
for i in xrange(n_elements):
sample = mrg_next_value(rstate[i % n_streams],
rstate[i % n_streams])
rstate[i % n_streams],
NORM=NORM, mask=mask, offset=offset)
rval[i] = sample
finally:
np.seterr(**err_orig)
......@@ -476,6 +490,9 @@ class mrg_uniform(mrg_uniform_base):
# TensorType, something is wrong (likely one of the GPU ops
# not defining C code correctly).
assert isinstance(node.inputs[0].type, TensorType)
if self.output_type.dtype == 'float16':
# C code is not tested, fall back to Python
super(mrg_uniform, self).c_code(node, name, inp, out, sub)
return """
//////// <code generated by mrg_uniform>
npy_int64 odims_i;
......@@ -592,7 +609,7 @@ class mrg_uniform(mrg_uniform_base):
""" % dict(fail=sub['fail']))
def c_code_cache_version(self):
return (9,)
return (10,)
def guess_n_streams(size, warn=False):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论