提交 4b634d24 作者: abergeron 提交者: GitHub

Merge pull request #6097 from lamblin/mrg_uniform_f16

Make sure MRG uniform in float16 do not return 0
......@@ -61,18 +61,21 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
otype = 'ga_half'
# limit the values of the state that we use.
mask = '& 0x7fff'
NORM = '3.0518e-05f' # numpy.float16(1.0/(2**15+8))
offset = '+ 1'
NORM = '3.0458e-05f' # numpy.float16(1.0/(2**15+33))
# this was determined by finding the biggest number such that
# numpy.float16(number * (M1 & 0x7fff)) < 1.0
# numpy.float16(number * ((M1 & 0x7fff) + 1)) < 1.0
elif self.output_type.dtype == 'float32':
otype = 'float'
mask = ''
offset = ''
NORM = '4.6566126e-10f' # numpy.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that
# numpy.float32(number * M1) < 1.0
elif self.output_type.dtype == 'float64':
otype = 'double'
mask = ''
offset = ''
NORM = '4.656612873077392578125e-10'
else:
raise ValueError('Unsupported data type for output',
......@@ -143,11 +146,11 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
x21 = y2;
if (x11 <= x21) {
sample_data[i] = %(write)s(((x11 - x21 + M1) %(mask)s) * %(NORM)s);
sample_data[i] = %(write)s((((x11 - x21 + M1) %(mask)s) %(offset)s) * %(NORM)s);
}
else
{
sample_data[i] = %(write)s(((x11 - x21) %(mask)s) * %(NORM)s);
sample_data[i] = %(write)s((((x11 - x21) %(mask)s) %(offset)s) * %(NORM)s);
}
}
......@@ -299,7 +302,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
""" % dict(fail=sub['fail']))
def c_code_cache_version(self):
    """Return the cache version tag for the generated C code.

    Bumped from (15,) to (16,) because the kernel code changed (the
    float16 path now adds an offset before scaling by NORM, so uniform
    samples can no longer round down to exactly 0).  The diff residue
    left two return statements; the second was unreachable — keep only
    the post-change version.
    """
    return (16,)
@register_opt2([mrg_uniform], 'fast_compile')
......
......@@ -9,6 +9,7 @@ from theano.configparser import change_flags
from theano.sandbox import rng_mrg
from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.sandbox.tests.test_rng_mrg import java_samples, rng_mrg_overflow
from theano.sandbox.tests.test_rng_mrg import test_f16_nonzero as cpu_f16_nonzero
from theano.tests import unittest_tools as utt
from .config import mode_with_gpu as mode
......@@ -162,3 +163,7 @@ def test_validate_input_types_gpuarray_backend():
rstate = np.zeros((7, 6), dtype="int32")
rstate = gpuarray_shared_constructor(rstate)
rng_mrg.mrg_uniform.new(rstate, ndim=None, dtype="float32", size=(3,))
def test_f16_nonzero():
    """GPU variant of the float16 non-zero check.

    Delegates to the shared CPU test, but compiles in the GPU mode and
    verifies the graph actually contains a ``GPUA_mrg_uniform`` node.
    """
    cpu_f16_nonzero(mode=mode, op_to_check=GPUA_mrg_uniform)
......@@ -234,7 +234,7 @@ def ff_2p72(rstate):
return multMatVect(rstate, A1p72, M1, A2p72, M2)
def mrg_next_value(rstate, new_rstate):
def mrg_next_value(rstate, new_rstate, NORM, mask, offset):
# TODO : need description for method, parameter and return
x11, x12, x13, x21, x22, x23 = rstate
assert type(x11) == np.int32
......@@ -279,9 +279,9 @@ def mrg_next_value(rstate, new_rstate):
new_rstate[...] = [x11, x12, x13, x21, x22, x23]
assert new_rstate.dtype == np.int32
if (x11 <= x21):
return (x11 - x21 + M1) * NORM
return (((x11 - x21 + M1) & mask) + offset) * NORM
else:
return (x11 - x21) * NORM
return (((x11 - x21) & mask) + offset) * NORM
class mrg_uniform_base(Op):
......@@ -330,6 +330,7 @@ class mrg_uniform_base(Op):
class mrg_uniform(mrg_uniform_base):
# CPU VERSION
_f16_ok = True
def make_node(self, rstate, size):
# error checking slightly redundant here, since
......@@ -374,12 +375,25 @@ class mrg_uniform(mrg_uniform_base):
n_streams, _ = rstate.shape
rval = np.zeros(n_elements, dtype=self.output_type.dtype)
if rval.dtype == 'float16':
mask = 0x7fff
offset = 1
NORM = np.float16(3.0458e-05)
elif rval.dtype == 'float32':
mask = 0xffffffff
offset = 0
NORM = np.float32(4.6566126e-10)
elif rval.dtype == 'float64':
mask = 0xffffffff
offset = 0
NORM = 4.656612873077392578125e-10 # 1./2^31
err_orig = np.seterr(over='ignore')
try:
for i in xrange(n_elements):
sample = mrg_next_value(rstate[i % n_streams],
rstate[i % n_streams])
rstate[i % n_streams],
NORM=NORM, mask=mask, offset=offset)
rval[i] = sample
finally:
np.seterr(**err_orig)
......@@ -476,6 +490,9 @@ class mrg_uniform(mrg_uniform_base):
# TensorType, something is wrong (likely one of the GPU ops
# not defining C code correctly).
assert isinstance(node.inputs[0].type, TensorType)
if self.output_type.dtype == 'float16':
# C code is not tested, fall back to Python
super(mrg_uniform, self).c_code(node, name, inp, out, sub)
return """
//////// <code generated by mrg_uniform>
npy_int64 odims_i;
......@@ -592,7 +609,7 @@ class mrg_uniform(mrg_uniform_base):
""" % dict(fail=sub['fail']))
def c_code_cache_version(self):
    """Return the cache version tag for the generated C code.

    Bumped from (9,) to (10,) alongside the float16 sampling change
    (float16 now falls back to the Python implementation so uniform
    samples cannot be exactly 0).  The diff residue left two return
    statements; the second was unreachable — keep only the post-change
    version.
    """
    return (10,)
def guess_n_streams(size, warn=False):
......
......@@ -742,6 +742,16 @@ def test_undefined_grad():
(avg, std))
def test_f16_nonzero(mode=None, op_to_check=rng_mrg.mrg_uniform):
    """Check that float16 MRG uniform samples lie strictly in (0, 1).

    Parameters
    ----------
    mode : theano compilation mode, optional
        Mode to compile the sampling function with (e.g. a GPU mode).
    op_to_check : Op class
        The uniform-sampling Op expected to appear in the compiled graph.
    """
    stream = MRG_RandomStreams(seed=utt.fetch_seed())
    sample = stream.uniform(size=(1000, 1000), dtype='float16')
    assert sample.dtype == 'float16', sample.type

    fn = theano.function([], sample, mode=mode)
    # Make sure the expected Op was not optimized away / replaced.
    nodes = fn.maker.fgraph.apply_nodes
    assert any(isinstance(node.op, op_to_check) for node in nodes)

    values = fn()
    # Strict bounds: the whole point of the fix is that 0.0 never appears.
    assert np.all(0 < values)
    assert np.all(values < 1)
if __name__ == "__main__":
rng = MRG_RandomStreams(np.random.randint(2147462579))
print(theano.__file__)
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论