提交 757a24d5 authored 作者: Frederic's avatar Frederic

Make python version of rng_mrg faster.

- Move the call to numpy.seterr to the outer function.
- Don't recreate the same constants on every call.
- Inline a function call in the inner for loop.
上级 99263fbf
......@@ -24,31 +24,28 @@ if cuda_available:
float32_shared_constructor)
def mulmod(a, b, c, m):
    """Return ``(a * b + c) % m`` narrowed to a ``numpy.int32``.

    ``a`` is widened to ``numpy.int64`` before the multiplication so the
    intermediate product is computed in 64-bit arithmetic; only the
    reduced remainder is converted back to ``numpy.int32``.

    If the narrowed remainder is negative, ``m`` is added to it once
    before returning.
    """
    r = numpy.int32((numpy.int64(a) * b + c) % m)
    if r >= 0:
        return r
    return r + m
def matVecModM(A, s, m):
    """Return ``(A * s) % m`` as an array shaped and typed like ``s``.

    ``A`` is indexed as ``A[i][j]`` and ``s`` as ``s[j]``; entry ``i`` of
    the result accumulates ``A[i][j] * s[j]`` over ``j``, reducing modulo
    ``m`` after every term.  Each product is formed after widening
    ``A[i][j]`` to ``numpy.int64``, and the reduced value is narrowed to
    ``numpy.int32`` before being stored.

    This function does not touch the numpy error state itself; a caller
    that wants overflow warnings silenced wraps the call in
    ``numpy.seterr(over='ignore')``, as ``multMatVect`` does.
    """
    x = numpy.zeros_like(s)
    # ``range`` rather than ``xrange`` so the loop runs on Python 2 and 3.
    for i in range(len(x)):
        for j in range(len(s)):
            # Inlined multiply-add-mod: x[i] = (A[i][j] * s[j] + x[i]) % m
            r = numpy.int32((numpy.int64(A[i][j]) * s[j] + x[i]) % m)
            if r >= 0:
                x[i] = r
            else:
                # A negative narrowed remainder is shifted up by m.
                x[i] = r + m
    return x
def multMatVect(v, A, m1, B, m2):
    """Multiply the two halves of ``v`` by ``A`` and ``B`` modulo ``m1``/``m2``.

    The first three entries of ``v`` are multiplied by ``A`` with a modulo
    of ``m1`` and the remaining entries by ``B`` with a modulo of ``m2``,
    both through ``matVecModM``.  The result is a new array shaped and
    typed like ``v``; ``v`` itself is not written to.

    Numpy overflow reporting is set to ``'ignore'`` for the duration of
    the call and the previous error settings are restored afterwards,
    even if an exception is raised.
    """
    err_orig = numpy.seterr(over='ignore')
    try:
        r = numpy.zeros_like(v)
        r[:3] = matVecModM(A, v[:3], m1)
        r[3:] = matVecModM(B, v[3:], m2)
    finally:
        # Always put the caller's numpy error settings back.
        numpy.seterr(**err_orig)
    return r
......@@ -80,6 +77,7 @@ A2p134 = numpy.asarray(
[[796789021, 1464208080, 607337906],
[1241679051, 1431130166, 1464208080],
[1401213391, 1178684362, 1431130166]])
# numpy.int32 constants built once at import time; mrg_next_value unpacks
# them (as i0, i7, i9, i15, i16, i22, i24) instead of rebuilding the list
# on every call.
np_int32_vals = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
def ff_2p134(rstate):
    """Return ``rstate`` transformed by the ``A1p134``/``A2p134`` matrices.

    Delegates to ``multMatVect``: the first half of the state is
    multiplied by ``A1p134`` modulo ``M1`` and the second half by
    ``A2p134`` modulo ``M2``.

    NOTE(review): the name suggests this fast-forwards the generator by
    2**134 steps -- confirm against the matrix definitions.
    """
    return multMatVect(rstate, A1p134, M1, A2p134, M2)
......@@ -87,15 +85,13 @@ def ff_2p134(rstate):
def ff_2p72(rstate):
    """Return ``rstate`` transformed by the ``A1p72``/``A2p72`` matrices.

    Delegates to ``multMatVect``: the first half of the state is
    multiplied by ``A1p72`` modulo ``M1`` and the second half by
    ``A2p72`` modulo ``M2``.

    NOTE(review): the name suggests this fast-forwards the generator by
    2**72 steps -- confirm against the matrix definitions.
    """
    return multMatVect(rstate, A1p72, M1, A2p72, M2)
def mrg_next_value(rstate, new_rstate):
err_orig = numpy.seterr(over='ignore')
try:
x11, x12, x13, x21, x22, x23 = rstate
assert type(x11) == numpy.int32
i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i)
for i in (0,7, 9, 15, 16, 22, 24)]
#i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
i0, i7, i9, i15, i16, i22, i24 = np_int32_vals
#first component
y1 = (((x12 & MASK12) << i22) + (x12 >> i9)
+ ((x13 & MASK13) << i7) + (x13 >> i24))
......@@ -138,8 +134,6 @@ def mrg_next_value(rstate, new_rstate):
return (x11 - x21 + M1) * NORM
else:
return (x11 - x21) * NORM
finally:
numpy.seterr(**err_orig)
class mrg_uniform_base(Op):
def __init__(self, output_type, inplace=False):
......@@ -211,9 +205,13 @@ class mrg_uniform(mrg_uniform_base):
rval = numpy.zeros(n_elements, dtype=self.output_type.dtype)
err_orig = numpy.seterr(over='ignore')
try:
for i in xrange(n_elements):
sample = mrg_next_value(rstate[i%n_streams], rstate[i%n_streams])
rval[i] = sample
finally:
numpy.seterr(**err_orig)
o_rstate[0] = node.outputs[0].type.filter(rstate) # send to GPU if necessary
o_sample[0] = node.outputs[1].type.filter(rval.reshape(size))# send to GPU if necessary
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论