提交 29cbd921 authored 作者: James Bergstra's avatar James Bergstra

rng_mrg - added C implementation for uniform

上级 7999e2d3
...@@ -9,7 +9,7 @@ import sys ...@@ -9,7 +9,7 @@ import sys
import numpy import numpy
from theano import Op, Apply, shared, config from theano import Op, Apply, shared, config
from theano.tensor import raw_random, TensorType, as_tensor_variable, get_vector_length from theano.tensor import raw_random, TensorType, as_tensor_variable, get_vector_length, cast
def mulmod(a, b, c, m): def mulmod(a, b, c, m):
r = numpy.int32(numpy.int64(a*b + c) % m) r = numpy.int32(numpy.int64(a*b + c) % m)
...@@ -118,6 +118,8 @@ class mrg_uniform(Op): ...@@ -118,6 +118,8 @@ class mrg_uniform(Op):
def __init__(self, output_type, inplace=False): def __init__(self, output_type, inplace=False):
self.output_type = output_type self.output_type = output_type
self.inplace=inplace self.inplace=inplace
if inplace:
self.destroy_map = {0:[0]}
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) \ return type(self) == type(other) \
...@@ -128,18 +130,18 @@ class mrg_uniform(Op): ...@@ -128,18 +130,18 @@ class mrg_uniform(Op):
return hash(type(self)) ^ hash(self.output_type) ^ hash(self.inplace) return hash(type(self)) ^ hash(self.output_type) ^ hash(self.inplace)
@classmethod @classmethod
def new(cls, rstate, ndim, dtype, size, low, high): def new(cls, rstate, ndim, dtype, size):
v_size = as_tensor_variable(size) v_size = as_tensor_variable(size)
if ndim is None: if ndim is None:
ndim = get_vector_length(v_size) ndim = get_vector_length(v_size)
op = cls(TensorType(dtype, (False,)*ndim)) op = cls(TensorType(dtype, (False,)*ndim))
return op(rstate, v_size, as_tensor_variable(low), as_tensor_variable(high)) return op(rstate, cast(v_size, 'int32'))
def make_node(self, rstate, size, low, high): def make_node(self, rstate, size):
return Apply(self, return Apply(self,
[rstate, size, low, high], [rstate, size],
[rstate.type(), self.output_type()]) [rstate.type(), self.output_type()])
def perform(self, node, (rstate, size, low, high), (o_rstate, o_sample)): def perform(self, node, (rstate, size), (o_rstate, o_sample)):
n_elements = 1 n_elements = 1
if not self.inplace: if not self.inplace:
rstate = rstate.copy() rstate = rstate.copy()
...@@ -158,6 +160,163 @@ class mrg_uniform(Op): ...@@ -158,6 +160,163 @@ class mrg_uniform(Op):
o_rstate[0] = rstate.copy() o_rstate[0] = rstate.copy()
o_sample[0] = rval.reshape(size) o_sample[0] = rval.reshape(size)
def c_code_cache_version(self):
return ()
def c_code(self, node, name, (rstate, size), (o_rstate, o_sample), sub):
if self.inplace:
o_rstate_requirement = 'NPY_C_CONTIGUOUS|NPY_ALIGNED'
else:
o_rstate_requirement = 'NPY_ENSURECOPY|NPY_C_CONTIGUOUS|NPY_ALIGNED'
ndim = self.output_type.ndim
o_type_num = numpy.asarray(0, dtype=self.output_type.dtype).dtype.num
fail = sub['fail']
if self.output_type.dtype == 'float32':
otype = 'float'
NORM = '4.6566126e-10f' #numpy.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that
# numpy.float32(number * M1) < 1.0
else:
otype = 'double'
NORM = '4.656612873077392578125e-10'
return """
//////// <code generated by mrg_uniform>
npy_intp odims[%(ndim)s];
int n_elements = 1;
int n_streams = 0;
int must_alloc_sample = ((NULL == %(o_sample)s) || (%(o_sample)s->nd != %(ndim)s));
%(otype)s * sample_data;
npy_int32 * state_data;
const npy_int32 i0 = 0;
const npy_int32 i7 = 7;
const npy_int32 i9 = 9;
const npy_int32 i15 = 15;
const npy_int32 i16 = 16;
const npy_int32 i22 = 22;
const npy_int32 i24 = 24;
const npy_int32 M1 = 2147483647; //2^31 - 1
const npy_int32 M2 = 2147462579; //2^31 - 21069
const npy_int32 MASK12 = 511; //2^9 - 1
const npy_int32 MASK13 = 16777215; //2^24 - 1
const npy_int32 MASK2 = 65535; //2^16 - 1
const npy_int32 MULT2 = 21069;
if (%(size)s->nd != 1)
{
PyErr_SetString(PyExc_ValueError, "size must be vector");
%(fail)s
}
if (%(size)s->dimensions[0] != %(ndim)s)
{
PyErr_Format(PyExc_ValueError, "size must have length %%i", %(ndim)s);
%(fail)s
}
if (%(size)s->descr->type_num != PyArray_INT32)
{
PyErr_SetString(PyExc_ValueError, "size must be int32");
%(fail)s
}
for (int i = 0; i < %(ndim)s; ++i)
{
odims[i] = ((npy_int32*)(%(size)s->data + %(size)s->strides[0] * i))[0];
n_elements *= odims[i];
must_alloc_sample = must_alloc_sample || (%(o_sample)s->dimensions[i] != odims[i]);
//fprintf(stderr, "size %%i %%i\\n", i, (int)odims[i]);
// TODO CHECK STRIDES OF o_sample?
}
if (must_alloc_sample)
{
Py_XDECREF(%(o_sample)s);
%(o_sample)s = (PyArrayObject*)PyArray_SimpleNew(%(ndim)s, odims, %(o_type_num)s);
if(!%(o_sample)s) {
PyErr_SetString(PyExc_MemoryError, "failed to alloc mrg_uniform output");
%(fail)s
}
}
Py_XDECREF(%(o_rstate)s);
%(o_rstate)s = (PyArrayObject*)PyArray_FromAny(py_%(rstate)s, NULL, 0, 0, %(o_rstate_requirement)s,NULL);
if (%(o_rstate)s->nd != 2)
{
PyErr_SetString(PyExc_ValueError, "rstate must be matrix");
%(fail)s
}
if (%(o_rstate)s->dimensions[1] != 6)
{
PyErr_Format(PyExc_ValueError, "rstate must have 6 columns");
%(fail)s
}
if (%(o_rstate)s->descr->type_num != PyArray_INT32)
{
PyErr_SetString(PyExc_ValueError, "rstate must be int32");
%(fail)s
}
n_streams = %(o_rstate)s->dimensions[0];
sample_data = (%(otype)s *) %(o_sample)s->data;
state_data = (npy_int32 *) %(o_rstate)s->data;
for (int i = 0; i < n_elements; ++i)
{
npy_int32 * state_data_i = state_data + (i%%n_streams)*6;
npy_int32 y1, y2, x11, x12, x13, x21, x22, x23;
x11 = state_data_i[0];
x12 = state_data_i[1];
x13 = state_data_i[2];
x21 = state_data_i[3];
x22 = state_data_i[4];
x23 = state_data_i[5];
y1 = ((x12 & MASK12) << i22) + (x12 >> i9) + ((x13 & MASK13) << i7) + (x13 >> i24);
if ((y1 < 0 || y1 >= M1)) //must also check overflow
y1 -= M1;
y1 += x13;
if ((y1 < 0 or y1 >= M1))
y1 -= M1;
x13 = x12;
x12 = x11;
x11 = y1;
y1 = ((x21 & MASK2) << i15) + (MULT2 * (x21 >> i16));
if (y1 < 0 || y1 >= M2)
y1 -= M2;
y2 = ((x23 & MASK2) << i15) + (MULT2 * (x23 >> i16));
if (y2 < 0 || y2 >= M2)
y2 -= M2;
y2 += x23;
if (y2 < 0 || y2 >= M2)
y2 -= M2;
y2 += y1;
if (y2 < 0 or y2 >= M2)
y2 -= M2;
x23 = x22;
x22 = x21;
x21 = y2;
if (x11 <= x21) {
assert((x11 - x21 + M1) <= M1);
sample_data[i] = (x11 - x21 + M1) * %(NORM)s;
}
else
{
assert(x11 - x21 <= M1);
sample_data[i] = (x11 - x21) * %(NORM)s;
}
state_data_i[0]= x11;
state_data_i[1]= x12;
state_data_i[2]= x13;
state_data_i[3]= x21;
state_data_i[4]= x22;
state_data_i[5]= x23;
}
//////// </ code generated by mrg_uniform>
""" %locals()
class MRG_RandomStreams(object): class MRG_RandomStreams(object):
"""Module component with similar interface to numpy.random (numpy.random.RandomState)""" """Module component with similar interface to numpy.random (numpy.random.RandomState)"""
...@@ -213,27 +372,60 @@ class MRG_RandomStreams(object): ...@@ -213,27 +372,60 @@ class MRG_RandomStreams(object):
information. information.
""" """
node_rstate = shared(self.get_substream_rstates(self.n_streams(size))) node_rstate = shared(self.get_substream_rstates(self.n_streams(size)))
return self.pretty_return(node_rstate, u = self.pretty_return(node_rstate,
*mrg_uniform.new(node_rstate, ndim, dtype, size, low, high)) *mrg_uniform.new(node_rstate, ndim, dtype, size))
r = u * (high-low) + low
if u.type.broadcastable != r.type.broadcastable:
raise NotImplementedError( 'Increase the size to match the broadcasting pattern of `low` and `high` arguments')
return r
# #
# #
# #
# #
# #
import time
import theano import theano
def test_rng0(): def test_rng0():
def basictest(f, steps, prefix=""):
t0 = time.time()
l = [f() for i in xrange(steps)]
tt = time.time()
mean, std, min, max = numpy.mean(l), numpy.std(l), numpy.min(l), numpy.max(l)
print prefix, 'mean', mean
print prefix, 'std', std
print prefix, 'min', repr(min)
print prefix, 'max', repr(max)
print prefix, 'samples/sec', steps*sample_size[0]*sample_size[1] / (tt-t0)
assert max < 1.0
assert min >= 0.0
assert abs(mean - 0.5) < .01, 'bad mean?'
R = MRG_RandomStreams(234) R = MRG_RandomStreams(234)
u = R.uniform(size=(2,2), low=0, high=55) sample_size = (200,20)
u = R.uniform(size=sample_size)
print "U dtype", u.dtype
f = theano.function([], u) f = theano.function([], u)
print 'random?', f() print 'random?', f()[0]
print 'random?', f() print 'random?', f()[0]
basictest(f, 1000, prefix='mrg ')
RR = theano.tensor.shared_randomstreams.RandomStreams(234)
uu = RR.uniform(size=sample_size)
ff = theano.function([], uu)
l = [f() for i in xrange(1000)] basictest(ff, 1000, prefix='numpy')
print 'mean', numpy.mean(l), numpy.std(l) / numpy.sqrt(1000)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论