提交 134d270d authored 作者: carriepl's avatar carriepl

Merge pull request #3510 from aalmah/ticket_3359

Add n>1 experiment in multinomial sampling on CPU
...@@ -4,6 +4,8 @@ import theano ...@@ -4,6 +4,8 @@ import theano
from theano import Op, Apply from theano import Op, Apply
import theano.tensor as T import theano.tensor as T
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.tensor import NotScalarConstantError, get_scalar_constant_value
from theano.scalar import as_scalar
from theano.sandbox.cuda import cuda_available, GpuOp from theano.sandbox.cuda import cuda_available, GpuOp
if cuda_available: if cuda_available:
...@@ -33,7 +35,7 @@ class MultinomialFromUniform(Op): ...@@ -33,7 +35,7 @@ class MultinomialFromUniform(Op):
except AttributeError: except AttributeError:
self.odtype = 'auto' self.odtype = 'auto'
def make_node(self, pvals, unis): def make_node(self, pvals, unis, n=1):
pvals = T.as_tensor_variable(pvals) pvals = T.as_tensor_variable(pvals)
unis = T.as_tensor_variable(unis) unis = T.as_tensor_variable(unis)
if pvals.ndim != 2: if pvals.ndim != 2:
...@@ -45,18 +47,23 @@ class MultinomialFromUniform(Op): ...@@ -45,18 +47,23 @@ class MultinomialFromUniform(Op):
else: else:
odtype = self.odtype odtype = self.odtype
out = T.tensor(dtype=odtype, broadcastable=pvals.type.broadcastable) out = T.tensor(dtype=odtype, broadcastable=pvals.type.broadcastable)
return Apply(self, [pvals, unis], [out]) return Apply(self, [pvals, unis, as_scalar(n)], [out])
def grad(self, ins, outgrads): def grad(self, ins, outgrads):
pvals, unis = ins pvals, unis, n = ins
(gz,) = outgrads (gz,) = outgrads
return [T.zeros_like(x) for x in ins] return [T.zeros_like(x) for x in ins]
def c_code_cache_version(self): def c_code_cache_version(self):
return (6,) return (7,)
def c_code(self, node, name, ins, outs, sub): def c_code(self, node, name, ins, outs, sub):
# support old pickled graphs
if len(ins) == 2:
(pvals, unis) = ins (pvals, unis) = ins
n = 1
else:
(pvals, unis, n) = ins
(z,) = outs (z,) = outs
if self.odtype == 'auto': if self.odtype == 'auto':
t = "PyArray_TYPE(%(pvals)s)" % locals() t = "PyArray_TYPE(%(pvals)s)" % locals()
...@@ -79,9 +86,9 @@ class MultinomialFromUniform(Op): ...@@ -79,9 +86,9 @@ class MultinomialFromUniform(Op):
%(fail)s; %(fail)s;
} }
if (PyArray_DIMS(%(unis)s)[0] != PyArray_DIMS(%(pvals)s)[0]) if (PyArray_DIMS(%(unis)s)[0] != (PyArray_DIMS(%(pvals)s)[0] * %(n)s))
{ {
PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0]"); PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0] * n");
%(fail)s; %(fail)s;
} }
...@@ -91,7 +98,7 @@ class MultinomialFromUniform(Op): ...@@ -91,7 +98,7 @@ class MultinomialFromUniform(Op):
) )
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject*) PyArray_ZEROS(2, %(z)s = (PyArrayObject*) PyArray_EMPTY(2,
PyArray_DIMS(%(pvals)s), PyArray_DIMS(%(pvals)s),
%(t)s, %(t)s,
0); 0);
...@@ -106,20 +113,24 @@ class MultinomialFromUniform(Op): ...@@ -106,20 +113,24 @@ class MultinomialFromUniform(Op):
const int nb_multi = PyArray_DIMS(%(pvals)s)[0]; const int nb_multi = PyArray_DIMS(%(pvals)s)[0];
const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1]; const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1];
const int n_samples = %(n)s;
// //
// For each multinomial, loop over each possible outcome // For each multinomial, loop over each possible outcome
// //
for (int c = 0; c < n_samples; ++c){
for (int n = 0; n < nb_multi; ++n) for (int n = 0; n < nb_multi; ++n)
{ {
int waiting = 1; int waiting = 1;
dtype_%(pvals)s cummul = 0.; dtype_%(pvals)s cummul = 0.;
const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, n); const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, c*nb_multi + n);
for (int m = 0; m < nb_outcomes; ++m) for (int m = 0; m < nb_outcomes; ++m)
{ {
dtype_%(z)s* z_nm = (dtype_%(z)s*)PyArray_GETPTR2(%(z)s, n,m); dtype_%(z)s* z_nm = (dtype_%(z)s*)PyArray_GETPTR2(%(z)s, n,m);
const dtype_%(pvals)s* pvals_nm = (dtype_%(pvals)s*)PyArray_GETPTR2(%(pvals)s, n,m); const dtype_%(pvals)s* pvals_nm = (dtype_%(pvals)s*)PyArray_GETPTR2(%(pvals)s, n,m);
cummul += *pvals_nm; cummul += *pvals_nm;
if (c == 0)
{
if (waiting && (cummul > *unis_n)) if (waiting && (cummul > *unis_n))
{ {
*z_nm = 1.; *z_nm = 1.;
...@@ -131,17 +142,31 @@ class MultinomialFromUniform(Op): ...@@ -131,17 +142,31 @@ class MultinomialFromUniform(Op):
*z_nm = 0.; *z_nm = 0.;
} }
} }
else {
if (cummul > *unis_n)
{
*z_nm = *z_nm + 1.;
break;
}
}
}
}
} }
} // END NESTED SCOPE } // END NESTED SCOPE
""" % locals() """ % locals()
def perform(self, node, ins, outs): def perform(self, node, ins, outs):
# support old pickled graphs
if len(ins) == 2:
(pvals, unis) = ins (pvals, unis) = ins
n_samples = 1
else:
(pvals, unis, n_samples) = ins
(z,) = outs (z,) = outs
if unis.shape[0] != pvals.shape[0]: if unis.shape[0] != pvals.shape[0] * n_samples:
raise ValueError("unis.shape[0] != pvals.shape[0]", raise ValueError("unis.shape[0] != pvals.shape[0] * n_samples",
unis.shape[0], pvals.shape[0]) unis.shape[0], pvals.shape[0], n_samples)
if z[0] is None or z[0].shape != pvals.shape: if z[0] is None or z[0].shape != pvals.shape:
z[0] = numpy.zeros(pvals.shape, dtype=node.outputs[0].dtype) z[0] = numpy.zeros(pvals.shape, dtype=node.outputs[0].dtype)
...@@ -149,6 +174,7 @@ class MultinomialFromUniform(Op): ...@@ -149,6 +174,7 @@ class MultinomialFromUniform(Op):
nb_outcomes = pvals.shape[1] nb_outcomes = pvals.shape[1]
# For each multinomial, loop over each possible outcome # For each multinomial, loop over each possible outcome
for c in range(n_samples):
for n in range(nb_multi): for n in range(nb_multi):
waiting = True waiting = True
cummul = 0 cummul = 0
...@@ -156,11 +182,16 @@ class MultinomialFromUniform(Op): ...@@ -156,11 +182,16 @@ class MultinomialFromUniform(Op):
for m in range(nb_outcomes): for m in range(nb_outcomes):
cummul += pvals[n, m] cummul += pvals[n, m]
if c == 0:
if (waiting and (cummul > unis_n)): if (waiting and (cummul > unis_n)):
z[0][n, m] = 1 z[0][n, m] = 1
waiting = False waiting = False
else: else:
z[0][n, m] = 0 z[0][n, m] = 0
else:
if (cummul > unis_n):
z[0][n, m] += 1
break
class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp): class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
...@@ -346,7 +377,16 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp): ...@@ -346,7 +377,16 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
@local_optimizer([MultinomialFromUniform]) @local_optimizer([MultinomialFromUniform])
def local_gpu_multinomial(node): def local_gpu_multinomial(node):
if type(node.op) is MultinomialFromUniform: if type(node.op) is MultinomialFromUniform:
if len(node.inputs) == 2:
p, u = node.inputs p, u = node.inputs
n_samples = 1
else:
p, u, n_samples = node.inputs
try:
if get_scalar_constant_value(n_samples) != 1:
return None
except NotScalarConstantError:
return None
m, = node.outputs m, = node.outputs
if (p.dtype == u.dtype == m.dtype == 'float32' and if (p.dtype == u.dtype == m.dtype == 'float32' and
any([i.owner and isinstance(i.owner.op, any([i.owner and isinstance(i.owner.op,
...@@ -354,16 +394,25 @@ def local_gpu_multinomial(node): ...@@ -354,16 +394,25 @@ def local_gpu_multinomial(node):
for i in node.inputs])): for i in node.inputs])):
gpu_op = GpuMultinomialFromUniform(node.op.odtype) gpu_op = GpuMultinomialFromUniform(node.op.odtype)
return [host_from_gpu(gpu_op(*[gpu_from_host(i) return [host_from_gpu(gpu_op(*[gpu_from_host(i)
for i in node.inputs])).T] for i in [p, u]])).T]
if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and
node.inputs[0].owner and node.inputs[0].owner and
type(node.inputs[0].owner.op) is MultinomialFromUniform): type(node.inputs[0].owner.op) is MultinomialFromUniform):
multi = node.inputs[0].owner multi = node.inputs[0].owner
p, u = multi.inputs if len(node.inputs) == 2:
p, u = node.inputs
n_samples = 1
else:
p, u, n_samples = node.inputs
try:
if get_scalar_constant_value(n_samples) != 1:
return None
except NotScalarConstantError:
return None
m, = multi.outputs m, = multi.outputs
if (p.dtype == u.dtype == m.dtype == 'float32'): if (p.dtype == u.dtype == m.dtype == 'float32'):
gpu_op = GpuMultinomialFromUniform(multi.op.odtype) gpu_op = GpuMultinomialFromUniform(multi.op.odtype)
ret = gpu_op(*[gpu_from_host(i) for i in multi.inputs]).T ret = gpu_op(*[gpu_from_host(i) for i in [p, u]]).T
# The dimshuffle is on the cpu, but will be moved to the # The dimshuffle is on the cpu, but will be moved to the
# gpu by an opt. # gpu by an opt.
return [gpu_from_host(ret)] return [gpu_from_host(ret)]
......
...@@ -19,7 +19,6 @@ from theano.tensor import (raw_random, TensorType, as_tensor_variable, ...@@ -19,7 +19,6 @@ from theano.tensor import (raw_random, TensorType, as_tensor_variable,
from theano.tensor import sqrt, log, sin, cos, join, prod from theano.tensor import sqrt, log, sin, cos, join, prod
from theano.compile import optdb from theano.compile import optdb
from theano.gof import local_optimizer from theano.gof import local_optimizer
from . import multinomial from . import multinomial
from theano.sandbox.cuda import cuda_available, cuda_enabled, GpuOp from theano.sandbox.cuda import cuda_available, cuda_enabled, GpuOp
...@@ -1318,11 +1317,12 @@ class MRG_RandomStreams(object): ...@@ -1318,11 +1317,12 @@ class MRG_RandomStreams(object):
def multinomial(self, size=None, n=1, pvals=None, ndim=None, dtype='int64', def multinomial(self, size=None, n=1, pvals=None, ndim=None, dtype='int64',
nstreams=None): nstreams=None):
""" """
Sample `n` (currently `n` needs to be 1) times from a multinomial Sample `n` (`n` needs to be >= 1, default 1) times from a multinomial
distribution defined by probabilities pvals. distribution defined by probabilities pvals.
Example : pvals = [[.98, .01, .01], [.01, .98, .01]] will Example : pvals = [[.98, .01, .01], [.01, .49, .50]] and n=1 will
probably result in [[1,0,0],[0,1,0]]. probably result in [[1,0,0],[0,0,1]]. When setting n=2, this
will probably result in [[2,0,0],[0,1,1]].
Notes Notes
----- -----
...@@ -1345,7 +1345,6 @@ class MRG_RandomStreams(object): ...@@ -1345,7 +1345,6 @@ class MRG_RandomStreams(object):
"The specified size contains a dimension with value <= 0", "The specified size contains a dimension with value <= 0",
size) size)
if n == 1 and pvals.ndim == 2:
if size is not None: if size is not None:
raise ValueError("Provided a size argument to " raise ValueError("Provided a size argument to "
"MRG_RandomStreams.multinomial, which does not use " "MRG_RandomStreams.multinomial, which does not use "
...@@ -1354,16 +1353,15 @@ class MRG_RandomStreams(object): ...@@ -1354,16 +1353,15 @@ class MRG_RandomStreams(object):
raise ValueError("Provided an ndim argument to " raise ValueError("Provided an ndim argument to "
"MRG_RandomStreams.multinomial, which does not use " "MRG_RandomStreams.multinomial, which does not use "
"the ndim argument.") "the ndim argument.")
ndim, size, bcast = raw_random._infer_ndim_bcast( if pvals.ndim == 2:
ndim, size, pvals[:, 0]) size = pvals[:,0].shape * n
assert ndim == 1
bcast = bcast + (pvals.type.broadcastable[-1],)
unis = self.uniform(size=size, ndim=1, nstreams=nstreams) unis = self.uniform(size=size, ndim=1, nstreams=nstreams)
op = multinomial.MultinomialFromUniform(dtype) op = multinomial.MultinomialFromUniform(dtype)
return op(pvals, unis) n_samples = as_tensor_variable(n)
return op(pvals, unis, n_samples)
else: else:
raise NotImplementedError(("MRG_RandomStreams.multinomial only" raise NotImplementedError(("MRG_RandomStreams.multinomial only"
" implemented with n == 1 and pvals.ndim = 2")) " implemented for pvals.ndim = 2"))
def normal(self, size, avg=0.0, std=1.0, ndim=None, def normal(self, size, avg=0.0, std=1.0, ndim=None,
dtype=None, nstreams=None): dtype=None, nstreams=None):
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -8,7 +8,10 @@ from theano.sandbox import multinomial ...@@ -8,7 +8,10 @@ from theano.sandbox import multinomial
from theano.compile.mode import get_default_mode, predefined_linkers from theano.compile.mode import get_default_mode, predefined_linkers
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
import six.moves.cPickle as pickle
import os
from theano.compat import PY3
from theano.misc.pkl_utils import CompatUnpickler
def get_mode(gpu): def get_mode(gpu):
mode = get_default_mode() mode = get_default_mode()
...@@ -29,6 +32,67 @@ def run_with_c(f, gpu=False): ...@@ -29,6 +32,67 @@ def run_with_c(f, gpu=False):
f(mode, gpu) f(mode, gpu)
def test_n_samples_1():
p = tensor.fmatrix()
u = tensor.fvector()
n = tensor.iscalar()
m = multinomial.MultinomialFromUniform('auto')(p, u, n)
f = function([p, u, n], m, allow_input_downcast=True)
numpy.random.seed(12345)
for i in [1, 5, 10, 100, 1000, 10000]:
uni = numpy.random.rand(2*i).astype(config.floatX)
res = f([[1.0, 0.0], [0.0, 1.0]], uni, i)
utt.assert_allclose(res, [[i*1.0, 0.0], [0.0, i*1.0]])
def test_n_samples_2():
p = tensor.fmatrix()
u = tensor.fvector()
n = tensor.iscalar()
m = multinomial.MultinomialFromUniform('auto')(p, u, n)
f = function([p, u, n], m, allow_input_downcast=True)
numpy.random.seed(12345)
for i in [1, 5, 10, 100, 1000]:
uni = numpy.random.rand(i).astype(config.floatX)
pvals = numpy.random.randint(1,1000,(1,1000)).astype(config.floatX)
pvals /= pvals.sum(1)
res = f(pvals, uni, i)
assert res.sum() == i
for i in [1, 5, 10, 100, 1000]:
uni = numpy.random.rand(i).astype(config.floatX)
pvals = numpy.random.randint(1,1000000,(1,1000000)).astype(config.floatX)
pvals /= pvals.sum(1)
res = f(pvals, uni, i)
assert res.sum() == i
def test_n_samples_compatibility():
"""
This test checks if the new change to MultinomialFromUniform is still compatible
with old interface. Here I will load a graph created (using the old interface) as follows:
RandomStreams = theano.sandbox.rng_mrg.MRG_RandomStreams
th_rng = RandomStreams(12345)
X = T.matrix('X')
pvals = T.exp(X)
pvals = pvals / pvals.sum(axis=1, keepdims=True)
samples = th_rng.multinomial(pvals=pvals)
pickle.dump([X, samples], open("multinomial_test_graph.pkl", "w"))
"""
folder = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(folder, "multinomial_test_graph.pkl"), "rb") as pkl_file:
if PY3:
u = CompatUnpickler(pkl_file, encoding="latin1")
else:
u = CompatUnpickler(pkl_file)
X, samples = u.load()
f = theano.function([X], samples)
res = f(numpy.random.randn(20,10))
assert numpy.all(res.sum(axis=1) == 1)
def test_multinomial_0(): def test_multinomial_0():
# This tests the MultinomialFromUniform Op directly, not going through the # This tests the MultinomialFromUniform Op directly, not going through the
# multinomial() call in GPU random generation. # multinomial() call in GPU random generation.
...@@ -41,6 +105,7 @@ def test_multinomial_0(): ...@@ -41,6 +105,7 @@ def test_multinomial_0():
def body(mode, gpu): def body(mode, gpu):
# the m*2 allows the multinomial to reuse output # the m*2 allows the multinomial to reuse output
f = function([p, u], m*2, allow_input_downcast=True, mode=mode) f = function([p, u], m*2, allow_input_downcast=True, mode=mode)
if gpu: if gpu:
assert any([type(node.op) is multinomial.GpuMultinomialFromUniform assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
for node in f.maker.fgraph.toposort()]) for node in f.maker.fgraph.toposort()])
......
...@@ -772,7 +772,7 @@ def test_normal0(): ...@@ -772,7 +772,7 @@ def test_normal0():
prefix='numpy ', allow_01=True, inputs=input, mean_rtol=rtol) prefix='numpy ', allow_01=True, inputs=input, mean_rtol=rtol)
def basic_multinomialtest(f, steps, sample_size, target_pvals, def basic_multinomialtest(f, steps, sample_size, target_pvals, n_samples,
prefix="", mean_rtol=0.04): prefix="", mean_rtol=0.04):
dt = 0.0 dt = 0.0
...@@ -782,10 +782,12 @@ def basic_multinomialtest(f, steps, sample_size, target_pvals, ...@@ -782,10 +782,12 @@ def basic_multinomialtest(f, steps, sample_size, target_pvals,
t0 = time.time() t0 = time.time()
ival = f() ival = f()
assert ival.shape == sample_size assert ival.shape == sample_size
assert numpy.all(numpy.sum(ival, axis=1) == n_samples)
dt += time.time() - t0 dt += time.time() - t0
#ival = numpy.asarray(ival)
avg_pvals += ival avg_pvals += ival
avg_pvals /= steps avg_pvals /= (steps * n_samples)
assert numpy.mean(abs(avg_pvals - target_pvals)) < mean_rtol
print('random?[:10]\n', numpy.asarray(f()[:10])) print('random?[:10]\n', numpy.asarray(f()[:10]))
print(prefix, 'mean', avg_pvals) print(prefix, 'mean', avg_pvals)
...@@ -797,7 +799,6 @@ def basic_multinomialtest(f, steps, sample_size, target_pvals, ...@@ -797,7 +799,6 @@ def basic_multinomialtest(f, steps, sample_size, target_pvals,
def test_multinomial(): def test_multinomial():
steps = 100 steps = 100
mode_ = mode mode_ = mode
if mode == 'FAST_COMPILE': if mode == 'FAST_COMPILE':
...@@ -820,7 +821,7 @@ def test_multinomial(): ...@@ -820,7 +821,7 @@ def test_multinomial():
f = theano.function([], m, mode=mode_) f = theano.function([], m, mode=mode_)
# theano.printing.debugprint(f) # theano.printing.debugprint(f)
out = f() out = f()
basic_multinomialtest(f, steps, sample_size, pvals, prefix='mrg ') basic_multinomialtest(f, steps, sample_size, pvals, n_samples=1, prefix='mrg ')
sys.stdout.flush() sys.stdout.flush()
...@@ -841,10 +842,46 @@ def test_multinomial(): ...@@ -841,10 +842,46 @@ def test_multinomial():
# theano.printing.debugprint(f) # theano.printing.debugprint(f)
gpu_out = f() gpu_out = f()
sys.stdout.flush() sys.stdout.flush()
basic_multinomialtest(f, steps, sample_size, pvals, prefix='gpu mrg ') basic_multinomialtest(f, steps, sample_size, pvals, n_samples=1, prefix='gpu mrg ')
numpy.testing.assert_array_almost_equal(out, gpu_out, decimal=6) numpy.testing.assert_array_almost_equal(out, gpu_out, decimal=6)
def test_multinomial_n_samples():
mode_ = mode
if mode == 'FAST_COMPILE':
mode_ = 'FAST_RUN'
if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
mode == 'Mode' and config.linker in ['py']):
sample_size = (49, 5)
else:
sample_size = (450, 6)
mode_ = theano.compile.mode.get_mode(mode_)
pvals = numpy.asarray(numpy.random.uniform(size=sample_size))
pvals = numpy.apply_along_axis(lambda row: row / numpy.sum(row), 1, pvals)
R = MRG_RandomStreams(234, use_cuda=False)
for n_samples, steps in zip([5, 10, 100, 1000], [20, 10, 1, 1]):
m = R.multinomial(pvals=pvals, n=n_samples, dtype=config.floatX, nstreams=30 * 256)
f = theano.function([], m, mode=mode_)
basic_multinomialtest(f, steps, sample_size, pvals, n_samples, prefix='mrg ')
sys.stdout.flush()
if mode != 'FAST_COMPILE' and cuda_available:
R = MRG_RandomStreams(234, use_cuda=True)
pvals = numpy.asarray(pvals, dtype='float32')
n = R.multinomial(pvals=pvals, n=n_samples, dtype='float32', nstreams=30 * 256)
assert n.dtype == 'float32'
f = theano.function(
[],
theano.sandbox.cuda.basic_ops.gpu_from_host(n),
mode=mode_.including('gpu'))
sys.stdout.flush()
basic_multinomialtest(f, steps, sample_size, pvals, n_samples, prefix='gpu mrg ')
class T_MRG(unittest.TestCase): class T_MRG(unittest.TestCase):
def test_bad_size(self): def test_bad_size(self):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论