Merge pull request #3510 from aalmah/ticket_3359

Add n>1 experiment in multinomial sampling on CPU

Merge pull request #3510 from aalmah/ticket_3359
134d270d · carriepl · 0db3f9af · 8010cfcb · 134d270d · 134d270d
--- a/theano/sandbox/multinomial.py
+++ b/theano/sandbox/multinomial.py
@@ -4,6 +4,8 @@ import theano
 from theano import Op, Apply
 import theano.tensor as T
 from theano.gof import local_optimizer
+from theano.tensor import NotScalarConstantError, get_scalar_constant_value
+from theano.scalar import as_scalar
 from theano.sandbox.cuda import cuda_available, GpuOp
 if cuda_available:
@@ -33,7 +35,7 @@ class MultinomialFromUniform(Op):
        except AttributeError:
            self.odtype = 'auto'
-    def make_node(self, pvals, unis):
+    def make_node(self, pvals, unis, n=1):
        pvals = T.as_tensor_variable(pvals)
        unis = T.as_tensor_variable(unis)
        if pvals.ndim != 2:
@@ -45,18 +47,23 @@ class MultinomialFromUniform(Op):
        else:
            odtype = self.odtype
        out = T.tensor(dtype=odtype, broadcastable=pvals.type.broadcastable)
-        return Apply(self, [pvals, unis], [out])
+        return Apply(self, [pvals, unis, as_scalar(n)], [out])
    def grad(self, ins, outgrads):
-        pvals, unis = ins
+        pvals, unis, n = ins
        (gz,) = outgrads
        return [T.zeros_like(x) for x in ins]
    def c_code_cache_version(self):
-        return (6,)
+        return (7,)
    def c_code(self, node, name, ins, outs, sub):
+        # support old pickled graphs
+        if len(ins) == 2:
            (pvals, unis) = ins
+            n = 1
+        else:
+            (pvals, unis, n) = ins
        (z,) = outs
        if self.odtype == 'auto':
            t = "PyArray_TYPE(%(pvals)s)" % locals()
@@ -79,9 +86,9 @@ class MultinomialFromUniform(Op):
            %(fail)s;
        }
-        if (PyArray_DIMS(%(unis)s)[0] != PyArray_DIMS(%(pvals)s)[0])
+        if (PyArray_DIMS(%(unis)s)[0] != (PyArray_DIMS(%(pvals)s)[0] * %(n)s))
        {
-            PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0]");
+            PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0] * n");
            %(fail)s;
        }
@@ -91,7 +98,7 @@ class MultinomialFromUniform(Op):
        )
        {
            Py_XDECREF(%(z)s);
-            %(z)s = (PyArrayObject*) PyArray_ZEROS(2,
+            %(z)s = (PyArrayObject*) PyArray_EMPTY(2,
                PyArray_DIMS(%(pvals)s),
                %(t)s,
                0);
@@ -106,20 +113,24 @@ class MultinomialFromUniform(Op):
        const int nb_multi = PyArray_DIMS(%(pvals)s)[0];
        const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1];
+        const int n_samples = %(n)s;
        //
        // For each multinomial, loop over each possible outcome
        //
+        for (int c = 0; c < n_samples; ++c){
            for (int n = 0; n < nb_multi; ++n)
            {
                int waiting = 1;
                dtype_%(pvals)s cummul = 0.;
-            const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, n);
+                const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, c*nb_multi + n);
                for (int m = 0; m < nb_outcomes; ++m)
                {
                    dtype_%(z)s* z_nm = (dtype_%(z)s*)PyArray_GETPTR2(%(z)s, n,m);
                    const dtype_%(pvals)s* pvals_nm = (dtype_%(pvals)s*)PyArray_GETPTR2(%(pvals)s, n,m);
                    cummul += *pvals_nm;
+                    if (c == 0)
+                    {
                        if (waiting && (cummul > *unis_n))
                        {
                            *z_nm = 1.;
@@ -131,17 +142,31 @@ class MultinomialFromUniform(Op):
                            *z_nm = 0.;
                        }
                    }
+                    else {
+                        if (cummul > *unis_n)
+                        {
+                            *z_nm = *z_nm + 1.;
+                            break;
+                        }
+                    }
+                }
+            }
        }
        } // END NESTED SCOPE
        """ % locals()
    def perform(self, node, ins, outs):
+        # support old pickled graphs
+        if len(ins) == 2:
            (pvals, unis) = ins
+            n_samples = 1
+        else:
+            (pvals, unis, n_samples) = ins
        (z,) = outs
-        if unis.shape[0] != pvals.shape[0]:
+        if unis.shape[0] != pvals.shape[0] * n_samples:
-            raise ValueError("unis.shape[0] != pvals.shape[0]",
+            raise ValueError("unis.shape[0] != pvals.shape[0] * n_samples",
-                             unis.shape[0], pvals.shape[0])
+                             unis.shape[0], pvals.shape[0], n_samples)
        if z[0] is None or z[0].shape != pvals.shape:
            z[0] = numpy.zeros(pvals.shape, dtype=node.outputs[0].dtype)
@@ -149,6 +174,7 @@ class MultinomialFromUniform(Op):
        nb_outcomes = pvals.shape[1]
        # For each multinomial, loop over each possible outcome
+        for c in range(n_samples):
            for n in range(nb_multi):
                waiting = True
                cummul = 0
@@ -156,11 +182,16 @@ class MultinomialFromUniform(Op):
                for m in range(nb_outcomes):
                    cummul += pvals[n, m]
+                    if c == 0:
                        if (waiting and (cummul > unis_n)):
                            z[0][n, m] = 1
                            waiting = False
                        else:
                            z[0][n, m] = 0
+                    else:
+                        if (cummul > unis_n):
+                            z[0][n, m] += 1
+                            break
 class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
@@ -346,7 +377,16 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
 @local_optimizer([MultinomialFromUniform])
 def local_gpu_multinomial(node):
    if type(node.op) is MultinomialFromUniform:
+        if len(node.inputs) == 2:
            p, u = node.inputs
+            n_samples = 1
+        else:
+            p, u, n_samples = node.inputs
+        try:
+            if get_scalar_constant_value(n_samples) != 1:
+                return None
+        except NotScalarConstantError:
+            return None
        m, = node.outputs
        if (p.dtype == u.dtype == m.dtype == 'float32' and
            any([i.owner and isinstance(i.owner.op,
@@ -354,16 +394,25 @@ def local_gpu_multinomial(node):
                 for i in node.inputs])):
            gpu_op = GpuMultinomialFromUniform(node.op.odtype)
            return [host_from_gpu(gpu_op(*[gpu_from_host(i)
-                                           for i in node.inputs])).T]
+                                           for i in [p, u]])).T]
    if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and
            node.inputs[0].owner and
            type(node.inputs[0].owner.op) is MultinomialFromUniform):
        multi = node.inputs[0].owner
-        p, u = multi.inputs
+        if len(multi.inputs) == 2:  # support old pickled graphs (no n input)
+            p, u = multi.inputs
+            n_samples = 1
+        else:
+            p, u, n_samples = multi.inputs
+        try:
+            if get_scalar_constant_value(n_samples) != 1:
+                return None
+        except NotScalarConstantError:
+            return None
        m, = multi.outputs
        if (p.dtype == u.dtype == m.dtype == 'float32'):
            gpu_op = GpuMultinomialFromUniform(multi.op.odtype)
-            ret = gpu_op(*[gpu_from_host(i) for i in multi.inputs]).T
+            ret = gpu_op(*[gpu_from_host(i) for i in [p, u]]).T
            # The dimshuffle is on the cpu, but will be moved to the
            # gpu by an opt.
            return [gpu_from_host(ret)]

--- a/theano/sandbox/rng_mrg.py
+++ b/theano/sandbox/rng_mrg.py
@@ -19,7 +19,6 @@ from theano.tensor import (raw_random, TensorType, as_tensor_variable,
 from theano.tensor import sqrt, log, sin, cos, join, prod
 from theano.compile import optdb
 from theano.gof import local_optimizer
 from . import multinomial
 from theano.sandbox.cuda import cuda_available, cuda_enabled, GpuOp
@@ -1318,11 +1317,12 @@ class MRG_RandomStreams(object):
    def multinomial(self, size=None, n=1, pvals=None, ndim=None, dtype='int64',
                    nstreams=None):
        """
-        Sample `n` (currently `n` needs to be 1) times from a multinomial
+        Sample `n` (`n` needs to be >= 1, default 1) times from a multinomial
        distribution defined by probabilities pvals.
-        Example : pvals = [[.98, .01, .01], [.01, .98, .01]] will
+        Example : pvals = [[.98, .01, .01], [.01, .49, .50]] and n=1 will
-        probably result in [[1,0,0],[0,1,0]].
+        probably result in [[1,0,0],[0,0,1]]. When setting n=2, this
+        will probably result in [[2,0,0],[0,1,1]].
        Notes
        -----
@@ -1345,7 +1345,6 @@ class MRG_RandomStreams(object):
                    "The specified size contains a dimension with value <= 0",
                    size)
-        if n == 1 and pvals.ndim == 2:
        if size is not None:
            raise ValueError("Provided a size argument to "
                             "MRG_RandomStreams.multinomial, which does not use "
@@ -1354,16 +1353,15 @@ class MRG_RandomStreams(object):
            raise ValueError("Provided an ndim argument to "
                             "MRG_RandomStreams.multinomial, which does not use "
                             "the ndim argument.")
-            ndim, size, bcast = raw_random._infer_ndim_bcast(
+        if pvals.ndim == 2:
-                    ndim, size, pvals[:, 0])
+            size = pvals[:,0].shape * n
-            assert ndim == 1
-            bcast = bcast + (pvals.type.broadcastable[-1],)
            unis = self.uniform(size=size, ndim=1, nstreams=nstreams)
            op = multinomial.MultinomialFromUniform(dtype)
-            return op(pvals, unis)
+            n_samples = as_tensor_variable(n)
+            return op(pvals, unis, n_samples)
        else:
            raise NotImplementedError(("MRG_RandomStreams.multinomial only"
-                " implemented with n == 1 and pvals.ndim = 2"))
+                                       " implemented for pvals.ndim = 2"))
    def normal(self, size, avg=0.0, std=1.0, ndim=None,
               dtype=None, nstreams=None):

--- a/theano/sandbox/tests/multinomial_test_graph.pkl
+++ b/theano/sandbox/tests/multinomial_test_graph.pkl
--- a/theano/sandbox/tests/test_multinomial.py
+++ b/theano/sandbox/tests/test_multinomial.py
--- a/theano/sandbox/tests/test_rng_mrg.py
+++ b/theano/sandbox/tests/test_rng_mrg.py