Add `replace` as a prop of ChoiceFromUniform and GPUAChoiceFromUniform

6469a825 · Laurent Dinh · 72823c46 · 6469a825 · 6469a825 · 6469a825
--- a/theano/gpuarray/multinomial.py
+++ b/theano/gpuarray/multinomial.py
@@ -242,11 +242,17 @@ class GPUAChoiceFromUniform(GpuKernelBase, Op):
    """
-    __props__ = ("odtype",)
+    __props__ = ("odtype", "replace")
-    def __init__(self, odtype):
+    def __init__(self, odtype, replace=False):
        Op.__init__(self)
        self.odtype = odtype
+        self.replace = replace
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        if "replace" not in state:
+            self.replace = False
    def get_params(self, node):
        return node.outputs[0].type.context
@@ -282,6 +288,7 @@ class GPUAChoiceFromUniform(GpuKernelBase, Op):
        return Apply(self, [pvals, unis, as_scalar(n)], [out])
    def gpu_kernels(self, node, name):
+        replace = int(self.replace)
        code = """
 KERNEL void k_multi_warp_multinomial_wor(
    const ga_size nb_multi,
@@ -318,23 +325,29 @@ KERNEL void k_multi_warp_multinomial_wor(
                    global_outs[n * outs_col_stride +
                                c * outs_row_stride] = m;
-                    global_pvals_copy[m * pvals_col_stride + n * pvals_row_stride] = 0.0;
+                    if (! %(replace)s )
-                    cummul -= pvals_nm;
+                    {
+                        global_pvals_copy[m * pvals_col_stride + n * pvals_row_stride] = 0.0;
+                        cummul -= pvals_nm;
+                    }
                    done = true;
                }
            }
            // No need to renormalize after the last samples.
            if (c == (n_samples - 1))
                break;
-            // parallel renormalize the multinomial
+            if (! %(replace)s )
-            for (ga_int k = LID_1; k < nb_outcomes; k+=LDIM_1)
            {
-                global_pvals_copy[k * pvals_col_stride + n * pvals_row_stride] /= cummul;
+                // parallel renormalize the multinomial
+                for (ga_int k = LID_1; k < nb_outcomes; k+=LDIM_1)
+                {
+                    global_pvals_copy[k * pvals_col_stride + n * pvals_row_stride] /= cummul;
+                }
            }
        }
    }
 }
-"""
+""" % {"replace": replace}
        return [Kernel(
            code=code, name="k_multi_warp_multinomial_wor",
            params=[pygpu.gpuarray.SIZE,

--- a/theano/gpuarray/tests/test_multinomial.py
+++ b/theano/gpuarray/tests/test_multinomial.py
@@ -180,7 +180,7 @@ class test_OP_wor(unittest.TestCase):
        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
-        m = multinomial.ChoiceFromUniform('auto')(p, u, n)
+        m = multinomial.ChoiceFromUniform(odtype='auto')(p, u, n)
        f = function([p, u, n], m, allow_input_downcast=True)
@@ -204,7 +204,7 @@ class test_OP_wor(unittest.TestCase):
        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
-        m = multinomial.ChoiceFromUniform('auto')(p, u, n)
+        m = multinomial.ChoiceFromUniform(odtype='auto')(p, u, n)
        f = function([p, u, n], m, allow_input_downcast=True)
@@ -224,7 +224,7 @@ class test_OP_wor(unittest.TestCase):
        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
-        m = multinomial.ChoiceFromUniform('auto')(p, u, n)
+        m = multinomial.ChoiceFromUniform(odtype='auto')(p, u, n)
        f = function([p, u, n], m, allow_input_downcast=True)
@@ -327,7 +327,7 @@ def test_gpu_opt_wor():
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
-    m = multinomial.ChoiceFromUniform('auto')(p, u, n)
+    m = multinomial.ChoiceFromUniform(odtype='auto')(p, u, n)
    assert m.dtype == 'int64', m.dtype
    f = function([p, u, n], m, allow_input_downcast=True, mode=mode_with_gpu)

--- a/theano/sandbox/multinomial.py
+++ b/theano/sandbox/multinomial.py
@@ -219,6 +219,17 @@ class ChoiceFromUniform(MultinomialFromUniform):
    """
+    __props__ = ("replace",)
+    def __init__(self, replace=False, *args, **kwargs):
+        self.replace = replace
+        super(ChoiceFromUniform, self).__init__(*args, **kwargs)
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        if "replace" not in state:
+            self.replace = False
    def make_node(self, pvals, unis, n=1):
        pvals = T.as_tensor_variable(pvals)
        unis = T.as_tensor_variable(unis)
@@ -239,6 +250,7 @@ class ChoiceFromUniform(MultinomialFromUniform):
    def c_code(self, node, name, ins, outs, sub):
        (pvals, unis, n) = ins
        (z,) = outs
+        replace = int(self.replace)
        if self.odtype == 'auto':
            t = "NPY_INT64"
        else:
@@ -333,20 +345,23 @@ class ChoiceFromUniform(MultinomialFromUniform):
                        // No need to renormalize after the last samples.
                        if (c == (n_samples - 1))
                            break;
-                        // renormalize the nth row of pvals, reuse (cummul-*pvals_nm) to initialize the sum
+                        if (! %(replace)s )
-                        dtype_%(pvals)s sum = cummul - *pvals_nm;
-                        dtype_%(pvals)s* pvals_n = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, m);
-                        *pvals_nm = 0.;
-                        for (int k = m; k < nb_outcomes; ++k)
-                        {
-                            sum = sum + *pvals_n;
-                            pvals_n++;
-                        }
-                        pvals_n = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, 0);
-                        for (int k = 0; k < nb_outcomes; ++k)
                        {
-                            *pvals_n = *pvals_n / sum;
+                            // renormalize the nth row of pvals, reuse (cummul-*pvals_nm) to initialize the sum
-                            pvals_n++;
+                            dtype_%(pvals)s sum = cummul - *pvals_nm;
+                            dtype_%(pvals)s* pvals_n = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, m);
+                            *pvals_nm = 0.;
+                            for (int k = m; k < nb_outcomes; ++k)
+                            {
+                                sum = sum + *pvals_n;
+                                pvals_n++;
+                            }
+                            pvals_n = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, 0);
+                            for (int k = 0; k < nb_outcomes; ++k)
+                            {
+                                *pvals_n = *pvals_n / sum;
+                                pvals_n++;
+                            }
                        }
                        break;
                    }
@@ -398,8 +413,9 @@ class ChoiceFromUniform(MultinomialFromUniform):
                        z[0][n, c] = m
                        # set to zero and re-normalize so that it's not
                        # selected again
-                        pvals[n, m] = 0.
+                        if not self.replace:
-                        pvals[n] /= pvals[n].sum()
+                            pvals[n, m] = 0.
+                            pvals[n] /= pvals[n].sum()
                        break

--- a/theano/sandbox/rng_mrg.py
+++ b/theano/sandbox/rng_mrg.py
@@ -1513,7 +1513,7 @@ class MRG_RandomStreams(object):
        shape = p[:, 0].shape * size
        unis = self.uniform(size=shape, ndim=1, nstreams=nstreams)
-        op = multinomial.ChoiceFromUniform(dtype)
+        op = multinomial.ChoiceFromUniform(odtype=dtype)
        return op(p, unis, as_tensor_variable(size))
    def multinomial_wo_replacement(self, size=None, n=1, pvals=None,

--- a/theano/sandbox/tests/test_multinomial_wo_replacement.py
+++ b/theano/sandbox/tests/test_multinomial_wo_replacement.py
@@ -18,7 +18,7 @@ class test_OP(unittest.TestCase):
        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
-        m = multinomial.ChoiceFromUniform('auto')(p, u, n)
+        m = multinomial.ChoiceFromUniform(odtype='auto')(p, u, n)
        f = function([p, u, n], m, allow_input_downcast=True)
@@ -52,7 +52,7 @@ class test_OP(unittest.TestCase):
        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
-        m = multinomial.ChoiceFromUniform('auto')(p, u, n)
+        m = multinomial.ChoiceFromUniform(odtype='auto')(p, u, n)
        f = function([p, u, n], m, allow_input_downcast=True)
@@ -72,7 +72,7 @@ class test_OP(unittest.TestCase):
        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
-        m = multinomial.ChoiceFromUniform('auto')(p, u, n)
+        m = multinomial.ChoiceFromUniform(odtype='auto')(p, u, n)
        f = function([p, u, n], m, allow_input_downcast=True)