Commit 164d2c78 authored by Amjad Almahairi

more changes

Parent 304a4fd9
@@ -4,6 +4,8 @@ import theano
 from theano import Op, Apply
 import theano.tensor as T
 from theano.gof import local_optimizer
+from theano.tensor import NotScalarConstantError, get_scalar_constant_value
+from theano.scalar import as_scalar
 from theano.sandbox.cuda import cuda_available, GpuOp
 if cuda_available:
@@ -11,7 +13,6 @@ if cuda_available:
     from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
     from theano.sandbox.cuda.opt import register_opt

 class MultinomialFromUniform(Op):
     """
     Converts samples from a uniform into sample from a multinomial.
@@ -45,7 +46,7 @@ class MultinomialFromUniform(Op):
         else:
             odtype = self.odtype
         out = T.tensor(dtype=odtype, broadcastable=pvals.type.broadcastable)
-        return Apply(self, [pvals, unis, n], [out])
+        return Apply(self, [pvals, unis, as_scalar(n)], [out])

     def grad(self, ins, outgrads):
         pvals, unis, n = ins
@@ -56,7 +57,7 @@ class MultinomialFromUniform(Op):
         # return (6,)

     def c_code(self, node, name, ins, outs, sub):
-        (pvals, unis, ns) = ins
+        (pvals, unis, n) = ins
         (z,) = outs
         if self.odtype == 'auto':
             t = "PyArray_TYPE(%(pvals)s)" % locals()
@@ -79,9 +80,9 @@ class MultinomialFromUniform(Op):
            %(fail)s;
        }
-       if (PyArray_DIMS(%(unis)s)[0] != PyArray_DIMS(%(pvals)s)[0])
+       if (PyArray_DIMS(%(unis)s)[0] != (PyArray_DIMS(%(pvals)s)[0] * %(n)s))
        {
-           PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0]");
+           PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0] * n");
            %(fail)s;
        }
@@ -106,17 +107,17 @@ class MultinomialFromUniform(Op):
        const int nb_multi = PyArray_DIMS(%(pvals)s)[0];
        const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1];
-       const int nb_samples = %(ns)s;
+       const int n_samples = %(n)s;
        //
        // For each multinomial, loop over each possible outcome
        //
-       for (int c = 0; c < nb_samples; ++c){
+       for (int c = 0; c < n_samples; ++c){
        for (int n = 0; n < nb_multi; ++n)
        {
            int waiting = 1;
            dtype_%(pvals)s cummul = 0.;
-           const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, c*nb_samples + n);
+           const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, c*n_samples + n);
            for (int m = 0; m < nb_outcomes; ++m)
            {
                dtype_%(z)s* z_nm = (dtype_%(z)s*)PyArray_GETPTR2(%(z)s, n,m);
@@ -136,10 +137,10 @@ class MultinomialFromUniform(Op):
                    }
                }
                else {
-                   if (waiting && (cummul > *unis_n))
+                   if (cummul > *unis_n)
                    {
                        *z_nm = *z_nm + 1.;
-                       waiting = 0;
+                       break;
                    }
                }
            }
@@ -149,12 +150,14 @@ class MultinomialFromUniform(Op):
        """ % locals()

     def perform(self, node, ins, outs):
+        # import pdb; pdb.set_trace()
         (pvals, unis, n_samples) = ins
         (z,) = outs
-        if unis.shape[0] * n_samples != pvals.shape[0]:
-            raise ValueError("unis.shape[0] != pvals.shape[0]",
-                             unis.shape[0], pvals.shape[0])
+        if unis.shape[0] != pvals.shape[0] * n_samples:
+            raise ValueError("unis.shape[0] != pvals.shape[0] * n_samples",
+                             unis.shape[0], pvals.shape[0], n_samples)
         if z[0] is None or z[0].shape != pvals.shape:
             z[0] = numpy.zeros(pvals.shape, dtype=node.outputs[0].dtype)
@@ -162,18 +165,24 @@ class MultinomialFromUniform(Op):
         nb_outcomes = pvals.shape[1]

         # For each multinomial, loop over each possible outcome
-        for n in range(nb_multi):
-            waiting = True
-            cummul = 0
-            unis_n = unis[n]
-
-            for m in range(nb_outcomes):
-                cummul += pvals[n, m]
-                if (waiting and (cummul > unis_n)):
-                    z[0][n, m] = 1
-                    waiting = False
-                else:
-                    z[0][n, m] = 0
+        for c in range(n_samples):
+            for n in range(nb_multi):
+                waiting = True
+                cummul = 0
+                unis_n = unis[n]
+
+                for m in range(nb_outcomes):
+                    cummul += pvals[n, m]
+                    if c == 0:
+                        if (waiting and (cummul > unis_n)):
+                            z[0][n, m] = 1
+                            waiting = False
+                        else:
+                            z[0][n, m] = 0
+                    else:
+                        if (cummul > unis_n):
+                            z[0][n, m] += 1
+                            break

 class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
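
Editor's note on the rewritten `perform`: each of the `n_samples` passes consumes one uniform draw per row and increments the count of the first outcome whose cumulative probability exceeds that draw. A minimal NumPy sketch of this counting scheme (illustrative only; the indexing of `unis` is an assumption here, and the patch's C and Python paths index it slightly differently):

    import numpy as np

    def multinomial_from_uniform_sketch(pvals, unis, n_samples):
        # pvals: (nb_multi, nb_outcomes), each row sums to 1
        # unis:  (nb_multi * n_samples,) uniforms in [0, 1)
        nb_multi, nb_outcomes = pvals.shape
        assert unis.shape[0] == nb_multi * n_samples
        z = np.zeros_like(pvals)
        for c in range(n_samples):
            for n in range(nb_multi):
                u = unis[c * nb_multi + n]  # assumed layout: one uniform per (sample, row)
                cummul = 0.0
                for m in range(nb_outcomes):
                    cummul += pvals[n, m]
                    if cummul > u:
                        z[n, m] += 1        # count the first outcome whose CDF crosses u
                        break
        return z
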
@@ -360,6 +369,11 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
 def local_gpu_multinomial(node):
     if type(node.op) is MultinomialFromUniform:
         p, u, n_samples = node.inputs
+        try:
+            if get_scalar_constant_value(n_samples) != 1:
+                return None
+        except NotScalarConstantError:
+            return None
         m, = node.outputs
         if (p.dtype == u.dtype == m.dtype == 'float32' and
             any([i.owner and isinstance(i.owner.op,
@@ -367,16 +381,21 @@ def local_gpu_multinomial(node):
                          for i in node.inputs])):
             gpu_op = GpuMultinomialFromUniform(node.op.odtype)
             return [host_from_gpu(gpu_op(*[gpu_from_host(i)
-                                           for i in node.inputs])).T]
+                                           for i in [p, u]])).T]
     if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and
         node.inputs[0].owner and
         type(node.inputs[0].owner.op) is MultinomialFromUniform):
         multi = node.inputs[0].owner
         p, u, n_samples = multi.inputs
+        try:
+            if get_scalar_constant_value(n_samples) != 1:
+                return None
+        except NotScalarConstantError:
+            return None
         m, = multi.outputs
         if (p.dtype == u.dtype == m.dtype == 'float32'):
             gpu_op = GpuMultinomialFromUniform(multi.op.odtype)
-            ret = gpu_op(*[gpu_from_host(i) for i in multi.inputs]).T
+            ret = gpu_op(*[gpu_from_host(i) for i in [p, u]]).T
             # The dimshuffle is on the cpu, but will be moved to the
             # gpu by an opt.
             return [gpu_from_host(ret)]
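
Editor's note: the `try`/`except` guards added in both branches mean the GPU Op only takes over when `n_samples` folds at compile time to exactly 1; `get_scalar_constant_value` raises `NotScalarConstantError` for anything it cannot fold. A quick illustration of that behavior, using the same Theano functions this diff imports:

    import theano.tensor as T
    from theano.tensor import NotScalarConstantError, get_scalar_constant_value

    print(get_scalar_constant_value(T.constant(1)))  # a constant folds to 1

    try:
        get_scalar_constant_value(T.scalar())        # purely symbolic input
    except NotScalarConstantError:
        print("not a compile-time constant -> the optimizer returns None")
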
...
@@ -1355,19 +1355,11 @@ class MRG_RandomStreams(object):
                     "MRG_RandomStreams.multinomial, which does not use "
                     "the ndim argument.")
         if pvals.ndim == 2:
-            if n == 1:
-                size = pvals[:,0].shape
-                unis = self.uniform(size=size, ndim=1, nstreams=nstreams)
-                op = multinomial.MultinomialFromUniform(dtype)
-                n_samples = constant(n)
-                return op(pvals, unis, n_samples)
-            elif n > 1:
-                # size = pvals[:,0].shape * n
-                # unis = self.uniform(size=size, ndim=1, nstreams=nstreams)
-                raise NotImplementedError('under construction!')
-            else:
-                raise NotImplementedError(("MRG_RandomStreams.multinomial only"
-                                           " implemented for n > 0"))
+            size = pvals[:,0].shape
+            unis = self.uniform(size=size, ndim=1, nstreams=nstreams)
+            op = multinomial.MultinomialFromUniform(dtype)
+            n_samples = as_tensor_variable(n)
+            return op(pvals, unis, n_samples)
         else:
             raise NotImplementedError(("MRG_RandomStreams.multinomial only"
                                        " implemented for pvals.ndim = 2"))
...
@@ -8,6 +8,7 @@ from theano.sandbox import multinomial
 from theano.compile.mode import get_default_mode, predefined_linkers
 import theano.sandbox.cuda as cuda
 import theano.tests.unittest_tools as utt
+from theano.scalar import constant

 def get_mode(gpu):
@@ -29,18 +30,34 @@ def run_with_c(f, gpu=False):
     f(mode, gpu)

+def test_n_samples():
+    p = tensor.fmatrix()
+    u = tensor.fvector()
+    n = tensor.scalar()
+    m = multinomial.MultinomialFromUniform('auto')(p, u, n)
+    f = function([p, u, n], m, allow_input_downcast=True)
+
+    for uni in [.1, .2, .4, .5, .7, .9]:
+        for i in [1, 5, 10, 15, 100]:
+            res = f([[1, 0], [0, 1]], [.4, .4]*i, i)
+            utt.assert_allclose(res,
+                                [[i, 0], [0, i]])
+
 def test_multinomial_0():
     # This tests the MultinomialFromUniform Op directly, not going through the
     # multinomial() call in GPU random generation.
     p = tensor.fmatrix()
     u = tensor.fvector()
+    n = constant(1)

-    m = multinomial.MultinomialFromUniform('auto')(p, u)
+    m = multinomial.MultinomialFromUniform('auto')(p, u, n)

     def body(mode, gpu):
         # the m*2 allows the multinomial to reuse output
         f = function([p, u], m*2, allow_input_downcast=True, mode=mode)
         if gpu:
             assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                         for node in f.maker.fgraph.toposort()])
@@ -74,7 +91,8 @@ def test_multinomial_large():
     def body(mode, gpu):
         p = tensor.fmatrix()
         u = tensor.fvector()
-        m = multinomial.MultinomialFromUniform('auto')(p, u)
+        n = constant(1)
+        m = multinomial.MultinomialFromUniform('auto')(p, u, n)
         f = function([p, u], m*2, allow_input_downcast=True, mode=mode)
         if gpu:
             assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
@@ -105,17 +123,20 @@ def test_multinomial_large():
 def test_multinomial_dtypes():
     p = tensor.dmatrix()
     u = tensor.dvector()
-    m = multinomial.MultinomialFromUniform('auto')(p, u)
+    n = constant(1)
+    m = multinomial.MultinomialFromUniform('auto')(p, u, n)
     assert m.dtype == 'float64', m.dtype

     p = tensor.fmatrix()
     u = tensor.fvector()
-    m = multinomial.MultinomialFromUniform('auto')(p, u)
+    n = constant(1)
+    m = multinomial.MultinomialFromUniform('auto')(p, u, n)
     assert m.dtype == 'float32', m.dtype

     p = tensor.fmatrix()
     u = tensor.fvector()
-    m = multinomial.MultinomialFromUniform('float64')(p, u)
+    n = constant(1)
+    m = multinomial.MultinomialFromUniform('float64')(p, u, n)
     assert m.dtype == 'float64', m.dtype
@@ -129,7 +150,8 @@ def test_gpu_opt():
     # is moved to the gpu.
     p = tensor.fmatrix()
     u = tensor.fvector()
-    m = multinomial.MultinomialFromUniform('auto')(p, u)
+    n = constant(1)
+    m = multinomial.MultinomialFromUniform('auto')(p, u, n)
     assert m.dtype == 'float32', m.dtype
     m_gpu = cuda.gpu_from_host(m)
@@ -143,7 +165,7 @@ def test_gpu_opt():
     # Test with a row, it was failing in the past.
     r = tensor.frow()
-    m = multinomial.MultinomialFromUniform('auto')(r, u)
+    m = multinomial.MultinomialFromUniform('auto')(r, u, n)
     assert m.dtype == 'float32', m.dtype
     m_gpu = cuda.gpu_from_host(m)
...