Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
931dfec1
提交
931dfec1
authored
2月 11, 2016
作者:
carriepl
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3795 from aalmah/rand_weighted_select
Sample multinomial with replacement
上级
b2362664
837e3abf
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
270 行增加
和
0 行删除
+270
-0
multinomial.py
theano/sandbox/multinomial.py
+67
-0
rng_mrg.py
theano/sandbox/rng_mrg.py
+48
-0
test_multinomial_wo_replacement.py
theano/sandbox/tests/test_multinomial_wo_replacement.py
+155
-0
没有找到文件。
theano/sandbox/multinomial.py
浏览文件 @
931dfec1
...
...
@@ -6,6 +6,7 @@ import theano.tensor as T
from
theano.gof
import
local_optimizer
from
theano.tensor
import
NotScalarConstantError
,
get_scalar_constant_value
from
theano.scalar
import
as_scalar
import
copy
from
theano.sandbox.cuda
import
cuda_available
,
GpuOp
if
cuda_available
:
...
...
@@ -193,6 +194,72 @@ class MultinomialFromUniform(Op):
break
class
MultinomialWOReplacementFromUniform
(
MultinomialFromUniform
):
"""
Converts samples from a uniform into sample (without replacement) from a multinomial.
"""
def
make_node
(
self
,
pvals
,
unis
,
n
=
1
):
pvals
=
T
.
as_tensor_variable
(
pvals
)
unis
=
T
.
as_tensor_variable
(
unis
)
if
pvals
.
ndim
!=
2
:
raise
NotImplementedError
(
'pvals ndim should be 2'
,
pvals
.
ndim
)
if
unis
.
ndim
!=
1
:
raise
NotImplementedError
(
'unis ndim should be 1'
,
unis
.
ndim
)
if
self
.
odtype
==
'auto'
:
odtype
=
'int64'
else
:
odtype
=
self
.
odtype
out
=
T
.
tensor
(
dtype
=
odtype
,
broadcastable
=
pvals
.
type
.
broadcastable
)
return
Apply
(
self
,
[
pvals
,
unis
,
as_scalar
(
n
)],
[
out
])
def
perform
(
self
,
node
,
ins
,
outs
):
(
pvals
,
unis
,
n_samples
)
=
ins
# make a copy so we do not overwrite the input
pvals
=
copy
.
copy
(
pvals
)
(
z
,)
=
outs
if
n_samples
>
pvals
.
shape
[
1
]:
raise
ValueError
(
"Cannot sample without replacement n samples bigger "
"than the size of the distribution."
)
if
unis
.
shape
[
0
]
!=
pvals
.
shape
[
0
]
*
n_samples
:
raise
ValueError
(
"unis.shape[0] != pvals.shape[0] * n_samples"
,
unis
.
shape
[
0
],
pvals
.
shape
[
0
],
n_samples
)
if
self
.
odtype
==
'auto'
:
odtype
=
'int64'
else
:
odtype
=
self
.
odtype
if
z
[
0
]
is
None
or
not
numpy
.
all
(
z
[
0
]
.
shape
==
[
pvals
.
shape
[
0
],
n_samples
]):
z
[
0
]
=
-
1
*
numpy
.
ones
((
pvals
.
shape
[
0
],
n_samples
),
dtype
=
odtype
)
nb_multi
=
pvals
.
shape
[
0
]
nb_outcomes
=
pvals
.
shape
[
1
]
# For each multinomial, loop over each possible outcome,
# and set selected pval to 0 after being selected
for
c
in
range
(
n_samples
):
for
n
in
range
(
nb_multi
):
cummul
=
0
unis_n
=
unis
[
c
*
nb_multi
+
n
]
for
m
in
range
(
nb_outcomes
):
cummul
+=
pvals
[
n
,
m
]
if
(
cummul
>
unis_n
):
z
[
0
][
n
,
c
]
=
m
# set to zero and re-normalize so that it's not selected again
pvals
[
n
,
m
]
=
0.
pvals
[
n
]
/=
pvals
[
n
]
.
sum
()
break
def
c_code_cache_version
(
self
):
return
None
def
c_code
(
self
,
node
,
name
,
ins
,
outs
,
sub
):
raise
NotImplementedError
(
'no C implementation yet!'
)
class
GpuMultinomialFromUniform
(
MultinomialFromUniform
,
GpuOp
):
"""
The output is transposed compared to MultinomialFromUniform.
...
...
theano/sandbox/rng_mrg.py
浏览文件 @
931dfec1
...
...
@@ -1363,6 +1363,54 @@ class MRG_RandomStreams(object):
raise
NotImplementedError
((
"MRG_RandomStreams.multinomial only"
" implemented for pvals.ndim = 2"
))
def
multinomial_wo_replacement
(
self
,
size
=
None
,
n
=
1
,
pvals
=
None
,
ndim
=
None
,
dtype
=
'int64'
,
nstreams
=
None
):
"""
Sample `n` times *WITHOUT replacement* from a multinomial distribution
defined by probabilities pvals, and returns the indices of the sampled
elements.
`n` needs to be in [1, m], where m is the number of elements to select
from, i.e. m == pvals.shape[1]. By default n = 1.
Example : pvals = [[.98, .01, .01], [.01, .49, .50]] and n=1 will
probably result in [[0],[2]]. When setting n=2, this
will probably result in [[0,1],[2,1]].
Notes
-----
-`size` and `ndim` are only there keep the same signature as other
uniform, binomial, normal, etc.
TODO : adapt multinomial to take that into account
-Does not do any value checking on pvals, i.e. there is no
check that the elements are non-negative, less than 1, or
sum to 1. passing pvals = [[-2., 2.]] will result in
sampling [[0, 0]]
"""
if
pvals
is
None
:
raise
TypeError
(
"You have to specify pvals"
)
pvals
=
as_tensor_variable
(
pvals
)
if
size
is
not
None
:
raise
ValueError
(
"Provided a size argument to "
"MRG_RandomStreams.multinomial_wo_replacement, which does not use "
"the size argument."
)
if
ndim
is
not
None
:
raise
ValueError
(
"Provided an ndim argument to "
"MRG_RandomStreams.multinomial_wo_replacement, which does not use "
"the ndim argument."
)
if
pvals
.
ndim
==
2
:
# size = [pvals.shape[0], as_tensor_variable(n)]
size
=
pvals
[:,
0
]
.
shape
*
n
unis
=
self
.
uniform
(
size
=
size
,
ndim
=
1
,
nstreams
=
nstreams
)
op
=
multinomial
.
MultinomialWOReplacementFromUniform
(
dtype
)
n_samples
=
as_tensor_variable
(
n
)
return
op
(
pvals
,
unis
,
n_samples
)
else
:
raise
NotImplementedError
((
"MRG_RandomStreams.multinomial_wo_replacement only"
" implemented for pvals.ndim = 2"
))
def
normal
(
self
,
size
,
avg
=
0.0
,
std
=
1.0
,
ndim
=
None
,
dtype
=
None
,
nstreams
=
None
):
"""
...
...
theano/sandbox/tests/test_multinomial_wo_replacement.py
0 → 100644
浏览文件 @
931dfec1
import
numpy
from
theano
import
config
,
function
,
tensor
from
theano.sandbox
import
multinomial
from
theano.sandbox.rng_mrg
import
MRG_RandomStreams
as
RandomStreams
import
unittest
class
test_OP
(
unittest
.
TestCase
):
def
test_select_distinct
(
self
):
"""
Tests that MultinomialWOReplacementFromUniform always selects distinct elements
"""
p
=
tensor
.
fmatrix
()
u
=
tensor
.
fvector
()
n
=
tensor
.
iscalar
()
m
=
multinomial
.
MultinomialWOReplacementFromUniform
(
'auto'
)(
p
,
u
,
n
)
f
=
function
([
p
,
u
,
n
],
m
,
allow_input_downcast
=
True
)
n_elements
=
1000
all_indices
=
range
(
n_elements
)
numpy
.
random
.
seed
(
12345
)
for
i
in
[
5
,
10
,
50
,
100
,
500
,
n_elements
]:
uni
=
numpy
.
random
.
rand
(
i
)
.
astype
(
config
.
floatX
)
pvals
=
numpy
.
random
.
randint
(
1
,
100
,
(
1
,
n_elements
))
.
astype
(
config
.
floatX
)
pvals
/=
pvals
.
sum
(
1
)
res
=
f
(
pvals
,
uni
,
i
)
res
=
numpy
.
squeeze
(
res
)
assert
len
(
res
)
==
i
assert
numpy
.
all
(
numpy
.
in1d
(
numpy
.
unique
(
res
),
all_indices
)),
res
def
test_fail_select_alot
(
self
):
"""
Tests that MultinomialWOReplacementFromUniform fails when asked to sample more
elements than the actual number of elements
"""
p
=
tensor
.
fmatrix
()
u
=
tensor
.
fvector
()
n
=
tensor
.
iscalar
()
m
=
multinomial
.
MultinomialWOReplacementFromUniform
(
'auto'
)(
p
,
u
,
n
)
f
=
function
([
p
,
u
,
n
],
m
,
allow_input_downcast
=
True
)
n_elements
=
100
n_selected
=
200
numpy
.
random
.
seed
(
12345
)
uni
=
numpy
.
random
.
rand
(
n_selected
)
.
astype
(
config
.
floatX
)
pvals
=
numpy
.
random
.
randint
(
1
,
100
,
(
1
,
n_elements
))
.
astype
(
config
.
floatX
)
pvals
/=
pvals
.
sum
(
1
)
self
.
assertRaises
(
ValueError
,
f
,
pvals
,
uni
,
n_selected
)
def
test_select_proportional_to_weight
(
self
):
"""
Tests that MultinomialWOReplacementFromUniform selects elements, on average,
proportional to the their probabilities
"""
p
=
tensor
.
fmatrix
()
u
=
tensor
.
fvector
()
n
=
tensor
.
iscalar
()
m
=
multinomial
.
MultinomialWOReplacementFromUniform
(
'auto'
)(
p
,
u
,
n
)
f
=
function
([
p
,
u
,
n
],
m
,
allow_input_downcast
=
True
)
n_elements
=
100
n_selected
=
10
mean_rtol
=
0.0005
numpy
.
random
.
seed
(
12345
)
pvals
=
numpy
.
random
.
randint
(
1
,
100
,
(
1
,
n_elements
))
.
astype
(
config
.
floatX
)
pvals
/=
pvals
.
sum
(
1
)
avg_pvals
=
numpy
.
zeros
((
n_elements
,),
dtype
=
config
.
floatX
)
for
rep
in
range
(
10000
):
uni
=
numpy
.
random
.
rand
(
n_selected
)
.
astype
(
config
.
floatX
)
res
=
f
(
pvals
,
uni
,
n_selected
)
res
=
numpy
.
squeeze
(
res
)
avg_pvals
[
res
]
+=
1
avg_pvals
/=
avg_pvals
.
sum
()
avg_diff
=
numpy
.
mean
(
abs
(
avg_pvals
-
pvals
))
assert
avg_diff
<
mean_rtol
,
avg_diff
class
test_function
(
unittest
.
TestCase
):
def
test_select_distinct
(
self
):
"""
Tests that multinomial_wo_replacement always selects distinct elements
"""
th_rng
=
RandomStreams
(
12345
)
p
=
tensor
.
fmatrix
()
n
=
tensor
.
iscalar
()
m
=
th_rng
.
multinomial_wo_replacement
(
pvals
=
p
,
n
=
n
)
f
=
function
([
p
,
n
],
m
,
allow_input_downcast
=
True
)
n_elements
=
1000
all_indices
=
range
(
n_elements
)
numpy
.
random
.
seed
(
12345
)
for
i
in
[
5
,
10
,
50
,
100
,
500
,
n_elements
]:
pvals
=
numpy
.
random
.
randint
(
1
,
100
,
(
1
,
n_elements
))
.
astype
(
config
.
floatX
)
pvals
/=
pvals
.
sum
(
1
)
res
=
f
(
pvals
,
i
)
res
=
numpy
.
squeeze
(
res
)
assert
len
(
res
)
==
i
assert
numpy
.
all
(
numpy
.
in1d
(
numpy
.
unique
(
res
),
all_indices
)),
res
def
test_fail_select_alot
(
self
):
"""
Tests that multinomial_wo_replacement fails when asked to sample more
elements than the actual number of elements
"""
th_rng
=
RandomStreams
(
12345
)
p
=
tensor
.
fmatrix
()
n
=
tensor
.
iscalar
()
m
=
th_rng
.
multinomial_wo_replacement
(
pvals
=
p
,
n
=
n
)
f
=
function
([
p
,
n
],
m
,
allow_input_downcast
=
True
)
n_elements
=
100
n_selected
=
200
numpy
.
random
.
seed
(
12345
)
pvals
=
numpy
.
random
.
randint
(
1
,
100
,
(
1
,
n_elements
))
.
astype
(
config
.
floatX
)
pvals
/=
pvals
.
sum
(
1
)
self
.
assertRaises
(
ValueError
,
f
,
pvals
,
n_selected
)
def
test_select_proportional_to_weight
(
self
):
"""
Tests that multinomial_wo_replacement selects elements, on average,
proportional to the their probabilities
"""
th_rng
=
RandomStreams
(
12345
)
p
=
tensor
.
fmatrix
()
n
=
tensor
.
iscalar
()
m
=
th_rng
.
multinomial_wo_replacement
(
pvals
=
p
,
n
=
n
)
f
=
function
([
p
,
n
],
m
,
allow_input_downcast
=
True
)
n_elements
=
100
n_selected
=
10
mean_rtol
=
0.0005
numpy
.
random
.
seed
(
12345
)
pvals
=
numpy
.
random
.
randint
(
1
,
100
,
(
1
,
n_elements
))
.
astype
(
config
.
floatX
)
pvals
/=
pvals
.
sum
(
1
)
avg_pvals
=
numpy
.
zeros
((
n_elements
,),
dtype
=
config
.
floatX
)
for
rep
in
range
(
10000
):
res
=
f
(
pvals
,
n_selected
)
res
=
numpy
.
squeeze
(
res
)
avg_pvals
[
res
]
+=
1
avg_pvals
/=
avg_pvals
.
sum
()
avg_diff
=
numpy
.
mean
(
abs
(
avg_pvals
-
pvals
))
assert
avg_diff
<
mean_rtol
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论