Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
25dfa312
提交
25dfa312
authored
10月 13, 2017
作者:
Frédéric Bastien
提交者:
GitHub
10月 13, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #6401 from mrTsjolder/master
Implement truncated normal with box-muller
上级
e3c95974
e29586c9
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
165 行增加
和
17 行删除
+165
-17
multinomial.py
theano/gpuarray/multinomial.py
+22
-11
rng_mrg.py
theano/gpuarray/rng_mrg.py
+2
-2
rng_mrg.py
theano/sandbox/rng_mrg.py
+0
-0
test_rng_mrg.py
theano/sandbox/tests/test_rng_mrg.py
+141
-4
没有找到文件。
theano/gpuarray/multinomial.py
浏览文件 @
25dfa312
...
...
@@ -39,11 +39,11 @@ class GPUAMultinomialFromUniform(GpuKernelBase, Op):
return
[
gpuarray_helper_inc_dir
()]
def
make_node
(
self
,
pvals
,
unis
):
assert
unis
.
dtype
==
pvals
.
dtype
assert
pvals
.
dtype
in
[
'float32'
,
'float16'
,
'float64'
]
ctx_name
=
infer_context_name
(
pvals
,
unis
)
pvals
=
as_gpuarray_variable
(
pvals
,
ctx_name
)
unis
=
as_gpuarray_variable
(
unis
,
ctx_name
)
assert
pvals
.
dtype
in
[
'float32'
,
'float16'
,
'float64'
]
assert
unis
.
dtype
in
[
'float32'
,
'float16'
,
'float64'
]
if
pvals
.
ndim
!=
2
:
raise
NotImplementedError
(
'pvals ndim should be 2'
,
pvals
.
ndim
)
...
...
@@ -62,7 +62,8 @@ class GPUAMultinomialFromUniform(GpuKernelBase, Op):
def
gpu_kernels
(
self
,
node
,
name
):
out_ctype
=
pygpu
.
gpuarray
.
dtype_to_ctype
(
node
.
outputs
[
0
]
.
dtype
)
in_ctype
=
pygpu
.
gpuarray
.
dtype_to_ctype
(
node
.
inputs
[
0
]
.
dtype
)
pvals_ctype
=
pygpu
.
gpuarray
.
dtype_to_ctype
(
node
.
inputs
[
0
]
.
dtype
)
unis_ctype
=
pygpu
.
gpuarray
.
dtype_to_ctype
(
node
.
inputs
[
1
]
.
dtype
)
work_ctype
=
pygpu
.
gpuarray
.
dtype_to_ctype
(
work_dtype
(
node
.
inputs
[
0
]
.
dtype
))
write_out_ctype
=
write_w
(
node
.
outputs
[
0
]
.
dtype
)
load_in_ctype
=
load_w
(
node
.
inputs
[
0
]
.
dtype
)
...
...
@@ -71,11 +72,11 @@ class GPUAMultinomialFromUniform(GpuKernelBase, Op):
KERNEL void k_multi_warp_multinomial(
const ga_size nb_multi,
const ga_size nb_outcomes,
GLOBAL_MEM
%(
in
_ctype)
s *global_pvals,
GLOBAL_MEM
%(
pvals
_ctype)
s *global_pvals,
const ga_size global_pvals_offset,
const ga_ssize pvals_row_stride,
const ga_ssize pvals_col_stride,
GLOBAL_MEM
%(
in
_ctype)
s *global_unis,
GLOBAL_MEM
%(
unis
_ctype)
s *global_unis,
const ga_size global_unis_offset,
const ga_ssize unis_stride,
GLOBAL_MEM
%(out_ctype)
s *global_outs,
...
...
@@ -84,8 +85,8 @@ KERNEL void k_multi_warp_multinomial(
const ga_ssize outs_col_stride
)
{
global_pvals = (GLOBAL_MEM
%(
in
_ctype)
s *)(((GLOBAL_MEM char *)global_pvals) + global_pvals_offset);
global_unis = (GLOBAL_MEM
%(
in
_ctype)
s *)(((GLOBAL_MEM char *)global_unis) + global_unis_offset);
global_pvals = (GLOBAL_MEM
%(
pvals
_ctype)
s *)(((GLOBAL_MEM char *)global_pvals) + global_pvals_offset);
global_unis = (GLOBAL_MEM
%(
unis
_ctype)
s *)(((GLOBAL_MEM char *)global_unis) + global_unis_offset);
global_outs = (GLOBAL_MEM
%(out_ctype)
s *)(((GLOBAL_MEM char *)global_outs) + global_outs_offset);
// each thread takes care of one multinomial draw
int n = LDIM_0*GID_0 + LID_0;
...
...
@@ -113,7 +114,8 @@ KERNEL void k_multi_warp_multinomial(
}
}
"""
%
dict
(
out_ctype
=
out_ctype
,
write_out_ctype
=
write_out_ctype
,
work_ctype
=
work_ctype
,
in_ctype
=
in_ctype
,
load_in_ctype
=
load_in_ctype
)
work_ctype
=
work_ctype
,
pvals_ctype
=
pvals_ctype
,
unis_ctype
=
unis_ctype
,
load_in_ctype
=
load_in_ctype
)
return
[
Kernel
(
code
=
code
,
name
=
"k_multi_warp_multinomial"
,
params
=
[
pygpu
.
gpuarray
.
SIZE
,
...
...
@@ -139,7 +141,8 @@ KERNEL void k_multi_warp_multinomial(
ctx
=
sub
[
'params'
]
kname
=
self
.
gpu_kernels
(
node
,
name
)[
0
]
.
objvar
out_typecode
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
outputs
[
0
]
.
dtype
)
in_typecode
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
inputs
[
0
]
.
dtype
)
pvals_typecode
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
inputs
[
0
]
.
dtype
)
unis_typecode
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
inputs
[
1
]
.
dtype
)
s
=
"""
PyGpuArrayObject * pvals =
%(pvals)
s;
PyGpuArrayObject * unis =
%(unis)
s;
...
...
@@ -201,7 +204,15 @@ KERNEL void k_multi_warp_multinomial(
assert(nb_blocks*nb_threads >= nb_multi);
int err = k_multi_warp_multinomial_call(1, &nb_blocks, &nb_threads, 0, PyGpuArray_DIMS(out)[1], PyGpuArray_DIMS(out)[0], pvals->ga.data, pvals->ga.offset, PyGpuArray_STRIDES(pvals)[0]/gpuarray_get_elsize(
%(in_typecode)
s), PyGpuArray_STRIDES(pvals)[1]/gpuarray_get_elsize(
%(in_typecode)
s), unis->ga.data, unis->ga.offset, PyGpuArray_STRIDES(unis)[0]/gpuarray_get_elsize(
%(in_typecode)
s), out->ga.data, out->ga.offset, PyGpuArray_STRIDES(out)[0]/gpuarray_get_elsize(
%(out_typecode)
s), PyGpuArray_STRIDES(out)[1]/gpuarray_get_elsize(
%(out_typecode)
s));
int err = k_multi_warp_multinomial_call(
1, &nb_blocks, &nb_threads, 0,
PyGpuArray_DIMS(out)[1], PyGpuArray_DIMS(out)[0], pvals->ga.data, pvals->ga.offset,
PyGpuArray_STRIDES(pvals)[0]/gpuarray_get_elsize(
%(pvals_typecode)
s),
PyGpuArray_STRIDES(pvals)[1]/gpuarray_get_elsize(
%(pvals_typecode)
s),
unis->ga.data, unis->ga.offset,
PyGpuArray_STRIDES(unis)[0]/gpuarray_get_elsize(
%(unis_typecode)
s), out->ga.data,
out->ga.offset, PyGpuArray_STRIDES(out)[0]/gpuarray_get_elsize(
%(out_typecode)
s),
PyGpuArray_STRIDES(out)[1]/gpuarray_get_elsize(
%(out_typecode)
s));
if (err != GA_NO_ERROR) {
PyErr_Format(
...
...
@@ -218,7 +229,7 @@ KERNEL void k_multi_warp_multinomial(
return
s
def
c_code_cache_version
(
self
):
return
(
6
,)
return
(
7
,)
class
GPUAChoiceFromUniform
(
GpuKernelBase
,
Op
):
...
...
theano/gpuarray/rng_mrg.py
浏览文件 @
25dfa312
...
...
@@ -271,7 +271,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
if (n_streams > n_elements)
n_streams = n_elements;
{
if (n_elements > 0)
{
size_t ls = 0, gs = 0;
int err = GpuKernel_sched(&
%(kname)
s, n_streams, &ls, &gs);
if (err != GA_NO_ERROR) {
...
...
@@ -303,7 +303,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
"""
%
dict
(
fail
=
sub
[
'fail'
]))
def
c_code_cache_version
(
self
):
return
(
1
6
,)
return
(
1
7
,)
@register_opt2
([
mrg_uniform
],
'fast_compile'
)
...
...
theano/sandbox/rng_mrg.py
浏览文件 @
25dfa312
差异被折叠。
点击展开。
theano/sandbox/tests/test_rng_mrg.py
浏览文件 @
25dfa312
...
...
@@ -298,7 +298,8 @@ def test_broadcastable():
pvals_2
=
R
.
uniform
(
size
=
size2
)
pvals_2
=
pvals_2
/
tensor
.
sum
(
pvals_2
)
for
distribution
in
[
R
.
uniform
,
R
.
binomial
,
R
.
multinomial
,
R
.
multinomial_wo_replacement
,
R
.
normal
]:
for
distribution
in
[
R
.
uniform
,
R
.
normal
,
R
.
truncated_normal
,
R
.
binomial
,
R
.
multinomial
,
R
.
multinomial_wo_replacement
]:
# multinomial or multinomial_wo_replacement does not support "size" argument,
# the sizes of them are implicitly defined with "pvals" argument.
if
distribution
in
[
R
.
multinomial
,
R
.
multinomial_wo_replacement
]:
...
...
@@ -378,7 +379,6 @@ def t_binomial(mean, size, const_size, var_input, input, steps, rtol):
@attr
(
'slow'
)
def
test_normal0
():
steps
=
50
std
=
2.
if
(
config
.
mode
in
[
'DEBUG_MODE'
,
'DebugMode'
,
'FAST_COMPILE'
]
or
...
...
@@ -391,7 +391,7 @@ def test_normal0():
sample_size_odd
=
(
sample_size
[
0
],
sample_size
[
1
]
-
1
)
x
=
tensor
.
matrix
()
for
size
,
const_size
,
var_input
,
input
,
avg
,
rtol
,
std_tol
in
[
test_cases
=
[
(
sample_size
,
sample_size
,
[],
[],
-
5.
,
default_rtol
,
default_rtol
),
(
x
.
shape
,
sample_size
,
[
x
],
[
np
.
zeros
(
sample_size
,
dtype
=
config
.
floatX
)],
...
...
@@ -409,8 +409,9 @@ def test_normal0():
# test with few samples at the same time
((
1
,),
(
1
,),
[],
[],
-
5.
,
default_rtol
,
0.02
),
((
3
,),
(
3
,),
[],
[],
-
5.
,
default_rtol
,
0.02
),
]:
]
for
size
,
const_size
,
var_input
,
input
,
avg
,
rtol
,
std_tol
in
test_cases
:
R
=
MRG_RandomStreams
(
234
)
# Note: we specify `nstreams` to avoid a warning.
n
=
R
.
normal
(
size
=
size
,
avg
=
avg
,
std
=
std
,
...
...
@@ -438,6 +439,126 @@ def test_normal0():
prefix
=
'numpy '
,
allow_01
=
True
,
inputs
=
input
,
mean_rtol
=
rtol
)
@attr
(
'slow'
)
def
test_normal_truncation
():
# just a copy of test_normal0 with extra bound check
steps
=
50
std
=
2.
# standard deviation is slightly less than for a regular Gaussian
# constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
target_std
=
.
87962566103423978
*
std
if
(
config
.
mode
in
[
'DEBUG_MODE'
,
'DebugMode'
,
'FAST_COMPILE'
]
or
config
.
mode
==
'Mode'
and
config
.
linker
in
[
'py'
]):
sample_size
=
(
25
,
30
)
default_rtol
=
.
02
else
:
sample_size
=
(
999
,
50
)
default_rtol
=
.
01
sample_size_odd
=
(
sample_size
[
0
],
sample_size
[
1
]
-
1
)
x
=
tensor
.
matrix
()
test_cases
=
[
(
sample_size
,
sample_size
,
[],
[],
-
5.
,
default_rtol
,
default_rtol
),
(
x
.
shape
,
sample_size
,
[
x
],
[
np
.
zeros
(
sample_size
,
dtype
=
config
.
floatX
)],
-
5.
,
default_rtol
,
default_rtol
),
# test odd value
(
x
.
shape
,
sample_size_odd
,
[
x
],
[
np
.
zeros
(
sample_size_odd
,
dtype
=
config
.
floatX
)],
-
5.
,
default_rtol
,
default_rtol
),
(
sample_size
,
sample_size
,
[],
[],
np
.
arange
(
np
.
prod
(
sample_size
),
dtype
=
'float32'
)
.
reshape
(
sample_size
),
10.
*
std
/
np
.
sqrt
(
steps
),
default_rtol
),
# test empty size (scalar)
((),
(),
[],
[],
-
5.
,
default_rtol
,
0.02
),
# test with few samples at the same time
((
1
,),
(
1
,),
[],
[],
-
5.
,
default_rtol
,
0.02
),
((
3
,),
(
3
,),
[],
[],
-
5.
,
default_rtol
,
0.02
),
]
for
size
,
const_size
,
var_input
,
input
,
avg
,
rtol
,
std_tol
in
test_cases
:
R
=
MRG_RandomStreams
(
234
)
# Note: we specify `nstreams` to avoid a warning.
n
=
R
.
normal
(
size
=
size
,
avg
=
avg
,
std
=
std
,
truncate
=
True
,
nstreams
=
rng_mrg
.
guess_n_streams
(
size
,
warn
=
False
))
f
=
theano
.
function
(
var_input
,
n
)
# check if truncated at 2*std
samples
=
f
(
*
input
)
assert
np
.
all
(
avg
+
2
*
std
-
samples
>=
0
),
\
(
"bad upper bound?
%
s
%
s"
%
(
samples
,
avg
+
2
*
std
))
assert
np
.
all
(
samples
-
(
avg
-
2
*
std
)
>=
0
),
\
(
"bad lower bound?
%
s
%
s"
%
(
samples
,
avg
-
2
*
std
))
# Increase the number of steps if size implies only a few samples
if
np
.
prod
(
const_size
)
<
10
:
steps_
=
steps
*
50
else
:
steps_
=
steps
basictest
(
f
,
steps_
,
const_size
,
target_avg
=
avg
,
target_std
=
target_std
,
prefix
=
'mrg '
,
allow_01
=
True
,
inputs
=
input
,
mean_rtol
=
rtol
,
std_tol
=
std_tol
)
sys
.
stdout
.
flush
()
@attr
(
'slow'
)
def
test_truncated_normal
():
# just a copy of test_normal0 for truncated normal
steps
=
50
std
=
2.
if
(
config
.
mode
in
[
'DEBUG_MODE'
,
'DebugMode'
,
'FAST_COMPILE'
]
or
config
.
mode
==
'Mode'
and
config
.
linker
in
[
'py'
]):
sample_size
=
(
25
,
30
)
default_rtol
=
.
02
else
:
sample_size
=
(
999
,
50
)
default_rtol
=
.
01
sample_size_odd
=
(
sample_size
[
0
],
sample_size
[
1
]
-
1
)
x
=
tensor
.
matrix
()
test_cases
=
[
(
sample_size
,
sample_size
,
[],
[],
-
5.
,
default_rtol
,
default_rtol
),
(
x
.
shape
,
sample_size
,
[
x
],
[
np
.
zeros
(
sample_size
,
dtype
=
config
.
floatX
)],
-
5.
,
default_rtol
,
default_rtol
),
# test odd value
(
x
.
shape
,
sample_size_odd
,
[
x
],
[
np
.
zeros
(
sample_size_odd
,
dtype
=
config
.
floatX
)],
-
5.
,
default_rtol
,
default_rtol
),
(
sample_size
,
sample_size
,
[],
[],
np
.
arange
(
np
.
prod
(
sample_size
),
dtype
=
'float32'
)
.
reshape
(
sample_size
),
10.
*
std
/
np
.
sqrt
(
steps
),
default_rtol
),
# test empty size (scalar)
((),
(),
[],
[],
-
5.
,
default_rtol
,
0.02
),
# test with few samples at the same time
((
1
,),
(
1
,),
[],
[],
-
5.
,
default_rtol
,
0.02
),
((
3
,),
(
3
,),
[],
[],
-
5.
,
default_rtol
,
0.02
),
]
for
size
,
const_size
,
var_input
,
input
,
avg
,
rtol
,
std_tol
in
test_cases
:
R
=
MRG_RandomStreams
(
234
)
# Note: we specify `nstreams` to avoid a warning.
n
=
R
.
truncated_normal
(
size
=
size
,
avg
=
avg
,
std
=
std
,
nstreams
=
rng_mrg
.
guess_n_streams
(
size
,
warn
=
False
))
f
=
theano
.
function
(
var_input
,
n
)
# Increase the number of steps if size implies only a few samples
if
np
.
prod
(
const_size
)
<
10
:
steps_
=
steps
*
60
else
:
steps_
=
steps
basictest
(
f
,
steps_
,
const_size
,
target_avg
=
avg
,
target_std
=
std
,
prefix
=
'mrg '
,
allow_01
=
True
,
inputs
=
input
,
mean_rtol
=
rtol
,
std_tol
=
std_tol
)
sys
.
stdout
.
flush
()
def
basic_multinomialtest
(
f
,
steps
,
sample_size
,
target_pvals
,
n_samples
,
prefix
=
""
,
mean_rtol
=
0.04
):
...
...
@@ -519,6 +640,7 @@ class T_MRG(unittest.TestCase):
self
.
assertRaises
(
ValueError
,
R
.
binomial
,
size
)
self
.
assertRaises
(
ValueError
,
R
.
multinomial
,
size
,
1
,
[])
self
.
assertRaises
(
ValueError
,
R
.
normal
,
size
)
self
.
assertRaises
(
ValueError
,
R
.
truncated_normal
,
size
)
def
test_multiple_rng_aliasing
():
...
...
@@ -734,6 +856,19 @@ def test_undefined_grad():
assert_raises
(
theano
.
gradient
.
NullTypeGradError
,
theano
.
grad
,
out
,
(
avg
,
std
))
# checking truncated normal distribution
avg
=
tensor
.
scalar
()
out
=
srng
.
truncated_normal
((),
avg
=
avg
)
assert_raises
(
theano
.
gradient
.
NullTypeGradError
,
theano
.
grad
,
out
,
avg
)
std
=
tensor
.
scalar
()
out
=
srng
.
truncated_normal
((),
avg
=
0
,
std
=
std
)
assert_raises
(
theano
.
gradient
.
NullTypeGradError
,
theano
.
grad
,
out
,
std
)
out
=
srng
.
truncated_normal
((),
avg
=
avg
,
std
=
std
)
assert_raises
(
theano
.
gradient
.
NullTypeGradError
,
theano
.
grad
,
out
,
(
avg
,
std
))
def
test_f16_nonzero
(
mode
=
None
,
op_to_check
=
rng_mrg
.
mrg_uniform
):
srng
=
MRG_RandomStreams
(
seed
=
utt
.
fetch_seed
())
...
...
@@ -755,6 +890,8 @@ def test_target_parameter():
assert
isinstance
(
f
(),
np
.
ndarray
)
basic_target_parameter_test
(
srng
.
uniform
((
3
,
2
),
target
=
'cpu'
))
basic_target_parameter_test
(
srng
.
normal
((
3
,
2
),
target
=
'cpu'
))
basic_target_parameter_test
(
srng
.
truncated_normal
((
3
,
2
),
target
=
'cpu'
))
basic_target_parameter_test
(
srng
.
binomial
((
3
,
2
),
target
=
'cpu'
))
basic_target_parameter_test
(
srng
.
multinomial
(
pvals
=
pvals
.
astype
(
'float32'
),
target
=
'cpu'
))
basic_target_parameter_test
(
srng
.
choice
(
p
=
pvals
.
astype
(
'float32'
),
replace
=
False
,
target
=
'cpu'
))
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论