Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
927aac6a
提交
927aac6a
authored
5月 10, 2017
作者:
notoraptor
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Wrap Op params for theano.sandbox.rng_mrg.mrg_uniform:
- inplace (bool scalar) - ndim (integer scalar) - otypenum (integer scalar) - otype_is_float32 (bool scalar)
上级
354097d3
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
137 行增加
和
94 行删除
+137
-94
rng_mrg.py
theano/sandbox/rng_mrg.py
+137
-94
没有找到文件。
theano/sandbox/rng_mrg.py
浏览文件 @
927aac6a
...
@@ -21,7 +21,8 @@ from theano.tensor import (TensorType, as_tensor_variable, get_vector_length,
...
@@ -21,7 +21,8 @@ from theano.tensor import (TensorType, as_tensor_variable, get_vector_length,
cast
,
opt
,
scal
)
cast
,
opt
,
scal
)
from
theano.tensor
import
sqrt
,
log
,
sin
,
cos
,
join
,
prod
from
theano.tensor
import
sqrt
,
log
,
sin
,
cos
,
join
,
prod
from
theano.compile
import
optdb
from
theano.compile
import
optdb
from
theano.gof
import
local_optimizer
from
theano.gof
import
local_optimizer
,
ParamsType
from
theano.scalar
import
bool
as
bool_t
,
int32
as
int_t
from
.
import
multinomial
from
.
import
multinomial
...
@@ -286,6 +287,14 @@ def mrg_next_value(rstate, new_rstate):
...
@@ -286,6 +287,14 @@ def mrg_next_value(rstate, new_rstate):
class
mrg_uniform_base
(
Op
):
class
mrg_uniform_base
(
Op
):
# TODO : need description for class, parameter
# TODO : need description for class, parameter
__props__
=
(
"output_type"
,
"inplace"
)
__props__
=
(
"output_type"
,
"inplace"
)
params_type
=
ParamsType
(
inplace
=
bool_t
,
# following params will come from self.output_type.
# NB: As output object may not be allocated in C code,
# we can not be sure to get these properties from output.
# So, we should better get them as params from self.output_type.
ndim
=
int_t
,
otypenum
=
int_t
,
otype_is_float32
=
bool_t
)
def
__init__
(
self
,
output_type
,
inplace
=
False
):
def
__init__
(
self
,
output_type
,
inplace
=
False
):
Op
.
__init__
(
self
)
Op
.
__init__
(
self
)
...
@@ -295,6 +304,13 @@ class mrg_uniform_base(Op):
...
@@ -295,6 +304,13 @@ class mrg_uniform_base(Op):
self
.
destroy_map
=
{
0
:
[
0
]}
self
.
destroy_map
=
{
0
:
[
0
]}
self
.
warned_numpy_version
=
False
self
.
warned_numpy_version
=
False
# These attributes (used as params) are created as properties
# to make them available even for old pickled objects, e.g.
# when testing old interface or when using FAST_COMPILE mode.
ndim
=
property
(
lambda
self
:
self
.
output_type
.
ndim
)
otypenum
=
property
(
lambda
self
:
np
.
dtype
(
self
.
output_type
.
dtype
)
.
num
)
otype_is_float32
=
property
(
lambda
self
:
self
.
output_type
.
dtype
==
'float32'
)
def
__str__
(
self
):
def
__str__
(
self
):
if
self
.
inplace
:
if
self
.
inplace
:
s
=
"inplace"
s
=
"inplace"
...
@@ -325,6 +341,7 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -325,6 +341,7 @@ class mrg_uniform(mrg_uniform_base):
broad
.
append
(
tensor
.
extract_constant
(
size
[
i
])
==
1
)
broad
.
append
(
tensor
.
extract_constant
(
size
[
i
])
==
1
)
output_type
=
self
.
output_type
.
clone
(
broadcastable
=
broad
)()
output_type
=
self
.
output_type
.
clone
(
broadcastable
=
broad
)()
rstate
=
as_tensor_variable
(
rstate
)
rstate
=
as_tensor_variable
(
rstate
)
size
=
as_tensor_variable
(
size
)
return
Apply
(
self
,
return
Apply
(
self
,
[
rstate
,
size
],
[
rstate
,
size
],
[
rstate
.
type
(),
output_type
])
[
rstate
.
type
(),
output_type
])
...
@@ -337,7 +354,7 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -337,7 +354,7 @@ class mrg_uniform(mrg_uniform_base):
op
=
cls
(
TensorType
(
dtype
,
(
False
,)
*
ndim
))
op
=
cls
(
TensorType
(
dtype
,
(
False
,)
*
ndim
))
return
op
(
rstate
,
v_size
)
return
op
(
rstate
,
v_size
)
def
perform
(
self
,
node
,
inp
,
out
):
def
perform
(
self
,
node
,
inp
,
out
,
params
):
rstate
,
size
=
inp
rstate
,
size
=
inp
o_rstate
,
o_sample
=
out
o_rstate
,
o_sample
=
out
n_elements
=
1
n_elements
=
1
...
@@ -371,45 +388,105 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -371,45 +388,105 @@ class mrg_uniform(mrg_uniform_base):
o_rstate
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
rstate
)
o_rstate
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
rstate
)
o_sample
[
0
]
=
node
.
outputs
[
1
]
.
type
.
filter
(
rval
.
reshape
(
size
))
o_sample
[
0
]
=
node
.
outputs
[
1
]
.
type
.
filter
(
rval
.
reshape
(
size
))
def
c_support_code
(
self
):
return
"
\n
"
.
join
(
"""
void cpu_rng_mrg_uniform_
%(dtype)
s(PyArrayObject* o_sample, PyArrayObject* o_rstate,
npy_int64 n_elements, int n_streams) {
const npy_int32 i0 = 0;
const npy_int32 i7 = 7;
const npy_int32 i9 = 9;
const npy_int32 i15 = 15;
const npy_int32 i16 = 16;
const npy_int32 i22 = 22;
const npy_int32 i24 = 24;
const npy_int32 M1 = 2147483647; //2^31 - 1
const npy_int32 M2 = 2147462579; //2^31 - 21069
const npy_int32 MASK12 = 511; //2^9 - 1
const npy_int32 MASK13 = 16777215; //2^24 - 1
const npy_int32 MASK2 = 65535; //2^16 - 1
const npy_int32 MULT2 = 21069;
%(dtype)
s* sample_data = (
%(dtype)
s *) PyArray_DATA(o_sample);
npy_int32* state_data = (npy_int32 *) PyArray_DATA(o_rstate);
for (int i = 0; i < n_elements; ++i)
{
npy_int32 * state_data_i = state_data + (i
%%
n_streams)*6;
npy_int32 y1, y2, x11, x12, x13, x21, x22, x23;
x11 = state_data_i[0];
x12 = state_data_i[1];
x13 = state_data_i[2];
x21 = state_data_i[3];
x22 = state_data_i[4];
x23 = state_data_i[5];
y1 = ((x12 & MASK12) << i22) + (x12 >> i9) + ((x13 & MASK13) << i7) + (x13 >> i24);
if ((y1 < 0 || y1 >= M1)) //must also check overflow
y1 -= M1;
y1 += x13;
if ((y1 < 0 or y1 >= M1))
y1 -= M1;
x13 = x12;
x12 = x11;
x11 = y1;
y1 = ((x21 & MASK2) << i15) + (MULT2 * (x21 >> i16));
if (y1 < 0 || y1 >= M2)
y1 -= M2;
y2 = ((x23 & MASK2) << i15) + (MULT2 * (x23 >> i16));
if (y2 < 0 || y2 >= M2)
y2 -= M2;
y2 += x23;
if (y2 < 0 || y2 >= M2)
y2 -= M2;
y2 += y1;
if (y2 < 0 or y2 >= M2)
y2 -= M2;
x23 = x22;
x22 = x21;
x21 = y2;
if (x11 <= x21) {
assert((x11 - x21 + M1) <= M1);
sample_data[i] = (x11 - x21 + M1) *
%(NORM)
s;
}
else
{
assert(x11 - x21 <= M1);
sample_data[i] = (x11 - x21) *
%(NORM)
s;
}
state_data_i[0]= x11;
state_data_i[1]= x12;
state_data_i[2]= x13;
state_data_i[3]= x21;
state_data_i[4]= x22;
state_data_i[5]= x23;
}
}
"""
%
dict
(
dtype
=
dtype
,
NORM
=
NORM
)
for
dtype
,
NORM
in
(
(
'npy_float32'
,
'4.6566126e-10f'
),
(
'npy_float64'
,
'4.656612873077392578125e-10'
)
))
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
rstate
,
size
=
inp
# If we try to use the C code here with something else than a
# If we try to use the C code here with something else than a
# TensorType, something is wrong (likely one of the GPU ops
# TensorType, something is wrong (likely one of the GPU ops
# not defining C code correctly).
# not defining C code correctly).
assert
isinstance
(
node
.
inputs
[
0
]
.
type
,
TensorType
)
# NB: first node should always be a TensorType, as it is created with as_tensor_variable() into make_node.
o_rstate
,
o_sample
=
out
if
self
.
inplace
:
o_rstate_requirement
=
(
'NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_ALIGNED'
)
else
:
o_rstate_requirement
=
(
'NPY_ARRAY_ENSURECOPY|NPY_ARRAY_C_CONTIGUOUS|'
'NPY_ARRAY_ALIGNED'
)
ndim
=
self
.
output_type
.
ndim
o_type_num
=
np
.
asarray
(
0
,
dtype
=
self
.
output_type
.
dtype
)
.
dtype
.
num
fail
=
sub
[
'fail'
]
if
self
.
output_type
.
dtype
==
'float32'
:
otype
=
'float'
NORM
=
'4.6566126e-10f'
# np.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that
# np.float32(number * M1) < 1.0
else
:
otype
=
'double'
NORM
=
'4.656612873077392578125e-10'
return
"""
return
"""
//////// <code generated by mrg_uniform>
//////// <code generated by mrg_uniform>
// The +1 is to avoid odims[0] which fails on windows
// We have to read size[i] as an int64, but odims has to be intp*
// for NumPy on 32-bit platforms.
npy_intp odims[
%(ndim)
s+1];
npy_int64 odims_i;
npy_int64 odims_i;
npy_int64 n_elements = 1;
npy_int64 n_elements = 1;
int n_streams = 0;
int n_streams = 0;
int must_alloc_sample = ((NULL ==
%(o_sample)
s)
int must_alloc_sample = ((NULL ==
%(o_sample)
s)
|| (PyArray_NDIM(
%(o_sample)
s) !=
%(
ndim)
s
)
|| (PyArray_NDIM(
%(o_sample)
s) !=
%(
params)
s->ndim
)
|| !(PyArray_ISCONTIGUOUS(
%(o_sample)
s)));
|| !(PyArray_ISCONTIGUOUS(
%(o_sample)
s)));
%(otype)
s * sample_data;
int o_rstate_requirement =
%(params)
s->inplace ?
npy_int32 * state_data;
(NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_ALIGNED) :
(NPY_ARRAY_ENSURECOPY|NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_ALIGNED);
const npy_int32 i0 = 0;
const npy_int32 i0 = 0;
const npy_int32 i7 = 7;
const npy_int32 i7 = 7;
...
@@ -426,19 +503,27 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -426,19 +503,27 @@ class mrg_uniform(mrg_uniform_base):
const npy_int32 MASK2 = 65535; //2^16 - 1
const npy_int32 MASK2 = 65535; //2^16 - 1
const npy_int32 MULT2 = 21069;
const npy_int32 MULT2 = 21069;
// We have to read size[i] as an int64, but odims has to be intp*
// for NumPy on 32-bit platforms.
npy_intp* odims = (npy_intp*)malloc(
%(params)
s->ndim * sizeof(npy_intp));
if (odims == NULL) {
PyErr_NoMemory();
%(just_fail)
s
}
if (PyArray_NDIM(
%(size)
s) != 1)
if (PyArray_NDIM(
%(size)
s) != 1)
{
{
PyErr_SetString(PyExc_ValueError, "size must be vector");
PyErr_SetString(PyExc_ValueError, "size must be vector");
%(fail)
s
%(fail)
s
}
}
if (PyArray_DIMS(
%(size)
s)[0] !=
%(
ndim)
s
)
if (PyArray_DIMS(
%(size)
s)[0] !=
%(
params)
s->ndim
)
{
{
PyErr_Format(PyExc_ValueError, "size must have length
%%
i (not
%%
i)",
PyErr_Format(PyExc_ValueError, "size must have length
%%
i (not
%%
i)",
%(
ndim)
s
, int(PyArray_DIMS(
%(size)
s)[0]));
%(
params)
s->ndim
, int(PyArray_DIMS(
%(size)
s)[0]));
%(fail)
s
%(fail)
s
}
}
for (int i = 0; i <
%(
ndim)
s
; ++i)
for (int i = 0; i <
%(
params)
s->ndim
; ++i)
{
{
odims_i = *(dtype_
%(size)
s *)PyArray_GETPTR1(
%(size)
s, i);
odims_i = *(dtype_
%(size)
s *)PyArray_GETPTR1(
%(size)
s, i);
odims[i] = odims_i;
odims[i] = odims_i;
...
@@ -459,7 +544,7 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -459,7 +544,7 @@ class mrg_uniform(mrg_uniform_base):
if (must_alloc_sample)
if (must_alloc_sample)
{
{
Py_XDECREF(
%(o_sample)
s);
Py_XDECREF(
%(o_sample)
s);
%(o_sample)
s = (PyArrayObject*)PyArray_SimpleNew(
%(
ndim)
s, odims,
%(o_type_num)
s
);
%(o_sample)
s = (PyArrayObject*)PyArray_SimpleNew(
%(
params)
s->ndim, odims,
%(params)
s->otypenum
);
if(!
%(o_sample)
s) {
if(!
%(o_sample)
s) {
PyErr_SetString(PyExc_MemoryError, "failed to alloc mrg_uniform output");
PyErr_SetString(PyExc_MemoryError, "failed to alloc mrg_uniform output");
%(fail)
s
%(fail)
s
...
@@ -468,7 +553,7 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -468,7 +553,7 @@ class mrg_uniform(mrg_uniform_base):
Py_XDECREF(
%(o_rstate)
s);
Py_XDECREF(
%(o_rstate)
s);
%(o_rstate)
s = (PyArrayObject*)PyArray_FromAny(
%(o_rstate)
s = (PyArrayObject*)PyArray_FromAny(
(PyObject*)
%(rstate)
s,
(PyObject*)
%(rstate)
s,
NULL, 0, 0,
%(o_rstate_requirement)
s
,NULL);
NULL, 0, 0,
o_rstate_requirement
,NULL);
if (PyArray_NDIM(
%(o_rstate)
s) != 2)
if (PyArray_NDIM(
%(o_rstate)
s) != 2)
{
{
...
@@ -487,69 +572,27 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -487,69 +572,27 @@ class mrg_uniform(mrg_uniform_base):
}
}
n_streams = PyArray_DIMS(
%(o_rstate)
s)[0];
n_streams = PyArray_DIMS(
%(o_rstate)
s)[0];
sample_data = (
%(otype)
s *) PyArray_DATA(
%(o_sample)
s);
if (
%(params)
s->otype_is_float32) {
state_data = (npy_int32 *) PyArray_DATA(
%(o_rstate)
s);
cpu_rng_mrg_uniform_npy_float32(
%(o_sample)
s,
%(o_rstate)
s, n_elements, n_streams);
for (int i = 0; i < n_elements; ++i)
} else {
{
cpu_rng_mrg_uniform_npy_float64(
%(o_sample)
s,
%(o_rstate)
s, n_elements, n_streams);
npy_int32 * state_data_i = state_data + (i
%%
n_streams)*6;
npy_int32 y1, y2, x11, x12, x13, x21, x22, x23;
x11 = state_data_i[0];
x12 = state_data_i[1];
x13 = state_data_i[2];
x21 = state_data_i[3];
x22 = state_data_i[4];
x23 = state_data_i[5];
y1 = ((x12 & MASK12) << i22) + (x12 >> i9) + ((x13 & MASK13) << i7) + (x13 >> i24);
if ((y1 < 0 || y1 >= M1)) //must also check overflow
y1 -= M1;
y1 += x13;
if ((y1 < 0 or y1 >= M1))
y1 -= M1;
x13 = x12;
x12 = x11;
x11 = y1;
y1 = ((x21 & MASK2) << i15) + (MULT2 * (x21 >> i16));
if (y1 < 0 || y1 >= M2)
y1 -= M2;
y2 = ((x23 & MASK2) << i15) + (MULT2 * (x23 >> i16));
if (y2 < 0 || y2 >= M2)
y2 -= M2;
y2 += x23;
if (y2 < 0 || y2 >= M2)
y2 -= M2;
y2 += y1;
if (y2 < 0 or y2 >= M2)
y2 -= M2;
x23 = x22;
x22 = x21;
x21 = y2;
if (x11 <= x21) {
assert((x11 - x21 + M1) <= M1);
sample_data[i] = (x11 - x21 + M1) *
%(NORM)
s;
}
else
{
assert(x11 - x21 <= M1);
sample_data[i] = (x11 - x21) *
%(NORM)
s;
}
state_data_i[0]= x11;
state_data_i[1]= x12;
state_data_i[2]= x13;
state_data_i[3]= x21;
state_data_i[4]= x22;
state_data_i[5]= x23;
}
}
free(odims);
//////// </ code generated by mrg_uniform>
//////// </ code generated by mrg_uniform>
"""
%
locals
()
"""
%
dict
(
rstate
=
inp
[
0
],
size
=
inp
[
1
],
o_rstate
=
out
[
0
],
o_sample
=
out
[
1
],
params
=
sub
[
'params'
],
just_fail
=
sub
[
'fail'
],
fail
=
"""
{
free(odims);
%(fail)
s
}
"""
%
dict
(
fail
=
sub
[
'fail'
]))
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
8
,
)
return
(
9
,
)
def
guess_n_streams
(
size
,
warn
=
False
):
def
guess_n_streams
(
size
,
warn
=
False
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论