Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
96a4acd5
提交
96a4acd5
authored
9月 16, 2011
作者:
James Bergstra
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
adding CURAND_RandomStreams with support for uniform and normal
上级
c0fd55c9
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
417 行增加
和
0 行删除
+417
-0
__init__.py
theano/sandbox/cuda/__init__.py
+1
-0
rng_curand.py
theano/sandbox/cuda/rng_curand.py
+332
-0
test_rng_curand.py
theano/sandbox/cuda/tests/test_rng_curand.py
+84
-0
没有找到文件。
theano/sandbox/cuda/__init__.py
浏览文件 @
96a4acd5
...
@@ -170,6 +170,7 @@ if cuda_available:
...
@@ -170,6 +170,7 @@ if cuda_available:
from
basic_ops
import
host_from_gpu
,
gpu_from_host
,
as_cuda_array
from
basic_ops
import
host_from_gpu
,
gpu_from_host
,
as_cuda_array
import
opt
import
opt
import
cuda_ndarray
import
cuda_ndarray
from
rng_curand
import
CURAND_RandomStreams
def
use
(
device
,
def
use
(
device
,
...
...
theano/sandbox/cuda/rng_curand.py
0 → 100644
浏览文件 @
96a4acd5
"""
Define CURAND_RandomStreams - backed by CURAND
"""
__authors__
=
"James Bergstra"
__copyright__
=
"(c) 2011, University of Montreal"
__license__
=
"3-clause BSD License"
__contact__
=
"theano-dev@googlegroups.com"
import
sys
import
numpy
import
theano.gof
from
theano.sandbox.cuda
import
CudaNdarrayType
from
theano.tensor
import
(
get_vector_length
,
cast
,
opt
)
from
theano.compile
import
optdb
from
theano.gof
import
local_optimizer
config
=
theano
.
config
class
CURAND_Base
(
theano
.
gof
.
Op
):
""" Base class for a random number generator implemented in CURAND.
The random number generator itself is an opaque reference managed by CURAND.
This Op uses a generic-typed shared variable to point to a CObject that
encapsulates this opaque reference.
Each random variable is created with a generator of None.
The actual random number generator is allocated from the seed, on the first
call to allocate random numbers (see c_code).
:note:
One caveat is that the random number state is simply not serializable.
Consequently, attempts to serialize functions compiled with these random
numbers will fail.
"""
def
__init__
(
self
,
output_type
,
seed
,
destructive
):
"""
output_type: a theano type (e.g. tensor.fvector)
seed: integer
destructive: True or False (on the generator)
"""
theano
.
gof
.
Op
.
__init__
(
self
)
self
.
destructive
=
destructive
self
.
seed
=
seed
if
self
.
destructive
:
self
.
destroy_map
=
{
0
:
[
0
]}
self
.
output_type
=
output_type
def
as_destructive
(
self
):
"""Return an destructive version of self"""
return
self
.
__class__
(
self
.
output_type
,
self
.
seed
,
destructive
=
True
)
def
_config
(
self
):
"""Return a tuple of attributes that define the Op"""
return
(
self
.
destructive
,
self
.
output_type
,
self
.
seed
,
)
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
_config
()
==
other
.
_config
()
def
__hash__
(
self
):
return
hash
((
type
(
self
),
self
.
_config
()))
def
make_node
(
self
,
generator
,
size
):
return
theano
.
gof
.
Apply
(
self
,
[
generator
,
size
],
[
generator
.
type
(),
self
.
output_type
()])
@classmethod
def
new_auto_update
(
cls
,
generator
,
ndim
,
dtype
,
size
,
seed
):
"""
Return a symbolic sample from generator.
cls dictates the random variable (e.g. uniform, normal)
"""
v_size
=
theano
.
tensor
.
as_tensor_variable
(
size
)
if
ndim
is
None
:
ndim
=
get_vector_length
(
v_size
)
self
=
cls
(
output_type
=
CudaNdarrayType
((
False
,)
*
ndim
),
seed
=
seed
,
destructive
=
False
)
o_gen
,
sample
=
self
(
generator
,
cast
(
v_size
,
'int32'
))
sample
.
generator
=
generator
# for user
sample
.
update
=
(
generator
,
o_gen
)
# for CURAND_RandomStreams
generator
.
default_update
=
o_gen
# for pfunc uses this attribute
return
sample
def
c_headers
(
self
):
return
[
"curand.h"
]
def
c_libraries
(
self
):
return
[
'curand'
]
def
c_support_code
(
self
):
return
"""
void free_generator(void *_gen)
{
curandGenerator_t * gen = (curandGenerator_t*)_gen;
curandStatus_t err = curandDestroyGenerator(*gen);
if (err != CURAND_STATUS_SUCCESS)
{
fprintf(stderr, "Failure (
%%
i) in destroying CURAND generator",
(int)err);
}
free(_gen);
}
"""
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
i_generator
,
size
=
inp
o_generator
,
o_sample
=
out
destructive
=
int
(
self
.
destructive
)
ndim
=
self
.
output_type
.
ndim
o_type_num
=
numpy
.
asarray
(
0
,
dtype
=
self
.
output_type
.
dtype
)
.
dtype
.
num
fail
=
sub
[
'fail'
]
seed
=
self
.
seed
call_string
=
self
.
_curand_call_str
(
o_sample
=
o_sample
)
if
self
.
output_type
.
dtype
==
'float32'
:
otype
=
'float'
else
:
otype
=
'double'
return
"""
//////// <code generated by CURAND_Base>
int odims[
%(ndim)
s];
int n_elements = 1;
int must_alloc_sample = ((NULL ==
%(o_sample)
s)
|| !CudaNdarray_Check(py_
%(o_sample)
s)
|| (
%(o_sample)
s->nd !=
%(ndim)
s));
if (
%(size)
s->nd != 1)
{
PyErr_SetString(PyExc_ValueError, "size must be vector");
%(fail)
s
}
if (
%(size)
s->dimensions[0] !=
%(ndim)
s)
{
PyErr_Format(PyExc_ValueError, "size must have length
%%
i (not
%%
i)",
%(ndim)
s,
%(size)
s->dimensions[0]);
%(fail)
s
}
if (
%(size)
s->descr->type_num != PyArray_INT32)
{
PyErr_SetString(PyExc_ValueError, "size must be int32");
%(fail)
s
}
for (int i = 0; i <
%(ndim)
s; ++i)
{
odims[i] = ((npy_int32*)(
%(size)
s->data +
%(size)
s->strides[0] * i))[0];
n_elements *= odims[i];
must_alloc_sample = (must_alloc_sample
|| CudaNdarray_HOST_DIMS(
%(o_sample)
s)[i] != odims[i]);
}
if (must_alloc_sample)
{
Py_XDECREF(
%(o_sample)
s);
%(o_sample)
s = (CudaNdarray*)CudaNdarray_NewDims(
%(ndim)
s, odims);
if(!
%(o_sample)
s)
{
%(fail)
s;
}
}
if (!PyCObject_Check(
%(i_generator)
s))
{
// allocate a new generator for o_generator
Py_XDECREF(
%(o_generator)
s);
curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
assert(gen);
if (CURAND_STATUS_SUCCESS !=
curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
%(fail)
s;
}
if (CURAND_STATUS_SUCCESS !=
curandSetPseudoRandomGeneratorSeed(*gen,
%(seed)
s))
{
PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
%(fail)
s;
}
%(o_generator)
s = PyCObject_FromVoidPtr(gen, &free_generator);
assert (
%(i_generator)
s == Py_None);
}
else if (
%(destructive)
s)
{
// use i_generator for o_generator
Py_XDECREF(
%(o_generator)
s);
Py_INCREF(
%(i_generator)
s);
%(o_generator)
s =
%(i_generator)
s;
}
else
{
// copy i_generator for o_generator
PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
%(fail)
s;
}
{
curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(
%(o_generator)
s);
curandStatus_t err =
%(call_string)
s
if (err != CURAND_STATUS_SUCCESS)
{
PyErr_Format(PyExc_RuntimeError, "curand error generating random normals
%%
i", (int)err);
%(fail)
s;
}
cudaThreadSynchronize();
}
//////// </ code generated by CURAND_Base>
"""
%
locals
()
def
c_code_cache_version
(
self
):
return
(
1
,)
class
CURAND_Normal
(
CURAND_Base
):
"""Op to draw normal numbers using CURAND
"""
def
_curand_call_str
(
self
,
**
kwargs
):
return
"""curandGenerateNormal(*gen,
CudaNdarray_DEV_DATA(
%(o_sample)
s),
n_elements,
0.0, 1.0);
"""
%
kwargs
class
CURAND_Uniform
(
CURAND_Base
):
"""Op to draw uniform numbers using CURAND
"""
def
_curand_call_str
(
self
,
**
kwargs
):
return
""" curandGenerateUniform(*gen,
CudaNdarray_DEV_DATA(
%(o_sample)
s),
n_elements);
"""
%
kwargs
class
CURAND_RandomStreams
(
object
):
"""RandomStreams instance that creates CURAND-based random variables.
One caveat is that generators are not serializable.
"""
def
__init__
(
self
,
seed
):
""" seed: int
"""
self
.
_start_seed
=
seed
self
.
_cur_seed
=
seed
self
.
_has_lost_states
=
False
#True if self.state_updates is incomplete
self
.
state_updates
=
[]
def
updates
(
self
):
"""List of all (old, new) generator update pairs created by this
instance.
"""
return
list
(
self
.
state_updates
)
def
next_seed
(
self
):
"""Return a unique seed for initializing a random variable.
"""
self
.
_cur_seed
+=
1
return
self
.
_cur_seed
-
1
def
__getstate__
(
self
):
rval
=
dict
(
self
.
__dict__
)
# the CObject used to store updates cannot be serialized
rval
[
'state_updates'
]
=
[]
rval
[
'_has_lost_states'
]
=
True
return
rval
def
uniform
(
self
,
size
,
low
=
0.0
,
high
=
1.0
,
ndim
=
None
,
dtype
=
config
.
floatX
):
"""
Return symbolic tensor of uniform numbers.
"""
if
isinstance
(
size
,
tuple
):
msg
=
"size must be a tuple of int or a Theano variable"
assert
all
([
isinstance
(
i
,
int
)
or
isinstance
(
i
,
Variable
)
for
i
in
size
]),
msg
else
:
msg
=
"size must be a tuple of int or a Theano variable"
assert
isinstance
(
size
,
Variable
)
and
size
.
ndim
==
1
,
msg
generator
=
theano
.
shared
(
None
)
#makes a generic
s_size
=
theano
.
tensor
.
as_tensor_variable
(
size
)
u
=
CURAND_Uniform
.
new_auto_update
(
generator
,
ndim
,
dtype
,
s_size
,
self
.
next_seed
())
self
.
state_updates
.
append
(
u
.
update
)
rval
=
u
*
(
high
-
low
)
+
low
if
u
.
type
.
broadcastable
!=
rval
.
type
.
broadcastable
:
raise
NotImplementedError
(
'Increase the size to match the broadcasting pattern of `low` and `high` arguments'
)
return
rval
def
normal
(
self
,
size
=
None
,
avg
=
0.0
,
std
=
1.0
,
ndim
=
None
,
dtype
=
config
.
floatX
):
"""
Return symbolic tensor of normally-distributed numbers.
:param: size: Can be a list of integer or Theano variable(ex: the shape of other Theano Variable)
"""
if
isinstance
(
size
,
tuple
):
msg
=
"size must be a tuple of int or a Theano variable"
assert
all
([
isinstance
(
i
,
int
)
or
isinstance
(
i
,
Variable
)
for
i
in
size
]),
msg
else
:
msg
=
"size must be a tuple of int or a Theano variable"
assert
isinstance
(
size
,
Variable
)
and
size
.
ndim
==
1
,
msg
generator
=
theano
.
shared
(
None
)
#makes a generic
s_size
=
theano
.
tensor
.
as_tensor_variable
(
size
)
u
=
CURAND_Normal
.
new_auto_update
(
generator
,
ndim
,
dtype
,
s_size
,
self
.
next_seed
())
self
.
state_updates
.
append
(
u
.
update
)
rval
=
u
*
std
+
avg
if
u
.
type
.
broadcastable
!=
rval
.
type
.
broadcastable
:
raise
NotImplementedError
(
'Increase the size to match the broadcasting pattern of `low` and `high` arguments'
)
return
rval
@local_optimizer
([
None
])
def
local_destructive
(
node
):
op
=
node
.
op
if
isinstance
(
op
,
CURAND_Base
)
and
not
op
.
destructive
:
# op might be gpu version
new_op
=
op
.
as_destructive
()
return
new_op
.
make_node
(
*
node
.
inputs
)
.
outputs
return
False
optdb
.
register
(
'CURAND_destructive'
,
opt
.
in2out
(
local_destructive
,
ignore_newtrees
=
True
),
99
,
'fast_run'
,
'inplace'
)
theano/sandbox/cuda/tests/test_rng_curand.py
0 → 100644
浏览文件 @
96a4acd5
import
numpy
import
theano
from
theano.sandbox.cuda.rng_curand
import
CURAND_RandomStreams
from
theano.sandbox.rng_mrg
import
MRG_RandomStreams
def
test_uniform_basic
():
rng
=
CURAND_RandomStreams
(
234
)
u0
=
rng
.
uniform
((
10
,
10
))
u1
=
rng
.
uniform
((
10
,
10
))
f0
=
theano
.
function
([],
u0
)
f1
=
theano
.
function
([],
u1
)
v0list
=
[
f0
()
for
i
in
range
(
3
)]
v1list
=
[
f1
()
for
i
in
range
(
3
)]
#print v0list
#print v1list
# assert that elements are different in a few ways
assert
numpy
.
all
(
v0list
[
0
]
!=
v0list
[
1
])
assert
numpy
.
all
(
v1list
[
0
]
!=
v1list
[
1
])
assert
numpy
.
all
(
v0list
[
0
]
!=
v1list
[
0
])
for
v
in
v0list
:
assert
v
.
shape
==
(
10
,
10
)
assert
v
.
min
()
>=
0
assert
v
.
max
()
<=
1
assert
v
.
min
()
<
v
.
max
()
assert
.
25
<=
v
.
mean
()
<=
.
75
def
test_normal_basic
():
rng
=
CURAND_RandomStreams
(
234
)
u0
=
rng
.
normal
((
10
,
10
))
u1
=
rng
.
normal
((
10
,
10
))
f0
=
theano
.
function
([],
u0
)
f1
=
theano
.
function
([],
u1
)
v0list
=
[
f0
()
for
i
in
range
(
3
)]
v1list
=
[
f1
()
for
i
in
range
(
3
)]
#print v0list
#print v1list
# assert that elements are different in a few ways
assert
numpy
.
all
(
v0list
[
0
]
!=
v0list
[
1
])
assert
numpy
.
all
(
v1list
[
0
]
!=
v1list
[
1
])
assert
numpy
.
all
(
v0list
[
0
]
!=
v1list
[
0
])
for
v
in
v0list
:
assert
v
.
shape
==
(
10
,
10
)
assert
v
.
min
()
<
v
.
max
()
assert
-.
5
<=
v
.
mean
()
<=
.
5
def
test_speed
():
mrg
=
MRG_RandomStreams
()
crn
=
CURAND_RandomStreams
(
234
)
N
=
1000
*
100
dest
=
theano
.
shared
(
numpy
.
zeros
(
N
,
dtype
=
theano
.
config
.
floatX
))
mrg_u
=
theano
.
function
([],
[],
updates
=
{
dest
:
mrg
.
uniform
((
N
,))},
profile
=
'mrg uniform'
)
crn_u
=
theano
.
function
([],
[],
updates
=
{
dest
:
crn
.
uniform
((
N
,))},
profile
=
'crn uniform'
)
mrg_n
=
theano
.
function
([],
[],
updates
=
{
dest
:
mrg
.
normal
((
N
,))},
profile
=
'mrg normal'
)
crn_n
=
theano
.
function
([],
[],
updates
=
{
dest
:
crn
.
normal
((
N
,))},
profile
=
'crn normal'
)
for
f
in
mrg_u
,
crn_u
,
mrg_n
,
crn_n
:
# don't time the first call, it has some startup cost
print
'DEBUGPRINT'
print
'----------'
theano
.
printing
.
debugprint
(
f
)
for
i
in
range
(
100
):
for
f
in
mrg_u
,
crn_u
,
mrg_n
,
crn_n
:
# don't time the first call, it has some startup cost
f
.
fn
.
time_thunks
=
(
i
>
0
)
f
()
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论