Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9950ce08
提交
9950ce08
authored
11月 06, 2013
作者:
Pascal Lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1580 from nouiz/deprecated
[MRG]Deprecated
上级
1d639d66
7c42bebe
全部展开
显示空白字符变更
内嵌
并排
正在显示
23 个修改的文件
包含
138 行增加
和
112 行删除
+138
-112
MANIFEST.in
MANIFEST.in
+1
-0
fibby.txt
doc/extending/fibby.txt
+3
-3
basic.txt
doc/library/tensor/basic.txt
+32
-3
test_debugmode.py
theano/compile/tests/test_debugmode.py
+4
-4
cmodule.py
theano/gof/cmodule.py
+1
-1
lazylinker_c.c
theano/gof/lazylinker_c.c
+0
-0
lazylinker_c.py
theano/gof/lazylinker_c.py
+1
-4
check_blas.py
theano/misc/check_blas.py
+1
-0
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+53
-68
rng_mrg.py
theano/sandbox/rng_mrg.py
+42
-29
scan_perform.c
theano/scan_module/scan_perform.c
+0
-0
scan_perform.pyx
theano/scan_module/scan_perform.pyx
+0
-0
scan_perform_ext.py
theano/scan_module/scan_perform_ext.py
+0
-0
basic.py
theano/sparse/basic.py
+0
-0
opt.py
theano/sparse/opt.py
+0
-0
basic.py
theano/tensor/basic.py
+0
-0
elemwise.py
theano/tensor/elemwise.py
+0
-0
nnet.py
theano/tensor/nnet/nnet.py
+0
-0
subtensor.py
theano/tensor/subtensor.py
+0
-0
test_blas.py
theano/tensor/tests/test_blas.py
+0
-0
test_inc_subtensor.py
theano/tensor/tests/test_inc_subtensor.py
+0
-0
type.py
theano/tensor/type.py
+0
-0
test_tutorial.py
theano/tests/test_tutorial.py
+0
-0
没有找到文件。
MANIFEST.in
浏览文件 @
9950ce08
global-include *.txt
global-include *.c
global-include *.cu
global-include *.cuh
global-include *.sh
...
...
doc/extending/fibby.txt
浏览文件 @
9950ce08
...
...
@@ -67,9 +67,9 @@ you should check the strides and alignment.
if (!%(y)s)
%(fail)s;
{//New scope needed to make compilation work
dtype_%(y)s * y = (dtype_%(y)s*)
%(y)s->data
;
dtype_%(x)s * x = (dtype_%(x)s*)
%(x)s->data
;
for (int i = 2; i <
%(x)s->dimensions
[0]; ++i)
dtype_%(y)s * y = (dtype_%(y)s*)
PyArray_DATA(%(y)s)
;
dtype_%(x)s * x = (dtype_%(x)s*)
PyArray_DATA(%(x)s)
;
for (int i = 2; i <
PyArray_DIMS(%(x)s)
[0]; ++i)
y[i] = y[i-1]*y[i-2] + x[i];
}
""" % locals()
...
...
doc/library/tensor/basic.txt
浏览文件 @
9950ce08
...
...
@@ -420,7 +420,9 @@ TensorVariable
.. class:: _tensor_py_operators(object)
This mix-in class adds convenient attributes, methods, and support for Python operators (see :ref:`tensor_operator_support`).
This mix-in class adds convenient attributes, methods, and support
to TensorVariable, TensorConstant and TensorSharedVariable for
Python operators (see :ref:`tensor_operator_support`).
.. attribute:: type
...
...
@@ -472,6 +474,10 @@ TensorVariable
See :func:`flatten`.
.. method:: ravel()
Returns self.flatten(). Provided for NumPy compatibility.
.. attribute:: T
Transpose of this tensor.
...
...
@@ -485,8 +491,31 @@ TensorVariable
same vector! Use `reshape` or `dimshuffle` to turn your vector
into a row or column matrix.
.. method:: {any,all}(axis=None, keepdims=False)
.. method:: {sum,prod,mean}(axis=None, dtype=None, keepdims=False, acc_dtype=None)
.. method:: {var,std,min,max,argmin,argmax}(axis=None, keepdims=False)
.. method:: diagonal(offset=0, axis1=0, axis2=1)
.. method:: astype(dtype)
.. method:: take(indices, axis=None, mode='raise')
.. method:: copy()
.. method:: norm(L, axis=None)
.. method:: nonzero(return_matrix=False)
.. method:: nonzero_values()
.. method:: sort(axis=-1, kind='quicksort', order=None)
.. method:: argsort(axis=-1, kind='quicksort', order=None)
.. method:: clip(a_min, a_max)
.. method:: conj()
.. method:: repeat(repeats, axis=None)
.. method:: round(mode="half_away_from_zero")
.. method:: trace()
.. method:: get_scalar_constant_value()
.. method:: zeros_like(model, dtype=None)
All of the above methods behave like their NumPy counterparts, applied to the current Theano tensor.
.. method:: __{abs,neg,lt,le,gt,ge,invert,and,or,add,sub,mul,div,truediv,floordiv}__
These elementwise operations are supported via Python syntax.
Shaping and Shuffling
=====================
...
...
theano/compile/tests/test_debugmode.py
浏览文件 @
9950ce08
...
...
@@ -155,11 +155,11 @@ class WeirdBrokenOp(gof.Op):
prep_vars
=
"""
//the output array has size M x N
npy_intp M = PyArray_DIMS(
%(a)
s)[0];
npy_intp Sa =
%(a)
s->strides
[0] / PyArray_DESCR(
%(a)
s)->elsize;
npy_intp Sz =
%(z)
s->strides
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Sa =
PyArray_STRIDES(
%(a)
s)
[0] / PyArray_DESCR(
%(a)
s)->elsize;
npy_intp Sz =
PyArray_STRIDES(
%(z)
s)
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_double * Da = (npy_double*)
%(a)
s->data
;
npy_double * Dz = (npy_double*)
%(z)
s->data
;
npy_double * Da = (npy_double*)
PyArray_BYTES(
%(a)
s)
;
npy_double * Dz = (npy_double*)
PyArray_BYTES(
%(z)
s)
;
//clear the output array
for (npy_intp m = 0; m < M; ++m)
...
...
theano/gof/cmodule.py
浏览文件 @
9950ce08
...
...
@@ -1693,7 +1693,7 @@ class GCC_compiler(object):
#to use the new API, but not everywhere. When finished, enable
#the following macro to assert that we don't bring in new code
#that uses the old API.
#
cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
cxxflags
.
append
(
"-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION"
)
numpy_ver
=
[
int
(
n
)
for
n
in
numpy
.
__version__
.
split
(
'.'
)[:
2
]]
# numpy 1.7 deprecated the following macro but the new one didn't
...
...
theano/gof/lazylinker_c.c
.txt
→
theano/gof/lazylinker_c.c
浏览文件 @
9950ce08
File moved
theano/gof/lazylinker_c.py
浏览文件 @
9950ce08
...
...
@@ -76,10 +76,7 @@ except ImportError:
except
ImportError
:
_logger
.
info
(
"Compiling new CVM"
)
dirname
=
'lazylinker_ext'
# We use a .txt extensions as otherwise it don't get
# included when we create a package to send to pypi
# This happen even if we tell to include *.c files
cfile
=
os
.
path
.
join
(
theano
.
__path__
[
0
],
'gof'
,
'lazylinker_c.c.txt'
)
cfile
=
os
.
path
.
join
(
theano
.
__path__
[
0
],
'gof'
,
'lazylinker_c.c'
)
code
=
open
(
cfile
)
.
read
()
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
if
not
os
.
path
.
exists
(
loc
):
...
...
theano/misc/check_blas.py
浏览文件 @
9950ce08
...
...
@@ -220,6 +220,7 @@ if __name__ == "__main__":
GTX 650 Ti 0.27s
GTX 460 0.37s 0.45s
GTX 285 0.42s 0.452s 0.452s 0.40s # cuda 3.0 seems faster? driver version?
750M 0.49s
GTX 550 Ti 0.57s
GT 520 2.68s 3.06s
520M 2.44s 3.19s # with bumblebee on Ubuntu 12.04
...
...
theano/sandbox/cuda/basic_ops.py
浏览文件 @
9950ce08
...
...
@@ -2223,12 +2223,6 @@ class GpuReshape(tensor.Reshape, GpuOp):
out
[
0
]
=
x
.
reshape
(
tuple
(
shp
))
# C Code shared by GpuSubtensor and GpuIncSubtensor
_define_set_data
=
"""
#define CudaNdarray_set_device_data2(obj, ptr, base)
\
CudaNdarray_set_device_data(obj, (float *)ptr, base)
"""
class
GpuSubtensor
(
GpuOp
,
tensor
.
Subtensor
):
"""
Implement subtensor on the gpu.
...
...
@@ -2276,16 +2270,27 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
view_ndim
=
node
.
outputs
[
0
]
.
ndim
fail
=
sub
[
'fail'
]
decl
=
"CudaNdarray* xview = NULL;"
get_xview
=
self
.
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
self
.
idx_list
,
view_ndim
=
view_ndim
,
c_prefix
=
'CudaNdarray'
,
strides_mul
=
4
,
)
build_view
=
"""
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
CudaNdarray*
xview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
xview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
if (!xview)
{
%(fail)
s;
}
if (CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(
%(x)
s),
(PyObject*) NULL))
if (CudaNdarray_set_device_data(
xview,
CudaNdarray_DEV_DATA(
%(x)
s) + xview_offset/4,
(PyObject*)
%(x)
s))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
...
...
@@ -2294,43 +2299,24 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
%(fail)
s;
}
cnda_mark_dev_structure_dirty(xview);
"""
%
locals
()
get_xview
=
_define_set_data
+
\
self
.
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
self
.
idx_list
,
c_prefix
=
'CudaNdarray'
,
set_data
=
'CudaNdarray_set_device_data2'
,
set_dim
=
'CudaNdarray_set_dim'
,
set_stride
=
'CudaNdarray_set_stride'
,
update_flags
=
""
,
strides_mul
=
4
)
finish_view
=
""
#For broadcasted dimensions, set the strides to 0
#We can't do that only for broadcasted dimensions as this can happen for dimensions of size 0,
#That are rebroadcated later.
for
idx
in
range
(
node
.
outputs
[
0
]
.
ndim
):
finish_view
+=
"""
if(CudaNdarray_HOST_DIMS(xview)[
%(idx)
s]==1)
CudaNdarray_set_stride(xview,
%(idx)
s, 0);
"""
%
locals
()
finish_view
+=
"""
//Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
%(x)
s)){
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set"
" the base of the view array");
Py_XDECREF(xview);
%(fail)
s;
for(int idx=0;idx <
%(view_ndim)
s; idx++){
//For broadcasted dimensions, set the strides to 0
//We can't do that only for broadcasted dimensions as this can happen
//for dimensions of size 0. That are rebroadcated later.
if(xview_dims[idx]==1)
CudaNdarray_set_stride(xview, idx, 0);
else
CudaNdarray_set_stride(xview, idx, xview_strides[idx]);
CudaNdarray_set_dim(xview, idx, xview_dims[idx]);
}
"""
%
locals
()
finish_view
=
"""
Py_XDECREF(
%(z)
s);
%(z)
s = xview;
"""
%
locals
()
return
build_view
+
"{"
+
get_xview
+
"}"
+
finish_view
return
decl
+
get_xview
+
build_view
+
finish_view
def
c_code_cache_version
(
self
):
hv
=
self
.
helper_c_code_cache_version
()
...
...
@@ -2719,6 +2705,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
"""
%
locals
()
class
GpuIncSubtensor
(
tensor
.
IncSubtensor
,
GpuOp
):
"""
Implement IncSubtensor on the gpu.
...
...
@@ -2756,6 +2743,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
return
"""(CudaNdarray*) CudaNdarray_Copy(
%(x)
s)"""
%
locals
()
def
decl_view
(
self
):
return
"CudaNdarray* zview = NULL;"
def
make_view_array
(
self
,
x
,
view_ndim
):
"""
:param x: a string identifying an array to be viewed
...
...
@@ -2765,17 +2755,32 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
This doesn't need to actually set up the view with the
right indexing; we'll do that manually later.
"""
return
"""CudaNdarray* zview = (CudaNdarray*)
CudaNdarray_New(
%(view_ndim)
s)"""
%
locals
()
ret
=
"""zview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
if (CudaNdarray_set_device_data(
zview,
CudaNdarray_DEV_DATA(
%(x)
s) + xview_offset/4,
(PyObject*)
%(x)
s))
{
zview = NULL;
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
}else{
cnda_mark_dev_structure_dirty(zview);
for(int idx=0;idx <
%(view_ndim)
s; idx++){
if(xview_dims[idx]==1)
CudaNdarray_set_stride(zview, idx, 0);
else
CudaNdarray_set_stride(zview, idx, xview_strides[idx]);
CudaNdarray_set_dim(zview, idx, xview_dims[idx]);
}
}
"""
%
locals
()
return
ret
def
get_helper_c_code_args
(
self
):
""" Return a dictionary of arguments to use with helper_c_code"""
return
{
'update_flags'
:
""
,
'c_prefix'
:
'CudaNdarray'
,
'set_data'
:
'CudaNdarray_set_device_data2'
,
'set_dim'
:
'CudaNdarray_set_dim'
,
'set_stride'
:
'CudaNdarray_set_stride'
,
'update_flags'
:
""
,
return
{
'c_prefix'
:
'CudaNdarray'
,
'strides_mul'
:
4
}
...
...
@@ -2789,24 +2794,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
return
"""CudaNdarray_CopyFromCudaNdarray(
%(view)
s,
%(source)
s)"""
%
locals
()
def
define_set_data
(
self
):
return
_define_set_data
def
link_view_array
(
self
,
x
,
fail
):
return
"""
if (CudaNdarray_set_device_data(zview, CudaNdarray_DEV_DATA(
%(x)
s),
(PyObject*) NULL))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
Py_XDECREF(zview);
%(fail)
s;
}
cnda_mark_dev_structure_dirty(zview);
"""
%
locals
()
def
set_view_base
(
self
,
x
,
fail
):
return
"""
//Set the base only now
...
...
@@ -2823,7 +2810,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def
add_to_zview
(
self
,
x
,
fail
):
return
"""
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
(PyObject *) py_
%(x)
s);
...
...
@@ -2839,7 +2825,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
%
locals
()
def
c_code_cache_version
(
self
):
parent_version
=
super
(
GpuIncSubtensor
,
self
)
.
c_code_cache_version
()
if
parent_version
:
return
parent_version
+
(
0
,)
...
...
theano/sandbox/rng_mrg.py
浏览文件 @
9950ce08
...
...
@@ -5,13 +5,14 @@ Generator code in SSJ package (L'Ecuyer & Simard)
http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
"""
import
sys
,
warnings
import
warnings
import
numpy
from
theano
import
Op
,
Apply
,
shared
,
config
,
Variable
from
theano.tensor
import
(
raw_random
,
TensorType
,
as_tensor_variable
,
get_vector_length
,
cast
,
opt
,
scal
)
from
theano.tensor
import
zeros_like
,
sqrt
,
log
,
sin
,
cos
,
join
,
prod
from
theano.tensor
import
sqrt
,
log
,
sin
,
cos
,
join
,
prod
from
theano.compile
import
optdb
from
theano.gof
import
local_optimizer
from
theano.gof.python25
import
all
,
any
...
...
@@ -36,6 +37,7 @@ def matVecModM(A, s, m):
x
[
i
]
=
r
+
m
return
x
def
multMatVect
(
v
,
A
,
m1
,
B
,
m2
):
#multiply the first half of v by A with a modulo of m1
#and the second half by B with a modulo of m2
...
...
@@ -79,9 +81,11 @@ A2p134 = numpy.asarray(
[
1401213391
,
1178684362
,
1431130166
]])
np_int32_vals
=
[
numpy
.
int32
(
i
)
for
i
in
(
0
,
7
,
9
,
15
,
16
,
22
,
24
)]
def
ff_2p134
(
rstate
):
return
multMatVect
(
rstate
,
A1p134
,
M1
,
A2p134
,
M2
)
def
ff_2p72
(
rstate
):
return
multMatVect
(
rstate
,
A1p72
,
M1
,
A2p72
,
M2
)
...
...
@@ -93,8 +97,8 @@ def mrg_next_value(rstate, new_rstate):
#i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
i0
,
i7
,
i9
,
i15
,
i16
,
i22
,
i24
=
np_int32_vals
#first component
y1
=
(((
x12
&
MASK12
)
<<
i22
)
+
(
x12
>>
i9
)
+
((
x13
&
MASK13
)
<<
i7
)
+
(
x13
>>
i24
))
y1
=
(((
x12
&
MASK12
)
<<
i22
)
+
(
x12
>>
i9
)
+
((
x13
&
MASK13
)
<<
i7
)
+
(
x13
>>
i24
))
assert
type
(
y1
)
==
numpy
.
int32
if
(
y1
<
0
or
y1
>=
M1
):
#must also check overflow
...
...
@@ -135,6 +139,7 @@ def mrg_next_value(rstate, new_rstate):
else
:
return
(
x11
-
x21
)
*
NORM
class
mrg_uniform_base
(
Op
):
def
__init__
(
self
,
output_type
,
inplace
=
False
):
Op
.
__init__
(
self
)
...
...
@@ -145,17 +150,19 @@ class mrg_uniform_base(Op):
self
.
warned_numpy_version
=
False
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
\
and
self
.
output_type
==
other
.
output_type
\
and
self
.
inplace
==
other
.
inplace
return
(
type
(
self
)
==
type
(
other
)
and
self
.
output_type
==
other
.
output_type
and
self
.
inplace
==
other
.
inplace
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
output_type
)
^
hash
(
self
.
inplace
)
def
__str__
(
self
):
if
self
.
inplace
:
s
=
"inplace"
else
:
s
=
"no_inplace"
return
self
.
__class__
.
__name__
+
"{
%
s,
%
s}"
%
(
self
.
output_type
,
s
)
else
:
s
=
"no_inplace"
return
self
.
__class__
.
__name__
+
"{
%
s,
%
s}"
%
(
self
.
output_type
,
s
)
def
make_node
(
self
,
rstate
,
size
):
# error checking slightly redundant here, since
...
...
@@ -166,7 +173,7 @@ class mrg_uniform_base(Op):
[
rstate
,
size
],
[
rstate
.
type
(),
self
.
output_type
()])
def
grad
(
self
,
inputs
,
ograd
):
def
grad
(
self
,
inputs
,
ograd
):
return
[
None
for
i
in
inputs
]
def
R_op
(
self
,
inputs
,
eval_points
):
...
...
@@ -187,8 +194,8 @@ class mrg_uniform(mrg_uniform_base):
def
perform
(
self
,
node
,
inp
,
out
):
rstate
,
size
=
inp
o_rstate
,
o_sample
=
out
numpy_version
=
numpy
.
__version__
.
split
(
'.'
)
if
not
self
.
warned_numpy_version
and
int
(
numpy_version
[
0
])
<=
1
and
int
(
numpy_version
[
1
])
<
3
:
numpy_version
=
numpy
.
__version__
.
split
(
'.'
)
if
not
self
.
warned_numpy_version
and
int
(
numpy_version
[
0
])
<=
1
and
int
(
numpy_version
[
1
])
<
3
:
print
"Warning: you must use numpy version 1.3.0 or higher with the python version of this op. Otherwise numpy leak memory. and numpy"
self
.
warned_numpy_version
=
True
...
...
@@ -201,20 +208,21 @@ class mrg_uniform(mrg_uniform_base):
for
s
in
size
:
n_elements
*=
s
n_streams
,
_
=
rstate
.
shape
n_streams
,
_
=
rstate
.
shape
rval
=
numpy
.
zeros
(
n_elements
,
dtype
=
self
.
output_type
.
dtype
)
err_orig
=
numpy
.
seterr
(
over
=
'ignore'
)
try
:
for
i
in
xrange
(
n_elements
):
sample
=
mrg_next_value
(
rstate
[
i
%
n_streams
],
rstate
[
i
%
n_streams
])
sample
=
mrg_next_value
(
rstate
[
i
%
n_streams
],
rstate
[
i
%
n_streams
])
rval
[
i
]
=
sample
finally
:
numpy
.
seterr
(
**
err_orig
)
o_rstate
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
rstate
)
# send to GPU if necessary
o_sample
[
0
]
=
node
.
outputs
[
1
]
.
type
.
filter
(
rval
.
reshape
(
size
))
# send to GPU if necessary
o_sample
[
0
]
=
node
.
outputs
[
1
]
.
type
.
filter
(
rval
.
reshape
(
size
))
# send to GPU if necessary
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
rstate
,
size
=
inp
...
...
@@ -228,7 +236,7 @@ class mrg_uniform(mrg_uniform_base):
fail
=
sub
[
'fail'
]
if
self
.
output_type
.
dtype
==
'float32'
:
otype
=
'float'
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that
# numpy.float32(number * M1) < 1.0
else
:
...
...
@@ -279,7 +287,7 @@ class mrg_uniform(mrg_uniform_base):
}
for (int i = 0; i <
%(ndim)
s; ++i)
{
odims[i] = ((npy_int32*)(
%(size)
s->data +
%(size)
s->strides
[0] * i))[0];
odims[i] = ((npy_int32*)(
PyArray_BYTES(
%(size)
s) + PyArray_STRIDES(
%(size)
s)
[0] * i))[0];
n_elements *= odims[i];
must_alloc_sample = must_alloc_sample || (PyArray_DIMS(
%(o_sample)
s)[i] != odims[i]);
//fprintf(stderr, "size
%%
i
%%
i
\\
n", i, (int)odims[i]);
...
...
@@ -313,8 +321,8 @@ class mrg_uniform(mrg_uniform_base):
}
n_streams = PyArray_DIMS(
%(o_rstate)
s)[0];
sample_data = (
%(otype)
s *)
%(o_sample)
s->data
;
state_data = (npy_int32 *)
%(o_rstate)
s->data
;
sample_data = (
%(otype)
s *)
PyArray_DATA(
%(o_sample)
s)
;
state_data = (npy_int32 *)
PyArray_DATA(
%(o_rstate)
s)
;
for (int i = 0; i < n_elements; ++i)
{
npy_int32 * state_data_i = state_data + (i
%%
n_streams)*6;
...
...
@@ -392,7 +400,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
def
c_support_code_apply
(
self
,
node
,
nodename
):
if
self
.
output_type
.
dtype
==
'float32'
:
otype
=
'float'
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that
# numpy.float32(number * M1) < 1.0
else
:
...
...
@@ -476,7 +484,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
}
}
"""
%
locals
()
"""
%
locals
()
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
rstate
,
size
=
inp
...
...
@@ -491,7 +499,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
else
:
otype
=
'double'
SYNC
=
"CNDA_THREAD_SYNC"
;
SYNC
=
"CNDA_THREAD_SYNC"
return
"""
//////// <code generated by mrg_uniform>
...
...
@@ -521,7 +529,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
}
for (int i = 0; i <
%(ndim)
s; ++i)
{
odims[i] = ((npy_int32*)(
%(size)
s->data +
%(size)
s->strides
[0] * i))[0];
odims[i] = ((npy_int32*)(
PyArray_BYTES(
%(size)
s) + PyArray_STRIDES(
%(size)
s)
[0] * i))[0];
n_elements *= odims[i];
must_alloc_sample = (must_alloc_sample
|| CudaNdarray_HOST_DIMS(
%(o_sample)
s)[i] != odims[i]);
...
...
@@ -593,7 +601,8 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
}
//////// </ code generated by mrg_uniform>
"""
%
locals
()
"""
%
locals
()
def
c_code_cache_version
(
self
):
return
(
7
,)
...
...
@@ -662,7 +671,7 @@ class MRG_RandomStreams(object):
elif
seed
>=
M2
:
raise
ValueError
(
'seed should be less than
%
i'
%
M2
,
seed
)
self
.
rstate
=
numpy
.
asarray
([
seed
]
*
6
,
dtype
=
'int32'
)
elif
len
(
seed
)
==
6
:
elif
len
(
seed
)
==
6
:
if
seed
[
0
]
==
0
and
seed
[
1
]
==
0
and
seed
[
2
]
==
0
:
raise
ValueError
(
'The first 3 values of seed should not be all 0'
,
seed
)
if
seed
[
3
]
==
0
and
seed
[
4
]
==
0
and
seed
[
5
]
==
0
:
...
...
@@ -690,7 +699,7 @@ class MRG_RandomStreams(object):
"""
assert
n_streams
<
2
**
72
assert
n_streams
>
0
rval
=
numpy
.
zeros
((
n_streams
,
6
),
dtype
=
'int32'
)
rval
=
numpy
.
zeros
((
n_streams
,
6
),
dtype
=
'int32'
)
rval
[
0
]
=
self
.
rstate
for
i
in
xrange
(
1
,
n_streams
):
rval
[
i
]
=
ff_2p72
(
rval
[
i
-
1
])
...
...
@@ -776,11 +785,13 @@ class MRG_RandomStreams(object):
# currently no Theano node that will do a frombuffer
# reinterpretation.
u
=
self
.
pretty_return
(
node_rstate
,
*
GPU_mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
*
GPU_mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
else
:
node_rstate
=
shared
(
self
.
get_substream_rstates
(
nstreams
))
u
=
self
.
pretty_return
(
node_rstate
,
*
mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
*
mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
r
=
u
*
(
high
-
low
)
+
low
if
u
.
type
.
broadcastable
!=
r
.
type
.
broadcastable
:
...
...
@@ -934,4 +945,6 @@ def mrg_random_make_inplace(node):
new_op
=
op
.
__class__
(
op
.
output_type
,
inplace
=
True
)
return
new_op
.
make_node
(
*
node
.
inputs
)
.
outputs
return
False
optdb
.
register
(
'random_make_inplace_mrg'
,
opt
.
in2out
(
mrg_random_make_inplace
,
ignore_newtrees
=
True
),
99
,
'fast_run'
,
'inplace'
)
optdb
.
register
(
'random_make_inplace_mrg'
,
opt
.
in2out
(
mrg_random_make_inplace
,
ignore_newtrees
=
True
),
99
,
'fast_run'
,
'inplace'
)
theano/scan_module/scan_perform.c
.txt
→
theano/scan_module/scan_perform.c
浏览文件 @
9950ce08
This source diff could not be displayed because it is too large. You can
view the blob
instead.
theano/scan_module/scan_perform.pyx
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/scan_module/scan_perform_ext.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/sparse/basic.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/sparse/opt.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/basic.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/elemwise.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/nnet/nnet.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/subtensor.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/tests/test_blas.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/tests/test_inc_subtensor.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/type.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tests/test_tutorial.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论