Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9950ce08
提交
9950ce08
authored
11月 06, 2013
作者:
Pascal Lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1580 from nouiz/deprecated
[MRG]Deprecated
上级
1d639d66
7c42bebe
全部展开
显示空白字符变更
内嵌
并排
正在显示
23 个修改的文件
包含
138 行增加
和
112 行删除
+138
-112
MANIFEST.in
MANIFEST.in
+1
-0
fibby.txt
doc/extending/fibby.txt
+3
-3
basic.txt
doc/library/tensor/basic.txt
+32
-3
test_debugmode.py
theano/compile/tests/test_debugmode.py
+4
-4
cmodule.py
theano/gof/cmodule.py
+1
-1
lazylinker_c.c
theano/gof/lazylinker_c.c
+0
-0
lazylinker_c.py
theano/gof/lazylinker_c.py
+1
-4
check_blas.py
theano/misc/check_blas.py
+1
-0
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+53
-68
rng_mrg.py
theano/sandbox/rng_mrg.py
+42
-29
scan_perform.c
theano/scan_module/scan_perform.c
+0
-0
scan_perform.pyx
theano/scan_module/scan_perform.pyx
+0
-0
scan_perform_ext.py
theano/scan_module/scan_perform_ext.py
+0
-0
basic.py
theano/sparse/basic.py
+0
-0
opt.py
theano/sparse/opt.py
+0
-0
basic.py
theano/tensor/basic.py
+0
-0
elemwise.py
theano/tensor/elemwise.py
+0
-0
nnet.py
theano/tensor/nnet/nnet.py
+0
-0
subtensor.py
theano/tensor/subtensor.py
+0
-0
test_blas.py
theano/tensor/tests/test_blas.py
+0
-0
test_inc_subtensor.py
theano/tensor/tests/test_inc_subtensor.py
+0
-0
type.py
theano/tensor/type.py
+0
-0
test_tutorial.py
theano/tests/test_tutorial.py
+0
-0
没有找到文件。
MANIFEST.in
浏览文件 @
9950ce08
global-include *.txt
global-include *.txt
global-include *.c
global-include *.cu
global-include *.cu
global-include *.cuh
global-include *.cuh
global-include *.sh
global-include *.sh
...
...
doc/extending/fibby.txt
浏览文件 @
9950ce08
...
@@ -67,9 +67,9 @@ you should check the strides and alignment.
...
@@ -67,9 +67,9 @@ you should check the strides and alignment.
if (!%(y)s)
if (!%(y)s)
%(fail)s;
%(fail)s;
{//New scope needed to make compilation work
{//New scope needed to make compilation work
dtype_%(y)s * y = (dtype_%(y)s*)
%(y)s->data
;
dtype_%(y)s * y = (dtype_%(y)s*)
PyArray_DATA(%(y)s)
;
dtype_%(x)s * x = (dtype_%(x)s*)
%(x)s->data
;
dtype_%(x)s * x = (dtype_%(x)s*)
PyArray_DATA(%(x)s)
;
for (int i = 2; i <
%(x)s->dimensions
[0]; ++i)
for (int i = 2; i <
PyArray_DIMS(%(x)s)
[0]; ++i)
y[i] = y[i-1]*y[i-2] + x[i];
y[i] = y[i-1]*y[i-2] + x[i];
}
}
""" % locals()
""" % locals()
...
...
doc/library/tensor/basic.txt
浏览文件 @
9950ce08
...
@@ -420,7 +420,9 @@ TensorVariable
...
@@ -420,7 +420,9 @@ TensorVariable
.. class:: _tensor_py_operators(object)
.. class:: _tensor_py_operators(object)
This mix-in class adds convenient attributes, methods, and support for Python operators (see :ref:`tensor_operator_support`).
This mix-in class adds convenient attributes, methods, and support
to TensorVariable, TensorConstant and TensorSharedVariable for
Python operators (see :ref:`tensor_operator_support`).
.. attribute:: type
.. attribute:: type
...
@@ -472,6 +474,10 @@ TensorVariable
...
@@ -472,6 +474,10 @@ TensorVariable
See :func:`flatten`.
See :func:`flatten`.
.. method:: ravel()
Returns self.flatten(). Provided for NumPy compatibility.
.. attribute:: T
.. attribute:: T
Transpose of this tensor.
Transpose of this tensor.
...
@@ -485,8 +491,31 @@ TensorVariable
...
@@ -485,8 +491,31 @@ TensorVariable
same vector! Use `reshape` or `dimshuffle` to turn your vector
same vector! Use `reshape` or `dimshuffle` to turn your vector
into a row or column matrix.
into a row or column matrix.
.. method:: {any,all}(axis=None, keepdims=False)
.. method:: {sum,prod,mean}(axis=None, dtype=None, keepdims=False, acc_dtype=None)
.. method:: {var,std,min,max,argmin,argmax}(axis=None, keepdims=False)
.. method:: diagonal(offset=0, axis1=0, axis2=1)
.. method:: astype(dtype)
.. method:: take(indices, axis=None, mode='raise')
.. method:: copy()
.. method:: norm(L, axis=None)
.. method:: nonzero(self, return_matrix=False)
.. method:: nonzero_values(self)
.. method:: sort(self, axis=-1, kind='quicksort', order=None)
.. method:: argsort(self, axis=-1, kind='quicksort', order=None)
.. method:: clip(self, a_min, a_max)
.. method:: conj()
.. method:: repeat(repeats, axis=None)
.. method:: round(mode="half_away_from_zero")
.. method:: trace()
.. method:: get_scalar_constant_value()
.. method:: zeros_like(model, dtype=None)
All the above methods are the Theano equivalents of the corresponding NumPy methods, applied to the current tensor.
.. method:: __{abs,neg,lt,le,gt,ge,invert,and,or,add,sub,mul,div,truediv,floordiv}__
These elemwise operations are supported via Python operator syntax.
Shaping and Shuffling
Shaping and Shuffling
=====================
=====================
...
...
theano/compile/tests/test_debugmode.py
浏览文件 @
9950ce08
...
@@ -155,11 +155,11 @@ class WeirdBrokenOp(gof.Op):
...
@@ -155,11 +155,11 @@ class WeirdBrokenOp(gof.Op):
prep_vars
=
"""
prep_vars
=
"""
//the output array has size M x N
//the output array has size M x N
npy_intp M = PyArray_DIMS(
%(a)
s)[0];
npy_intp M = PyArray_DIMS(
%(a)
s)[0];
npy_intp Sa =
%(a)
s->strides
[0] / PyArray_DESCR(
%(a)
s)->elsize;
npy_intp Sa =
PyArray_STRIDES(
%(a)
s)
[0] / PyArray_DESCR(
%(a)
s)->elsize;
npy_intp Sz =
%(z)
s->strides
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Sz =
PyArray_STRIDES(
%(z)
s)
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_double * Da = (npy_double*)
%(a)
s->data
;
npy_double * Da = (npy_double*)
PyArray_BYTES(
%(a)
s)
;
npy_double * Dz = (npy_double*)
%(z)
s->data
;
npy_double * Dz = (npy_double*)
PyArray_BYTES(
%(z)
s)
;
//clear the output array
//clear the output array
for (npy_intp m = 0; m < M; ++m)
for (npy_intp m = 0; m < M; ++m)
...
...
theano/gof/cmodule.py
浏览文件 @
9950ce08
...
@@ -1693,7 +1693,7 @@ class GCC_compiler(object):
...
@@ -1693,7 +1693,7 @@ class GCC_compiler(object):
#to use the new API, but not everywhere. When finished, enable
#to use the new API, but not everywhere. When finished, enable
#the following macro to assert that we don't bring new code
#the following macro to assert that we don't bring new code
#that use the old API.
#that use the old API.
#
cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
cxxflags
.
append
(
"-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION"
)
numpy_ver
=
[
int
(
n
)
for
n
in
numpy
.
__version__
.
split
(
'.'
)[:
2
]]
numpy_ver
=
[
int
(
n
)
for
n
in
numpy
.
__version__
.
split
(
'.'
)[:
2
]]
# numpy 1.7 deprecated the following macro but the new one didn't
# numpy 1.7 deprecated the following macro but the new one didn't
...
...
theano/gof/lazylinker_c.c
.txt
→
theano/gof/lazylinker_c.c
浏览文件 @
9950ce08
File moved
theano/gof/lazylinker_c.py
浏览文件 @
9950ce08
...
@@ -76,10 +76,7 @@ except ImportError:
...
@@ -76,10 +76,7 @@ except ImportError:
except
ImportError
:
except
ImportError
:
_logger
.
info
(
"Compiling new CVM"
)
_logger
.
info
(
"Compiling new CVM"
)
dirname
=
'lazylinker_ext'
dirname
=
'lazylinker_ext'
# We use a .txt extensions as otherwise it don't get
cfile
=
os
.
path
.
join
(
theano
.
__path__
[
0
],
'gof'
,
'lazylinker_c.c'
)
# included when we create a package to send to pypi
# This happen even if we tell to include *.c files
cfile
=
os
.
path
.
join
(
theano
.
__path__
[
0
],
'gof'
,
'lazylinker_c.c.txt'
)
code
=
open
(
cfile
)
.
read
()
code
=
open
(
cfile
)
.
read
()
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
if
not
os
.
path
.
exists
(
loc
):
if
not
os
.
path
.
exists
(
loc
):
...
...
theano/misc/check_blas.py
浏览文件 @
9950ce08
...
@@ -220,6 +220,7 @@ if __name__ == "__main__":
...
@@ -220,6 +220,7 @@ if __name__ == "__main__":
GTX 650 Ti 0.27s
GTX 650 Ti 0.27s
GTX 460 0.37s 0.45s
GTX 460 0.37s 0.45s
GTX 285 0.42s 0.452s 0.452s 0.40s # cuda 3.0 seems faster? driver version?
GTX 285 0.42s 0.452s 0.452s 0.40s # cuda 3.0 seems faster? driver version?
750M 0.49s
GTX 550 Ti 0.57s
GTX 550 Ti 0.57s
GT 520 2.68s 3.06s
GT 520 2.68s 3.06s
520M 2.44s 3.19s # with bumblebee on Ubuntu 12.04
520M 2.44s 3.19s # with bumblebee on Ubuntu 12.04
...
...
theano/sandbox/cuda/basic_ops.py
浏览文件 @
9950ce08
...
@@ -2223,12 +2223,6 @@ class GpuReshape(tensor.Reshape, GpuOp):
...
@@ -2223,12 +2223,6 @@ class GpuReshape(tensor.Reshape, GpuOp):
out
[
0
]
=
x
.
reshape
(
tuple
(
shp
))
out
[
0
]
=
x
.
reshape
(
tuple
(
shp
))
# C Code shared by GpuSubtensor and GpuIncSubtensor
_define_set_data
=
"""
#define CudaNdarray_set_device_data2(obj, ptr, base)
\
CudaNdarray_set_device_data(obj, (float *)ptr, base)
"""
class
GpuSubtensor
(
GpuOp
,
tensor
.
Subtensor
):
class
GpuSubtensor
(
GpuOp
,
tensor
.
Subtensor
):
"""
"""
Implement subtensor on the gpu.
Implement subtensor on the gpu.
...
@@ -2276,16 +2270,27 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
...
@@ -2276,16 +2270,27 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
view_ndim
=
node
.
outputs
[
0
]
.
ndim
view_ndim
=
node
.
outputs
[
0
]
.
ndim
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
decl
=
"CudaNdarray* xview = NULL;"
get_xview
=
self
.
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
self
.
idx_list
,
view_ndim
=
view_ndim
,
c_prefix
=
'CudaNdarray'
,
strides_mul
=
4
,
)
build_view
=
"""
build_view
=
"""
//TODO: give this Op a second output so that this view can be cached
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
//TODO: alternatively, fix the memory leak on failure
CudaNdarray*
xview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
xview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
if (!xview)
if (!xview)
{
{
%(fail)
s;
%(fail)
s;
}
}
if (CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(
%(x)
s),
(PyObject*) NULL))
if (CudaNdarray_set_device_data(
xview,
CudaNdarray_DEV_DATA(
%(x)
s) + xview_offset/4,
(PyObject*)
%(x)
s))
{
{
PyErr_Format(PyExc_RuntimeError,
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
"GpuSubtensor is not able to set the"
...
@@ -2294,43 +2299,24 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
...
@@ -2294,43 +2299,24 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
%(fail)
s;
%(fail)
s;
}
}
cnda_mark_dev_structure_dirty(xview);
cnda_mark_dev_structure_dirty(xview);
"""
%
locals
()
for(int idx=0;idx <
%(view_ndim)
s; idx++){
//For broadcasted dimensions, set the strides to 0
get_xview
=
_define_set_data
+
\
//We can't do that only for broadcasted dimensions as this can happen
self
.
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
//for dimensions of size 0. That are rebroadcated later.
self
.
idx_list
,
if(xview_dims[idx]==1)
c_prefix
=
'CudaNdarray'
,
CudaNdarray_set_stride(xview, idx, 0);
set_data
=
'CudaNdarray_set_device_data2'
,
else
set_dim
=
'CudaNdarray_set_dim'
,
CudaNdarray_set_stride(xview, idx, xview_strides[idx]);
set_stride
=
'CudaNdarray_set_stride'
,
CudaNdarray_set_dim(xview, idx, xview_dims[idx]);
update_flags
=
""
,
strides_mul
=
4
)
finish_view
=
""
#For broadcasted dimensions, set the strides to 0
#We can't do that only for broadcasted dimensions as this can happen for dimensions of size 0,
#That are rebroadcated later.
for
idx
in
range
(
node
.
outputs
[
0
]
.
ndim
):
finish_view
+=
"""
if(CudaNdarray_HOST_DIMS(xview)[
%(idx)
s]==1)
CudaNdarray_set_stride(xview,
%(idx)
s, 0);
"""
%
locals
()
finish_view
+=
"""
//Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
%(x)
s)){
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set"
" the base of the view array");
Py_XDECREF(xview);
%(fail)
s;
}
}
"""
%
locals
()
finish_view
=
"""
Py_XDECREF(
%(z)
s);
Py_XDECREF(
%(z)
s);
%(z)
s = xview;
%(z)
s = xview;
"""
%
locals
()
"""
%
locals
()
return
build_view
+
"{"
+
get_xview
+
"}"
+
finish_view
return
decl
+
get_xview
+
build_view
+
finish_view
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
hv
=
self
.
helper_c_code_cache_version
()
hv
=
self
.
helper_c_code_cache_version
()
...
@@ -2719,6 +2705,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
...
@@ -2719,6 +2705,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
"""
%
locals
()
"""
%
locals
()
class
GpuIncSubtensor
(
tensor
.
IncSubtensor
,
GpuOp
):
class
GpuIncSubtensor
(
tensor
.
IncSubtensor
,
GpuOp
):
"""
"""
Implement IncSubtensor on the gpu.
Implement IncSubtensor on the gpu.
...
@@ -2756,6 +2743,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
...
@@ -2756,6 +2743,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
"""
return
"""(CudaNdarray*) CudaNdarray_Copy(
%(x)
s)"""
%
locals
()
return
"""(CudaNdarray*) CudaNdarray_Copy(
%(x)
s)"""
%
locals
()
def
decl_view
(
self
):
return
"CudaNdarray* zview = NULL;"
def
make_view_array
(
self
,
x
,
view_ndim
):
def
make_view_array
(
self
,
x
,
view_ndim
):
"""
"""
:param x: a string identifying an array to be viewed
:param x: a string identifying an array to be viewed
...
@@ -2765,17 +2755,32 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
...
@@ -2765,17 +2755,32 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
This doesn't need to actually set up the view with the
This doesn't need to actually set up the view with the
right indexing; we'll do that manually later.
right indexing; we'll do that manually later.
"""
"""
return
"""CudaNdarray* zview = (CudaNdarray*)
ret
=
"""zview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
CudaNdarray_New(
%(view_ndim)
s)"""
%
locals
()
if (CudaNdarray_set_device_data(
zview,
CudaNdarray_DEV_DATA(
%(x)
s) + xview_offset/4,
(PyObject*)
%(x)
s))
{
zview = NULL;
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
}else{
cnda_mark_dev_structure_dirty(zview);
for(int idx=0;idx <
%(view_ndim)
s; idx++){
if(xview_dims[idx]==1)
CudaNdarray_set_stride(zview, idx, 0);
else
CudaNdarray_set_stride(zview, idx, xview_strides[idx]);
CudaNdarray_set_dim(zview, idx, xview_dims[idx]);
}
}
"""
%
locals
()
return
ret
def
get_helper_c_code_args
(
self
):
def
get_helper_c_code_args
(
self
):
""" Return a dictionary of arguments to use with helper_c_code"""
""" Return a dictionary of arguments to use with helper_c_code"""
return
{
'update_flags'
:
""
,
return
{
'c_prefix'
:
'CudaNdarray'
,
'c_prefix'
:
'CudaNdarray'
,
'set_data'
:
'CudaNdarray_set_device_data2'
,
'set_dim'
:
'CudaNdarray_set_dim'
,
'set_stride'
:
'CudaNdarray_set_stride'
,
'update_flags'
:
""
,
'strides_mul'
:
4
'strides_mul'
:
4
}
}
...
@@ -2789,24 +2794,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
...
@@ -2789,24 +2794,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
"""
return
"""CudaNdarray_CopyFromCudaNdarray(
%(view)
s,
%(source)
s)"""
%
locals
()
return
"""CudaNdarray_CopyFromCudaNdarray(
%(view)
s,
%(source)
s)"""
%
locals
()
def
define_set_data
(
self
):
return
_define_set_data
def
link_view_array
(
self
,
x
,
fail
):
return
"""
if (CudaNdarray_set_device_data(zview, CudaNdarray_DEV_DATA(
%(x)
s),
(PyObject*) NULL))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
Py_XDECREF(zview);
%(fail)
s;
}
cnda_mark_dev_structure_dirty(zview);
"""
%
locals
()
def
set_view_base
(
self
,
x
,
fail
):
def
set_view_base
(
self
,
x
,
fail
):
return
"""
return
"""
//Set the base only now
//Set the base only now
...
@@ -2823,7 +2810,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
...
@@ -2823,7 +2810,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def
add_to_zview
(
self
,
x
,
fail
):
def
add_to_zview
(
self
,
x
,
fail
):
return
"""
return
"""
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
(PyObject *) py_
%(x)
s);
(PyObject *) py_
%(x)
s);
...
@@ -2839,7 +2825,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
...
@@ -2839,7 +2825,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
%
locals
()
"""
%
locals
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
parent_version
=
super
(
GpuIncSubtensor
,
self
)
.
c_code_cache_version
()
parent_version
=
super
(
GpuIncSubtensor
,
self
)
.
c_code_cache_version
()
if
parent_version
:
if
parent_version
:
return
parent_version
+
(
0
,)
return
parent_version
+
(
0
,)
...
...
theano/sandbox/rng_mrg.py
浏览文件 @
9950ce08
...
@@ -5,13 +5,14 @@ Generator code in SSJ package (L'Ecuyer & Simard)
...
@@ -5,13 +5,14 @@ Generator code in SSJ package (L'Ecuyer & Simard)
http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
"""
"""
import
sys
,
warnings
import
warnings
import
numpy
import
numpy
from
theano
import
Op
,
Apply
,
shared
,
config
,
Variable
from
theano
import
Op
,
Apply
,
shared
,
config
,
Variable
from
theano.tensor
import
(
raw_random
,
TensorType
,
as_tensor_variable
,
from
theano.tensor
import
(
raw_random
,
TensorType
,
as_tensor_variable
,
get_vector_length
,
cast
,
opt
,
scal
)
get_vector_length
,
cast
,
opt
,
scal
)
from
theano.tensor
import
zeros_like
,
sqrt
,
log
,
sin
,
cos
,
join
,
prod
from
theano.tensor
import
sqrt
,
log
,
sin
,
cos
,
join
,
prod
from
theano.compile
import
optdb
from
theano.compile
import
optdb
from
theano.gof
import
local_optimizer
from
theano.gof
import
local_optimizer
from
theano.gof.python25
import
all
,
any
from
theano.gof.python25
import
all
,
any
...
@@ -36,6 +37,7 @@ def matVecModM(A, s, m):
...
@@ -36,6 +37,7 @@ def matVecModM(A, s, m):
x
[
i
]
=
r
+
m
x
[
i
]
=
r
+
m
return
x
return
x
def
multMatVect
(
v
,
A
,
m1
,
B
,
m2
):
def
multMatVect
(
v
,
A
,
m1
,
B
,
m2
):
#multiply the first half of v by A with a modulo of m1
#multiply the first half of v by A with a modulo of m1
#and the second half by B with a modulo of m2
#and the second half by B with a modulo of m2
...
@@ -79,9 +81,11 @@ A2p134 = numpy.asarray(
...
@@ -79,9 +81,11 @@ A2p134 = numpy.asarray(
[
1401213391
,
1178684362
,
1431130166
]])
[
1401213391
,
1178684362
,
1431130166
]])
np_int32_vals
=
[
numpy
.
int32
(
i
)
for
i
in
(
0
,
7
,
9
,
15
,
16
,
22
,
24
)]
np_int32_vals
=
[
numpy
.
int32
(
i
)
for
i
in
(
0
,
7
,
9
,
15
,
16
,
22
,
24
)]
def
ff_2p134
(
rstate
):
def
ff_2p134
(
rstate
):
return
multMatVect
(
rstate
,
A1p134
,
M1
,
A2p134
,
M2
)
return
multMatVect
(
rstate
,
A1p134
,
M1
,
A2p134
,
M2
)
def
ff_2p72
(
rstate
):
def
ff_2p72
(
rstate
):
return
multMatVect
(
rstate
,
A1p72
,
M1
,
A2p72
,
M2
)
return
multMatVect
(
rstate
,
A1p72
,
M1
,
A2p72
,
M2
)
...
@@ -93,8 +97,8 @@ def mrg_next_value(rstate, new_rstate):
...
@@ -93,8 +97,8 @@ def mrg_next_value(rstate, new_rstate):
#i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
#i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
i0
,
i7
,
i9
,
i15
,
i16
,
i22
,
i24
=
np_int32_vals
i0
,
i7
,
i9
,
i15
,
i16
,
i22
,
i24
=
np_int32_vals
#first component
#first component
y1
=
(((
x12
&
MASK12
)
<<
i22
)
+
(
x12
>>
i9
)
y1
=
(((
x12
&
MASK12
)
<<
i22
)
+
(
x12
>>
i9
)
+
+
((
x13
&
MASK13
)
<<
i7
)
+
(
x13
>>
i24
))
((
x13
&
MASK13
)
<<
i7
)
+
(
x13
>>
i24
))
assert
type
(
y1
)
==
numpy
.
int32
assert
type
(
y1
)
==
numpy
.
int32
if
(
y1
<
0
or
y1
>=
M1
):
#must also check overflow
if
(
y1
<
0
or
y1
>=
M1
):
#must also check overflow
...
@@ -135,6 +139,7 @@ def mrg_next_value(rstate, new_rstate):
...
@@ -135,6 +139,7 @@ def mrg_next_value(rstate, new_rstate):
else
:
else
:
return
(
x11
-
x21
)
*
NORM
return
(
x11
-
x21
)
*
NORM
class
mrg_uniform_base
(
Op
):
class
mrg_uniform_base
(
Op
):
def
__init__
(
self
,
output_type
,
inplace
=
False
):
def
__init__
(
self
,
output_type
,
inplace
=
False
):
Op
.
__init__
(
self
)
Op
.
__init__
(
self
)
...
@@ -145,17 +150,19 @@ class mrg_uniform_base(Op):
...
@@ -145,17 +150,19 @@ class mrg_uniform_base(Op):
self
.
warned_numpy_version
=
False
self
.
warned_numpy_version
=
False
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
\
return
(
type
(
self
)
==
type
(
other
)
and
and
self
.
output_type
==
other
.
output_type
\
self
.
output_type
==
other
.
output_type
and
and
self
.
inplace
==
other
.
inplace
self
.
inplace
==
other
.
inplace
)
def
__hash__
(
self
):
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
output_type
)
^
hash
(
self
.
inplace
)
return
hash
(
type
(
self
))
^
hash
(
self
.
output_type
)
^
hash
(
self
.
inplace
)
def
__str__
(
self
):
def
__str__
(
self
):
if
self
.
inplace
:
if
self
.
inplace
:
s
=
"inplace"
s
=
"inplace"
else
:
s
=
"no_inplace"
else
:
return
self
.
__class__
.
__name__
+
"{
%
s,
%
s}"
%
(
self
.
output_type
,
s
)
s
=
"no_inplace"
return
self
.
__class__
.
__name__
+
"{
%
s,
%
s}"
%
(
self
.
output_type
,
s
)
def
make_node
(
self
,
rstate
,
size
):
def
make_node
(
self
,
rstate
,
size
):
# error checking slightly redundant here, since
# error checking slightly redundant here, since
...
@@ -166,7 +173,7 @@ class mrg_uniform_base(Op):
...
@@ -166,7 +173,7 @@ class mrg_uniform_base(Op):
[
rstate
,
size
],
[
rstate
,
size
],
[
rstate
.
type
(),
self
.
output_type
()])
[
rstate
.
type
(),
self
.
output_type
()])
def
grad
(
self
,
inputs
,
ograd
):
def
grad
(
self
,
inputs
,
ograd
):
return
[
None
for
i
in
inputs
]
return
[
None
for
i
in
inputs
]
def
R_op
(
self
,
inputs
,
eval_points
):
def
R_op
(
self
,
inputs
,
eval_points
):
...
@@ -187,8 +194,8 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -187,8 +194,8 @@ class mrg_uniform(mrg_uniform_base):
def
perform
(
self
,
node
,
inp
,
out
):
def
perform
(
self
,
node
,
inp
,
out
):
rstate
,
size
=
inp
rstate
,
size
=
inp
o_rstate
,
o_sample
=
out
o_rstate
,
o_sample
=
out
numpy_version
=
numpy
.
__version__
.
split
(
'.'
)
numpy_version
=
numpy
.
__version__
.
split
(
'.'
)
if
not
self
.
warned_numpy_version
and
int
(
numpy_version
[
0
])
<=
1
and
int
(
numpy_version
[
1
])
<
3
:
if
not
self
.
warned_numpy_version
and
int
(
numpy_version
[
0
])
<=
1
and
int
(
numpy_version
[
1
])
<
3
:
print
"Warning: you must use numpy version 1.3.0 or higher with the python version of this op. Otherwise numpy leak memory. and numpy"
print
"Warning: you must use numpy version 1.3.0 or higher with the python version of this op. Otherwise numpy leak memory. and numpy"
self
.
warned_numpy_version
=
True
self
.
warned_numpy_version
=
True
...
@@ -201,20 +208,21 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -201,20 +208,21 @@ class mrg_uniform(mrg_uniform_base):
for
s
in
size
:
for
s
in
size
:
n_elements
*=
s
n_elements
*=
s
n_streams
,
_
=
rstate
.
shape
n_streams
,
_
=
rstate
.
shape
rval
=
numpy
.
zeros
(
n_elements
,
dtype
=
self
.
output_type
.
dtype
)
rval
=
numpy
.
zeros
(
n_elements
,
dtype
=
self
.
output_type
.
dtype
)
err_orig
=
numpy
.
seterr
(
over
=
'ignore'
)
err_orig
=
numpy
.
seterr
(
over
=
'ignore'
)
try
:
try
:
for
i
in
xrange
(
n_elements
):
for
i
in
xrange
(
n_elements
):
sample
=
mrg_next_value
(
rstate
[
i
%
n_streams
],
rstate
[
i
%
n_streams
])
sample
=
mrg_next_value
(
rstate
[
i
%
n_streams
],
rstate
[
i
%
n_streams
])
rval
[
i
]
=
sample
rval
[
i
]
=
sample
finally
:
finally
:
numpy
.
seterr
(
**
err_orig
)
numpy
.
seterr
(
**
err_orig
)
o_rstate
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
rstate
)
# send to GPU if necessary
o_rstate
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
rstate
)
# send to GPU if necessary
o_sample
[
0
]
=
node
.
outputs
[
1
]
.
type
.
filter
(
rval
.
reshape
(
size
))
# send to GPU if necessary
o_sample
[
0
]
=
node
.
outputs
[
1
]
.
type
.
filter
(
rval
.
reshape
(
size
))
# send to GPU if necessary
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
rstate
,
size
=
inp
rstate
,
size
=
inp
...
@@ -228,7 +236,7 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -228,7 +236,7 @@ class mrg_uniform(mrg_uniform_base):
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
if
self
.
output_type
.
dtype
==
'float32'
:
if
self
.
output_type
.
dtype
==
'float32'
:
otype
=
'float'
otype
=
'float'
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that
# this was determined by finding the biggest number such that
# numpy.float32(number * M1) < 1.0
# numpy.float32(number * M1) < 1.0
else
:
else
:
...
@@ -279,7 +287,7 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -279,7 +287,7 @@ class mrg_uniform(mrg_uniform_base):
}
}
for (int i = 0; i <
%(ndim)
s; ++i)
for (int i = 0; i <
%(ndim)
s; ++i)
{
{
odims[i] = ((npy_int32*)(
%(size)
s->data +
%(size)
s->strides
[0] * i))[0];
odims[i] = ((npy_int32*)(
PyArray_BYTES(
%(size)
s) + PyArray_STRIDES(
%(size)
s)
[0] * i))[0];
n_elements *= odims[i];
n_elements *= odims[i];
must_alloc_sample = must_alloc_sample || (PyArray_DIMS(
%(o_sample)
s)[i] != odims[i]);
must_alloc_sample = must_alloc_sample || (PyArray_DIMS(
%(o_sample)
s)[i] != odims[i]);
//fprintf(stderr, "size
%%
i
%%
i
\\
n", i, (int)odims[i]);
//fprintf(stderr, "size
%%
i
%%
i
\\
n", i, (int)odims[i]);
...
@@ -313,8 +321,8 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -313,8 +321,8 @@ class mrg_uniform(mrg_uniform_base):
}
}
n_streams = PyArray_DIMS(
%(o_rstate)
s)[0];
n_streams = PyArray_DIMS(
%(o_rstate)
s)[0];
sample_data = (
%(otype)
s *)
%(o_sample)
s->data
;
sample_data = (
%(otype)
s *)
PyArray_DATA(
%(o_sample)
s)
;
state_data = (npy_int32 *)
%(o_rstate)
s->data
;
state_data = (npy_int32 *)
PyArray_DATA(
%(o_rstate)
s)
;
for (int i = 0; i < n_elements; ++i)
for (int i = 0; i < n_elements; ++i)
{
{
npy_int32 * state_data_i = state_data + (i
%%
n_streams)*6;
npy_int32 * state_data_i = state_data + (i
%%
n_streams)*6;
...
@@ -392,7 +400,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
...
@@ -392,7 +400,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
if
self
.
output_type
.
dtype
==
'float32'
:
if
self
.
output_type
.
dtype
==
'float32'
:
otype
=
'float'
otype
=
'float'
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that
# this was determined by finding the biggest number such that
# numpy.float32(number * M1) < 1.0
# numpy.float32(number * M1) < 1.0
else
:
else
:
...
@@ -476,7 +484,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
...
@@ -476,7 +484,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
}
}
}
}
"""
%
locals
()
"""
%
locals
()
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
rstate
,
size
=
inp
rstate
,
size
=
inp
...
@@ -491,7 +499,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
...
@@ -491,7 +499,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
else
:
else
:
otype
=
'double'
otype
=
'double'
SYNC
=
"CNDA_THREAD_SYNC"
;
SYNC
=
"CNDA_THREAD_SYNC"
return
"""
return
"""
//////// <code generated by mrg_uniform>
//////// <code generated by mrg_uniform>
...
@@ -521,7 +529,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
...
@@ -521,7 +529,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
}
}
for (int i = 0; i <
%(ndim)
s; ++i)
for (int i = 0; i <
%(ndim)
s; ++i)
{
{
odims[i] = ((npy_int32*)(
%(size)
s->data +
%(size)
s->strides
[0] * i))[0];
odims[i] = ((npy_int32*)(
PyArray_BYTES(
%(size)
s) + PyArray_STRIDES(
%(size)
s)
[0] * i))[0];
n_elements *= odims[i];
n_elements *= odims[i];
must_alloc_sample = (must_alloc_sample
must_alloc_sample = (must_alloc_sample
|| CudaNdarray_HOST_DIMS(
%(o_sample)
s)[i] != odims[i]);
|| CudaNdarray_HOST_DIMS(
%(o_sample)
s)[i] != odims[i]);
...
@@ -593,7 +601,8 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
...
@@ -593,7 +601,8 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
}
}
//////// </ code generated by mrg_uniform>
//////// </ code generated by mrg_uniform>
"""
%
locals
()
"""
%
locals
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
7
,)
return
(
7
,)
...
@@ -662,7 +671,7 @@ class MRG_RandomStreams(object):
...
@@ -662,7 +671,7 @@ class MRG_RandomStreams(object):
elif
seed
>=
M2
:
elif
seed
>=
M2
:
raise
ValueError
(
'seed should be less than
%
i'
%
M2
,
seed
)
raise
ValueError
(
'seed should be less than
%
i'
%
M2
,
seed
)
self
.
rstate
=
numpy
.
asarray
([
seed
]
*
6
,
dtype
=
'int32'
)
self
.
rstate
=
numpy
.
asarray
([
seed
]
*
6
,
dtype
=
'int32'
)
elif
len
(
seed
)
==
6
:
elif
len
(
seed
)
==
6
:
if
seed
[
0
]
==
0
and
seed
[
1
]
==
0
and
seed
[
2
]
==
0
:
if
seed
[
0
]
==
0
and
seed
[
1
]
==
0
and
seed
[
2
]
==
0
:
raise
ValueError
(
'The first 3 values of seed should not be all 0'
,
seed
)
raise
ValueError
(
'The first 3 values of seed should not be all 0'
,
seed
)
if
seed
[
3
]
==
0
and
seed
[
4
]
==
0
and
seed
[
5
]
==
0
:
if
seed
[
3
]
==
0
and
seed
[
4
]
==
0
and
seed
[
5
]
==
0
:
...
@@ -690,7 +699,7 @@ class MRG_RandomStreams(object):
...
@@ -690,7 +699,7 @@ class MRG_RandomStreams(object):
"""
"""
assert
n_streams
<
2
**
72
assert
n_streams
<
2
**
72
assert
n_streams
>
0
assert
n_streams
>
0
rval
=
numpy
.
zeros
((
n_streams
,
6
),
dtype
=
'int32'
)
rval
=
numpy
.
zeros
((
n_streams
,
6
),
dtype
=
'int32'
)
rval
[
0
]
=
self
.
rstate
rval
[
0
]
=
self
.
rstate
for
i
in
xrange
(
1
,
n_streams
):
for
i
in
xrange
(
1
,
n_streams
):
rval
[
i
]
=
ff_2p72
(
rval
[
i
-
1
])
rval
[
i
]
=
ff_2p72
(
rval
[
i
-
1
])
...
@@ -776,11 +785,13 @@ class MRG_RandomStreams(object):
...
@@ -776,11 +785,13 @@ class MRG_RandomStreams(object):
# currently no Theano node that will do a frombuffer
# currently no Theano node that will do a frombuffer
# reinterpretation.
# reinterpretation.
u
=
self
.
pretty_return
(
node_rstate
,
u
=
self
.
pretty_return
(
node_rstate
,
*
GPU_mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
*
GPU_mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
else
:
else
:
node_rstate
=
shared
(
self
.
get_substream_rstates
(
nstreams
))
node_rstate
=
shared
(
self
.
get_substream_rstates
(
nstreams
))
u
=
self
.
pretty_return
(
node_rstate
,
u
=
self
.
pretty_return
(
node_rstate
,
*
mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
*
mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
r
=
u
*
(
high
-
low
)
+
low
r
=
u
*
(
high
-
low
)
+
low
if
u
.
type
.
broadcastable
!=
r
.
type
.
broadcastable
:
if
u
.
type
.
broadcastable
!=
r
.
type
.
broadcastable
:
...
@@ -934,4 +945,6 @@ def mrg_random_make_inplace(node):
...
@@ -934,4 +945,6 @@ def mrg_random_make_inplace(node):
new_op
=
op
.
__class__
(
op
.
output_type
,
inplace
=
True
)
new_op
=
op
.
__class__
(
op
.
output_type
,
inplace
=
True
)
return
new_op
.
make_node
(
*
node
.
inputs
)
.
outputs
return
new_op
.
make_node
(
*
node
.
inputs
)
.
outputs
return
False
return
False
optdb
.
register
(
'random_make_inplace_mrg'
,
opt
.
in2out
(
mrg_random_make_inplace
,
ignore_newtrees
=
True
),
99
,
'fast_run'
,
'inplace'
)
optdb
.
register
(
'random_make_inplace_mrg'
,
opt
.
in2out
(
mrg_random_make_inplace
,
ignore_newtrees
=
True
),
99
,
'fast_run'
,
'inplace'
)
theano/scan_module/scan_perform.c
.txt
→
theano/scan_module/scan_perform.c
浏览文件 @
9950ce08
This source diff could not be displayed because it is too large. You can
view the blob
instead.
theano/scan_module/scan_perform.pyx
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/scan_module/scan_perform_ext.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/sparse/basic.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/sparse/opt.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/basic.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/elemwise.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/nnet/nnet.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/subtensor.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/tests/test_blas.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/tests/test_inc_subtensor.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tensor/type.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
theano/tests/test_tutorial.py
浏览文件 @
9950ce08
差异被折叠。
点击展开。
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论