Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9950ce08
提交
9950ce08
authored
11月 06, 2013
作者:
Pascal Lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1580 from nouiz/deprecated
[MRG]Deprecated
上级
1d639d66
7c42bebe
显示空白字符变更
内嵌
并排
正在显示
23 个修改的文件
包含
457 行增加
和
517 行删除
+457
-517
MANIFEST.in
MANIFEST.in
+1
-0
fibby.txt
doc/extending/fibby.txt
+3
-3
basic.txt
doc/library/tensor/basic.txt
+32
-3
test_debugmode.py
theano/compile/tests/test_debugmode.py
+4
-4
cmodule.py
theano/gof/cmodule.py
+1
-1
lazylinker_c.c
theano/gof/lazylinker_c.c
+0
-0
lazylinker_c.py
theano/gof/lazylinker_c.py
+1
-4
check_blas.py
theano/misc/check_blas.py
+1
-0
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+53
-68
rng_mrg.py
theano/sandbox/rng_mrg.py
+42
-29
scan_perform.c
theano/scan_module/scan_perform.c
+0
-0
scan_perform.pyx
theano/scan_module/scan_perform.pyx
+1
-1
scan_perform_ext.py
theano/scan_module/scan_perform_ext.py
+2
-5
basic.py
theano/sparse/basic.py
+27
-27
opt.py
theano/sparse/opt.py
+139
-139
basic.py
theano/tensor/basic.py
+1
-1
elemwise.py
theano/tensor/elemwise.py
+1
-1
nnet.py
theano/tensor/nnet/nnet.py
+22
-22
subtensor.py
theano/tensor/subtensor.py
+76
-146
test_blas.py
theano/tensor/tests/test_blas.py
+14
-28
test_inc_subtensor.py
theano/tensor/tests/test_inc_subtensor.py
+2
-2
type.py
theano/tensor/type.py
+31
-30
test_tutorial.py
theano/tests/test_tutorial.py
+3
-3
没有找到文件。
MANIFEST.in
浏览文件 @
9950ce08
global-include *.txt
global-include *.txt
global-include *.c
global-include *.cu
global-include *.cu
global-include *.cuh
global-include *.cuh
global-include *.sh
global-include *.sh
...
...
doc/extending/fibby.txt
浏览文件 @
9950ce08
...
@@ -67,9 +67,9 @@ you should check the strides and alignment.
...
@@ -67,9 +67,9 @@ you should check the strides and alignment.
if (!%(y)s)
if (!%(y)s)
%(fail)s;
%(fail)s;
{//New scope needed to make compilation work
{//New scope needed to make compilation work
dtype_%(y)s * y = (dtype_%(y)s*)
%(y)s->data
;
dtype_%(y)s * y = (dtype_%(y)s*)
PyArray_DATA(%(y)s)
;
dtype_%(x)s * x = (dtype_%(x)s*)
%(x)s->data
;
dtype_%(x)s * x = (dtype_%(x)s*)
PyArray_DATA(%(x)s)
;
for (int i = 2; i <
%(x)s->dimensions
[0]; ++i)
for (int i = 2; i <
PyArray_DIMS(%(x)s)
[0]; ++i)
y[i] = y[i-1]*y[i-2] + x[i];
y[i] = y[i-1]*y[i-2] + x[i];
}
}
""" % locals()
""" % locals()
...
...
doc/library/tensor/basic.txt
浏览文件 @
9950ce08
...
@@ -420,7 +420,9 @@ TensorVariable
...
@@ -420,7 +420,9 @@ TensorVariable
.. class:: _tensor_py_operators(object)
.. class:: _tensor_py_operators(object)
This mix-in class adds convenient attributes, methods, and support for Python operators (see :ref:`tensor_operator_support`).
This mix-in class adds convenient attributes, methods, and support
for Python operators to TensorVariable, TensorConstant and
TensorSharedVariable (see :ref:`tensor_operator_support`).
.. attribute:: type
.. attribute:: type
...
@@ -472,6 +474,10 @@ TensorVariable
...
@@ -472,6 +474,10 @@ TensorVariable
See :func:`flatten`.
See :func:`flatten`.
.. method:: ravel()
Return self.flatten(). Provided for NumPy compatibility.
.. attribute:: T
.. attribute:: T
Transpose of this tensor.
Transpose of this tensor.
...
@@ -485,8 +491,31 @@ TensorVariable
...
@@ -485,8 +491,31 @@ TensorVariable
same vector! Use `reshape` or `dimshuffle` to turn your vector
same vector! Use `reshape` or `dimshuffle` to turn your vector
into a row or column matrix.
into a row or column matrix.
.. method:: {any,all}(axis=None, keepdims=False)
.. method:: {sum,prod,mean}(axis=None, dtype=None, keepdims=False, acc_dtype=None)
.. method:: {var,std,min,max,argmin,argmax}(axis=None, keepdims=False)
.. method:: diagonal(offset=0, axis1=0, axis2=1)
.. method:: astype(dtype)
.. method:: take(indices, axis=None, mode='raise')
.. method:: copy()
.. method:: norm(L, axis=None)
.. method:: nonzero(self, return_matrix=False)
.. method:: nonzero_values(self)
.. method:: sort(self, axis=-1, kind='quicksort', order=None)
.. method:: argsort(self, axis=-1, kind='quicksort', order=None)
.. method:: clip(self, a_min, a_max)
.. method:: conj()
.. method:: repeat(repeats, axis=None)
.. method:: round(mode="half_away_from_zero")
.. method:: trace()
.. method:: get_scalar_constant_value()
.. method:: zeros_like(model, dtype=None)
All the above methods are the Theano equivalents of the NumPy methods of the same name, applied to the current tensor.
.. method:: __{abs,neg,lt,le,gt,ge,invert,and,or,add,sub,mul,div,truediv,floordiv}__
These elementwise operations are supported via Python syntax.
Shaping and Shuffling
Shaping and Shuffling
=====================
=====================
...
...
theano/compile/tests/test_debugmode.py
浏览文件 @
9950ce08
...
@@ -155,11 +155,11 @@ class WeirdBrokenOp(gof.Op):
...
@@ -155,11 +155,11 @@ class WeirdBrokenOp(gof.Op):
prep_vars
=
"""
prep_vars
=
"""
//the output array has size M x N
//the output array has size M x N
npy_intp M = PyArray_DIMS(
%(a)
s)[0];
npy_intp M = PyArray_DIMS(
%(a)
s)[0];
npy_intp Sa =
%(a)
s->strides
[0] / PyArray_DESCR(
%(a)
s)->elsize;
npy_intp Sa =
PyArray_STRIDES(
%(a)
s)
[0] / PyArray_DESCR(
%(a)
s)->elsize;
npy_intp Sz =
%(z)
s->strides
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Sz =
PyArray_STRIDES(
%(z)
s)
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_double * Da = (npy_double*)
%(a)
s->data
;
npy_double * Da = (npy_double*)
PyArray_BYTES(
%(a)
s)
;
npy_double * Dz = (npy_double*)
%(z)
s->data
;
npy_double * Dz = (npy_double*)
PyArray_BYTES(
%(z)
s)
;
//clear the output array
//clear the output array
for (npy_intp m = 0; m < M; ++m)
for (npy_intp m = 0; m < M; ++m)
...
...
theano/gof/cmodule.py
浏览文件 @
9950ce08
...
@@ -1693,7 +1693,7 @@ class GCC_compiler(object):
...
@@ -1693,7 +1693,7 @@ class GCC_compiler(object):
#to use the new API, but not everywhere. When finished, enable
#to use the new API, but not everywhere. When finished, enable
#the following macro to assert that we don't bring new code
#the following macro to assert that we don't bring new code
#that use the old API.
#that use the old API.
#
cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
cxxflags
.
append
(
"-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION"
)
numpy_ver
=
[
int
(
n
)
for
n
in
numpy
.
__version__
.
split
(
'.'
)[:
2
]]
numpy_ver
=
[
int
(
n
)
for
n
in
numpy
.
__version__
.
split
(
'.'
)[:
2
]]
# numpy 1.7 deprecated the following macro but the new one didn't
# numpy 1.7 deprecated the following macro but the new one didn't
...
...
theano/gof/lazylinker_c.c
.txt
→
theano/gof/lazylinker_c.c
浏览文件 @
9950ce08
File moved
theano/gof/lazylinker_c.py
浏览文件 @
9950ce08
...
@@ -76,10 +76,7 @@ except ImportError:
...
@@ -76,10 +76,7 @@ except ImportError:
except
ImportError
:
except
ImportError
:
_logger
.
info
(
"Compiling new CVM"
)
_logger
.
info
(
"Compiling new CVM"
)
dirname
=
'lazylinker_ext'
dirname
=
'lazylinker_ext'
# We use a .txt extension, as otherwise the file doesn't get
cfile
=
os
.
path
.
join
(
theano
.
__path__
[
0
],
'gof'
,
'lazylinker_c.c'
)
# included when we create a package to send to PyPI.
# This happens even if we ask for *.c files to be included.
cfile
=
os
.
path
.
join
(
theano
.
__path__
[
0
],
'gof'
,
'lazylinker_c.c.txt'
)
code
=
open
(
cfile
)
.
read
()
code
=
open
(
cfile
)
.
read
()
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
if
not
os
.
path
.
exists
(
loc
):
if
not
os
.
path
.
exists
(
loc
):
...
...
theano/misc/check_blas.py
浏览文件 @
9950ce08
...
@@ -220,6 +220,7 @@ if __name__ == "__main__":
...
@@ -220,6 +220,7 @@ if __name__ == "__main__":
GTX 650 Ti 0.27s
GTX 650 Ti 0.27s
GTX 460 0.37s 0.45s
GTX 460 0.37s 0.45s
GTX 285 0.42s 0.452s 0.452s 0.40s # cuda 3.0 seems faster? driver version?
GTX 285 0.42s 0.452s 0.452s 0.40s # cuda 3.0 seems faster? driver version?
750M 0.49s
GTX 550 Ti 0.57s
GTX 550 Ti 0.57s
GT 520 2.68s 3.06s
GT 520 2.68s 3.06s
520M 2.44s 3.19s # with bumblebee on Ubuntu 12.04
520M 2.44s 3.19s # with bumblebee on Ubuntu 12.04
...
...
theano/sandbox/cuda/basic_ops.py
浏览文件 @
9950ce08
...
@@ -2223,12 +2223,6 @@ class GpuReshape(tensor.Reshape, GpuOp):
...
@@ -2223,12 +2223,6 @@ class GpuReshape(tensor.Reshape, GpuOp):
out
[
0
]
=
x
.
reshape
(
tuple
(
shp
))
out
[
0
]
=
x
.
reshape
(
tuple
(
shp
))
# C Code shared by GpuSubtensor and GpuIncSubtensor
_define_set_data
=
"""
#define CudaNdarray_set_device_data2(obj, ptr, base)
\
CudaNdarray_set_device_data(obj, (float *)ptr, base)
"""
class
GpuSubtensor
(
GpuOp
,
tensor
.
Subtensor
):
class
GpuSubtensor
(
GpuOp
,
tensor
.
Subtensor
):
"""
"""
Implement subtensor on the gpu.
Implement subtensor on the gpu.
...
@@ -2276,16 +2270,27 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
...
@@ -2276,16 +2270,27 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
view_ndim
=
node
.
outputs
[
0
]
.
ndim
view_ndim
=
node
.
outputs
[
0
]
.
ndim
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
decl
=
"CudaNdarray* xview = NULL;"
get_xview
=
self
.
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
self
.
idx_list
,
view_ndim
=
view_ndim
,
c_prefix
=
'CudaNdarray'
,
strides_mul
=
4
,
)
build_view
=
"""
build_view
=
"""
//TODO: give this Op a second output so that this view can be cached
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
//TODO: alternatively, fix the memory leak on failure
CudaNdarray*
xview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
xview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
if (!xview)
if (!xview)
{
{
%(fail)
s;
%(fail)
s;
}
}
if (CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(
%(x)
s),
(PyObject*) NULL))
if (CudaNdarray_set_device_data(
xview,
CudaNdarray_DEV_DATA(
%(x)
s) + xview_offset/4,
(PyObject*)
%(x)
s))
{
{
PyErr_Format(PyExc_RuntimeError,
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
"GpuSubtensor is not able to set the"
...
@@ -2294,43 +2299,24 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
...
@@ -2294,43 +2299,24 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
%(fail)
s;
%(fail)
s;
}
}
cnda_mark_dev_structure_dirty(xview);
cnda_mark_dev_structure_dirty(xview);
"""
%
locals
()
for(int idx=0;idx <
%(view_ndim)
s; idx++){
//For broadcasted dimensions, set the strides to 0
get_xview
=
_define_set_data
+
\
//We can't do that only for broadcasted dimensions as this can happen
self
.
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
//for dimensions of size 0. That are rebroadcated later.
self
.
idx_list
,
if(xview_dims[idx]==1)
c_prefix
=
'CudaNdarray'
,
CudaNdarray_set_stride(xview, idx, 0);
set_data
=
'CudaNdarray_set_device_data2'
,
else
set_dim
=
'CudaNdarray_set_dim'
,
CudaNdarray_set_stride(xview, idx, xview_strides[idx]);
set_stride
=
'CudaNdarray_set_stride'
,
CudaNdarray_set_dim(xview, idx, xview_dims[idx]);
update_flags
=
""
,
strides_mul
=
4
)
finish_view
=
""
#For broadcasted dimensions, set the strides to 0
#We can't do that only for broadcasted dimensions as this can happen for dimensions of size 0,
#That are rebroadcated later.
for
idx
in
range
(
node
.
outputs
[
0
]
.
ndim
):
finish_view
+=
"""
if(CudaNdarray_HOST_DIMS(xview)[
%(idx)
s]==1)
CudaNdarray_set_stride(xview,
%(idx)
s, 0);
"""
%
locals
()
finish_view
+=
"""
//Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
%(x)
s)){
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set"
" the base of the view array");
Py_XDECREF(xview);
%(fail)
s;
}
}
"""
%
locals
()
finish_view
=
"""
Py_XDECREF(
%(z)
s);
Py_XDECREF(
%(z)
s);
%(z)
s = xview;
%(z)
s = xview;
"""
%
locals
()
"""
%
locals
()
return
build_view
+
"{"
+
get_xview
+
"}"
+
finish_view
return
decl
+
get_xview
+
build_view
+
finish_view
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
hv
=
self
.
helper_c_code_cache_version
()
hv
=
self
.
helper_c_code_cache_version
()
...
@@ -2719,6 +2705,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
...
@@ -2719,6 +2705,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
"""
%
locals
()
"""
%
locals
()
class
GpuIncSubtensor
(
tensor
.
IncSubtensor
,
GpuOp
):
class
GpuIncSubtensor
(
tensor
.
IncSubtensor
,
GpuOp
):
"""
"""
Implement IncSubtensor on the gpu.
Implement IncSubtensor on the gpu.
...
@@ -2756,6 +2743,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
...
@@ -2756,6 +2743,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
"""
return
"""(CudaNdarray*) CudaNdarray_Copy(
%(x)
s)"""
%
locals
()
return
"""(CudaNdarray*) CudaNdarray_Copy(
%(x)
s)"""
%
locals
()
def
decl_view
(
self
):
return
"CudaNdarray* zview = NULL;"
def
make_view_array
(
self
,
x
,
view_ndim
):
def
make_view_array
(
self
,
x
,
view_ndim
):
"""
"""
:param x: a string identifying an array to be viewed
:param x: a string identifying an array to be viewed
...
@@ -2765,17 +2755,32 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
...
@@ -2765,17 +2755,32 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
This doesn't need to actually set up the view with the
This doesn't need to actually set up the view with the
right indexing; we'll do that manually later.
right indexing; we'll do that manually later.
"""
"""
return
"""CudaNdarray* zview = (CudaNdarray*)
ret
=
"""zview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
CudaNdarray_New(
%(view_ndim)
s)"""
%
locals
()
if (CudaNdarray_set_device_data(
zview,
CudaNdarray_DEV_DATA(
%(x)
s) + xview_offset/4,
(PyObject*)
%(x)
s))
{
zview = NULL;
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
}else{
cnda_mark_dev_structure_dirty(zview);
for(int idx=0;idx <
%(view_ndim)
s; idx++){
if(xview_dims[idx]==1)
CudaNdarray_set_stride(zview, idx, 0);
else
CudaNdarray_set_stride(zview, idx, xview_strides[idx]);
CudaNdarray_set_dim(zview, idx, xview_dims[idx]);
}
}
"""
%
locals
()
return
ret
def
get_helper_c_code_args
(
self
):
def
get_helper_c_code_args
(
self
):
""" Return a dictionary of arguments to use with helper_c_code"""
""" Return a dictionary of arguments to use with helper_c_code"""
return
{
'update_flags'
:
""
,
return
{
'c_prefix'
:
'CudaNdarray'
,
'c_prefix'
:
'CudaNdarray'
,
'set_data'
:
'CudaNdarray_set_device_data2'
,
'set_dim'
:
'CudaNdarray_set_dim'
,
'set_stride'
:
'CudaNdarray_set_stride'
,
'update_flags'
:
""
,
'strides_mul'
:
4
'strides_mul'
:
4
}
}
...
@@ -2789,24 +2794,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
...
@@ -2789,24 +2794,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
"""
return
"""CudaNdarray_CopyFromCudaNdarray(
%(view)
s,
%(source)
s)"""
%
locals
()
return
"""CudaNdarray_CopyFromCudaNdarray(
%(view)
s,
%(source)
s)"""
%
locals
()
def
define_set_data
(
self
):
return
_define_set_data
def
link_view_array
(
self
,
x
,
fail
):
return
"""
if (CudaNdarray_set_device_data(zview, CudaNdarray_DEV_DATA(
%(x)
s),
(PyObject*) NULL))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
Py_XDECREF(zview);
%(fail)
s;
}
cnda_mark_dev_structure_dirty(zview);
"""
%
locals
()
def
set_view_base
(
self
,
x
,
fail
):
def
set_view_base
(
self
,
x
,
fail
):
return
"""
return
"""
//Set the base only now
//Set the base only now
...
@@ -2823,7 +2810,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
...
@@ -2823,7 +2810,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def
add_to_zview
(
self
,
x
,
fail
):
def
add_to_zview
(
self
,
x
,
fail
):
return
"""
return
"""
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
(PyObject *) py_
%(x)
s);
(PyObject *) py_
%(x)
s);
...
@@ -2839,7 +2825,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
...
@@ -2839,7 +2825,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
%
locals
()
"""
%
locals
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
parent_version
=
super
(
GpuIncSubtensor
,
self
)
.
c_code_cache_version
()
parent_version
=
super
(
GpuIncSubtensor
,
self
)
.
c_code_cache_version
()
if
parent_version
:
if
parent_version
:
return
parent_version
+
(
0
,)
return
parent_version
+
(
0
,)
...
...
theano/sandbox/rng_mrg.py
浏览文件 @
9950ce08
...
@@ -5,13 +5,14 @@ Generator code in SSJ package (L'Ecuyer & Simard)
...
@@ -5,13 +5,14 @@ Generator code in SSJ package (L'Ecuyer & Simard)
http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
"""
"""
import
sys
,
warnings
import
warnings
import
numpy
import
numpy
from
theano
import
Op
,
Apply
,
shared
,
config
,
Variable
from
theano
import
Op
,
Apply
,
shared
,
config
,
Variable
from
theano.tensor
import
(
raw_random
,
TensorType
,
as_tensor_variable
,
from
theano.tensor
import
(
raw_random
,
TensorType
,
as_tensor_variable
,
get_vector_length
,
cast
,
opt
,
scal
)
get_vector_length
,
cast
,
opt
,
scal
)
from
theano.tensor
import
zeros_like
,
sqrt
,
log
,
sin
,
cos
,
join
,
prod
from
theano.tensor
import
sqrt
,
log
,
sin
,
cos
,
join
,
prod
from
theano.compile
import
optdb
from
theano.compile
import
optdb
from
theano.gof
import
local_optimizer
from
theano.gof
import
local_optimizer
from
theano.gof.python25
import
all
,
any
from
theano.gof.python25
import
all
,
any
...
@@ -36,6 +37,7 @@ def matVecModM(A, s, m):
...
@@ -36,6 +37,7 @@ def matVecModM(A, s, m):
x
[
i
]
=
r
+
m
x
[
i
]
=
r
+
m
return
x
return
x
def
multMatVect
(
v
,
A
,
m1
,
B
,
m2
):
def
multMatVect
(
v
,
A
,
m1
,
B
,
m2
):
#multiply the first half of v by A with a modulo of m1
#multiply the first half of v by A with a modulo of m1
#and the second half by B with a modulo of m2
#and the second half by B with a modulo of m2
...
@@ -79,9 +81,11 @@ A2p134 = numpy.asarray(
...
@@ -79,9 +81,11 @@ A2p134 = numpy.asarray(
[
1401213391
,
1178684362
,
1431130166
]])
[
1401213391
,
1178684362
,
1431130166
]])
np_int32_vals
=
[
numpy
.
int32
(
i
)
for
i
in
(
0
,
7
,
9
,
15
,
16
,
22
,
24
)]
np_int32_vals
=
[
numpy
.
int32
(
i
)
for
i
in
(
0
,
7
,
9
,
15
,
16
,
22
,
24
)]
def
ff_2p134
(
rstate
):
def
ff_2p134
(
rstate
):
return
multMatVect
(
rstate
,
A1p134
,
M1
,
A2p134
,
M2
)
return
multMatVect
(
rstate
,
A1p134
,
M1
,
A2p134
,
M2
)
def
ff_2p72
(
rstate
):
def
ff_2p72
(
rstate
):
return
multMatVect
(
rstate
,
A1p72
,
M1
,
A2p72
,
M2
)
return
multMatVect
(
rstate
,
A1p72
,
M1
,
A2p72
,
M2
)
...
@@ -93,8 +97,8 @@ def mrg_next_value(rstate, new_rstate):
...
@@ -93,8 +97,8 @@ def mrg_next_value(rstate, new_rstate):
#i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
#i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
i0
,
i7
,
i9
,
i15
,
i16
,
i22
,
i24
=
np_int32_vals
i0
,
i7
,
i9
,
i15
,
i16
,
i22
,
i24
=
np_int32_vals
#first component
#first component
y1
=
(((
x12
&
MASK12
)
<<
i22
)
+
(
x12
>>
i9
)
y1
=
(((
x12
&
MASK12
)
<<
i22
)
+
(
x12
>>
i9
)
+
+
((
x13
&
MASK13
)
<<
i7
)
+
(
x13
>>
i24
))
((
x13
&
MASK13
)
<<
i7
)
+
(
x13
>>
i24
))
assert
type
(
y1
)
==
numpy
.
int32
assert
type
(
y1
)
==
numpy
.
int32
if
(
y1
<
0
or
y1
>=
M1
):
#must also check overflow
if
(
y1
<
0
or
y1
>=
M1
):
#must also check overflow
...
@@ -135,6 +139,7 @@ def mrg_next_value(rstate, new_rstate):
...
@@ -135,6 +139,7 @@ def mrg_next_value(rstate, new_rstate):
else
:
else
:
return
(
x11
-
x21
)
*
NORM
return
(
x11
-
x21
)
*
NORM
class
mrg_uniform_base
(
Op
):
class
mrg_uniform_base
(
Op
):
def
__init__
(
self
,
output_type
,
inplace
=
False
):
def
__init__
(
self
,
output_type
,
inplace
=
False
):
Op
.
__init__
(
self
)
Op
.
__init__
(
self
)
...
@@ -145,17 +150,19 @@ class mrg_uniform_base(Op):
...
@@ -145,17 +150,19 @@ class mrg_uniform_base(Op):
self
.
warned_numpy_version
=
False
self
.
warned_numpy_version
=
False
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
\
return
(
type
(
self
)
==
type
(
other
)
and
and
self
.
output_type
==
other
.
output_type
\
self
.
output_type
==
other
.
output_type
and
and
self
.
inplace
==
other
.
inplace
self
.
inplace
==
other
.
inplace
)
def
__hash__
(
self
):
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
output_type
)
^
hash
(
self
.
inplace
)
return
hash
(
type
(
self
))
^
hash
(
self
.
output_type
)
^
hash
(
self
.
inplace
)
def
__str__
(
self
):
def
__str__
(
self
):
if
self
.
inplace
:
if
self
.
inplace
:
s
=
"inplace"
s
=
"inplace"
else
:
s
=
"no_inplace"
else
:
return
self
.
__class__
.
__name__
+
"{
%
s,
%
s}"
%
(
self
.
output_type
,
s
)
s
=
"no_inplace"
return
self
.
__class__
.
__name__
+
"{
%
s,
%
s}"
%
(
self
.
output_type
,
s
)
def
make_node
(
self
,
rstate
,
size
):
def
make_node
(
self
,
rstate
,
size
):
# error checking slightly redundant here, since
# error checking slightly redundant here, since
...
@@ -166,7 +173,7 @@ class mrg_uniform_base(Op):
...
@@ -166,7 +173,7 @@ class mrg_uniform_base(Op):
[
rstate
,
size
],
[
rstate
,
size
],
[
rstate
.
type
(),
self
.
output_type
()])
[
rstate
.
type
(),
self
.
output_type
()])
def
grad
(
self
,
inputs
,
ograd
):
def
grad
(
self
,
inputs
,
ograd
):
return
[
None
for
i
in
inputs
]
return
[
None
for
i
in
inputs
]
def
R_op
(
self
,
inputs
,
eval_points
):
def
R_op
(
self
,
inputs
,
eval_points
):
...
@@ -187,8 +194,8 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -187,8 +194,8 @@ class mrg_uniform(mrg_uniform_base):
def
perform
(
self
,
node
,
inp
,
out
):
def
perform
(
self
,
node
,
inp
,
out
):
rstate
,
size
=
inp
rstate
,
size
=
inp
o_rstate
,
o_sample
=
out
o_rstate
,
o_sample
=
out
numpy_version
=
numpy
.
__version__
.
split
(
'.'
)
numpy_version
=
numpy
.
__version__
.
split
(
'.'
)
if
not
self
.
warned_numpy_version
and
int
(
numpy_version
[
0
])
<=
1
and
int
(
numpy_version
[
1
])
<
3
:
if
not
self
.
warned_numpy_version
and
int
(
numpy_version
[
0
])
<=
1
and
int
(
numpy_version
[
1
])
<
3
:
print
"Warning: you must use numpy version 1.3.0 or higher with the python version of this op. Otherwise numpy leak memory. and numpy"
print
"Warning: you must use numpy version 1.3.0 or higher with the python version of this op. Otherwise numpy leak memory. and numpy"
self
.
warned_numpy_version
=
True
self
.
warned_numpy_version
=
True
...
@@ -201,20 +208,21 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -201,20 +208,21 @@ class mrg_uniform(mrg_uniform_base):
for
s
in
size
:
for
s
in
size
:
n_elements
*=
s
n_elements
*=
s
n_streams
,
_
=
rstate
.
shape
n_streams
,
_
=
rstate
.
shape
rval
=
numpy
.
zeros
(
n_elements
,
dtype
=
self
.
output_type
.
dtype
)
rval
=
numpy
.
zeros
(
n_elements
,
dtype
=
self
.
output_type
.
dtype
)
err_orig
=
numpy
.
seterr
(
over
=
'ignore'
)
err_orig
=
numpy
.
seterr
(
over
=
'ignore'
)
try
:
try
:
for
i
in
xrange
(
n_elements
):
for
i
in
xrange
(
n_elements
):
sample
=
mrg_next_value
(
rstate
[
i
%
n_streams
],
rstate
[
i
%
n_streams
])
sample
=
mrg_next_value
(
rstate
[
i
%
n_streams
],
rstate
[
i
%
n_streams
])
rval
[
i
]
=
sample
rval
[
i
]
=
sample
finally
:
finally
:
numpy
.
seterr
(
**
err_orig
)
numpy
.
seterr
(
**
err_orig
)
o_rstate
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
rstate
)
# send to GPU if necessary
o_rstate
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
rstate
)
# send to GPU if necessary
o_sample
[
0
]
=
node
.
outputs
[
1
]
.
type
.
filter
(
rval
.
reshape
(
size
))
# send to GPU if necessary
o_sample
[
0
]
=
node
.
outputs
[
1
]
.
type
.
filter
(
rval
.
reshape
(
size
))
# send to GPU if necessary
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
rstate
,
size
=
inp
rstate
,
size
=
inp
...
@@ -228,7 +236,7 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -228,7 +236,7 @@ class mrg_uniform(mrg_uniform_base):
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
if
self
.
output_type
.
dtype
==
'float32'
:
if
self
.
output_type
.
dtype
==
'float32'
:
otype
=
'float'
otype
=
'float'
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that
# this was determined by finding the biggest number such that
# numpy.float32(number * M1) < 1.0
# numpy.float32(number * M1) < 1.0
else
:
else
:
...
@@ -279,7 +287,7 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -279,7 +287,7 @@ class mrg_uniform(mrg_uniform_base):
}
}
for (int i = 0; i <
%(ndim)
s; ++i)
for (int i = 0; i <
%(ndim)
s; ++i)
{
{
odims[i] = ((npy_int32*)(
%(size)
s->data +
%(size)
s->strides
[0] * i))[0];
odims[i] = ((npy_int32*)(
PyArray_BYTES(
%(size)
s) + PyArray_STRIDES(
%(size)
s)
[0] * i))[0];
n_elements *= odims[i];
n_elements *= odims[i];
must_alloc_sample = must_alloc_sample || (PyArray_DIMS(
%(o_sample)
s)[i] != odims[i]);
must_alloc_sample = must_alloc_sample || (PyArray_DIMS(
%(o_sample)
s)[i] != odims[i]);
//fprintf(stderr, "size
%%
i
%%
i
\\
n", i, (int)odims[i]);
//fprintf(stderr, "size
%%
i
%%
i
\\
n", i, (int)odims[i]);
...
@@ -313,8 +321,8 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -313,8 +321,8 @@ class mrg_uniform(mrg_uniform_base):
}
}
n_streams = PyArray_DIMS(
%(o_rstate)
s)[0];
n_streams = PyArray_DIMS(
%(o_rstate)
s)[0];
sample_data = (
%(otype)
s *)
%(o_sample)
s->data
;
sample_data = (
%(otype)
s *)
PyArray_DATA(
%(o_sample)
s)
;
state_data = (npy_int32 *)
%(o_rstate)
s->data
;
state_data = (npy_int32 *)
PyArray_DATA(
%(o_rstate)
s)
;
for (int i = 0; i < n_elements; ++i)
for (int i = 0; i < n_elements; ++i)
{
{
npy_int32 * state_data_i = state_data + (i
%%
n_streams)*6;
npy_int32 * state_data_i = state_data + (i
%%
n_streams)*6;
...
@@ -392,7 +400,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
...
@@ -392,7 +400,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
if
self
.
output_type
.
dtype
==
'float32'
:
if
self
.
output_type
.
dtype
==
'float32'
:
otype
=
'float'
otype
=
'float'
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
NORM
=
'4.6566126e-10f'
#
numpy.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that
# this was determined by finding the biggest number such that
# numpy.float32(number * M1) < 1.0
# numpy.float32(number * M1) < 1.0
else
:
else
:
...
@@ -476,7 +484,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
...
@@ -476,7 +484,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
}
}
}
}
"""
%
locals
()
"""
%
locals
()
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
rstate
,
size
=
inp
rstate
,
size
=
inp
...
@@ -491,7 +499,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
...
@@ -491,7 +499,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
else
:
else
:
otype
=
'double'
otype
=
'double'
SYNC
=
"CNDA_THREAD_SYNC"
;
SYNC
=
"CNDA_THREAD_SYNC"
return
"""
return
"""
//////// <code generated by mrg_uniform>
//////// <code generated by mrg_uniform>
...
@@ -521,7 +529,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
...
@@ -521,7 +529,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
}
}
for (int i = 0; i <
%(ndim)
s; ++i)
for (int i = 0; i <
%(ndim)
s; ++i)
{
{
odims[i] = ((npy_int32*)(
%(size)
s->data +
%(size)
s->strides
[0] * i))[0];
odims[i] = ((npy_int32*)(
PyArray_BYTES(
%(size)
s) + PyArray_STRIDES(
%(size)
s)
[0] * i))[0];
n_elements *= odims[i];
n_elements *= odims[i];
must_alloc_sample = (must_alloc_sample
must_alloc_sample = (must_alloc_sample
|| CudaNdarray_HOST_DIMS(
%(o_sample)
s)[i] != odims[i]);
|| CudaNdarray_HOST_DIMS(
%(o_sample)
s)[i] != odims[i]);
...
@@ -593,7 +601,8 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
...
@@ -593,7 +601,8 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
}
}
//////// </ code generated by mrg_uniform>
//////// </ code generated by mrg_uniform>
"""
%
locals
()
"""
%
locals
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
7
,)
return
(
7
,)
...
@@ -662,7 +671,7 @@ class MRG_RandomStreams(object):
...
@@ -662,7 +671,7 @@ class MRG_RandomStreams(object):
elif
seed
>=
M2
:
elif
seed
>=
M2
:
raise
ValueError
(
'seed should be less than
%
i'
%
M2
,
seed
)
raise
ValueError
(
'seed should be less than
%
i'
%
M2
,
seed
)
self
.
rstate
=
numpy
.
asarray
([
seed
]
*
6
,
dtype
=
'int32'
)
self
.
rstate
=
numpy
.
asarray
([
seed
]
*
6
,
dtype
=
'int32'
)
elif
len
(
seed
)
==
6
:
elif
len
(
seed
)
==
6
:
if
seed
[
0
]
==
0
and
seed
[
1
]
==
0
and
seed
[
2
]
==
0
:
if
seed
[
0
]
==
0
and
seed
[
1
]
==
0
and
seed
[
2
]
==
0
:
raise
ValueError
(
'The first 3 values of seed should not be all 0'
,
seed
)
raise
ValueError
(
'The first 3 values of seed should not be all 0'
,
seed
)
if
seed
[
3
]
==
0
and
seed
[
4
]
==
0
and
seed
[
5
]
==
0
:
if
seed
[
3
]
==
0
and
seed
[
4
]
==
0
and
seed
[
5
]
==
0
:
...
@@ -690,7 +699,7 @@ class MRG_RandomStreams(object):
...
@@ -690,7 +699,7 @@ class MRG_RandomStreams(object):
"""
"""
assert
n_streams
<
2
**
72
assert
n_streams
<
2
**
72
assert
n_streams
>
0
assert
n_streams
>
0
rval
=
numpy
.
zeros
((
n_streams
,
6
),
dtype
=
'int32'
)
rval
=
numpy
.
zeros
((
n_streams
,
6
),
dtype
=
'int32'
)
rval
[
0
]
=
self
.
rstate
rval
[
0
]
=
self
.
rstate
for
i
in
xrange
(
1
,
n_streams
):
for
i
in
xrange
(
1
,
n_streams
):
rval
[
i
]
=
ff_2p72
(
rval
[
i
-
1
])
rval
[
i
]
=
ff_2p72
(
rval
[
i
-
1
])
...
@@ -776,11 +785,13 @@ class MRG_RandomStreams(object):
...
@@ -776,11 +785,13 @@ class MRG_RandomStreams(object):
# currently no Theano node that will do a frombuffer
# currently no Theano node that will do a frombuffer
# reinterpretation.
# reinterpretation.
u
=
self
.
pretty_return
(
node_rstate
,
u
=
self
.
pretty_return
(
node_rstate
,
*
GPU_mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
*
GPU_mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
else
:
else
:
node_rstate
=
shared
(
self
.
get_substream_rstates
(
nstreams
))
node_rstate
=
shared
(
self
.
get_substream_rstates
(
nstreams
))
u
=
self
.
pretty_return
(
node_rstate
,
u
=
self
.
pretty_return
(
node_rstate
,
*
mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
*
mrg_uniform
.
new
(
node_rstate
,
ndim
,
dtype
,
size
))
r
=
u
*
(
high
-
low
)
+
low
r
=
u
*
(
high
-
low
)
+
low
if
u
.
type
.
broadcastable
!=
r
.
type
.
broadcastable
:
if
u
.
type
.
broadcastable
!=
r
.
type
.
broadcastable
:
...
@@ -934,4 +945,6 @@ def mrg_random_make_inplace(node):
...
@@ -934,4 +945,6 @@ def mrg_random_make_inplace(node):
new_op
=
op
.
__class__
(
op
.
output_type
,
inplace
=
True
)
new_op
=
op
.
__class__
(
op
.
output_type
,
inplace
=
True
)
return
new_op
.
make_node
(
*
node
.
inputs
)
.
outputs
return
new_op
.
make_node
(
*
node
.
inputs
)
.
outputs
return
False
return
False
optdb
.
register
(
'random_make_inplace_mrg'
,
opt
.
in2out
(
mrg_random_make_inplace
,
ignore_newtrees
=
True
),
99
,
'fast_run'
,
'inplace'
)
optdb
.
register
(
'random_make_inplace_mrg'
,
opt
.
in2out
(
mrg_random_make_inplace
,
ignore_newtrees
=
True
),
99
,
'fast_run'
,
'inplace'
)
theano/scan_module/scan_perform.c
.txt
→
theano/scan_module/scan_perform.c
浏览文件 @
9950ce08
This source diff could not be displayed because it is too large. You can
view the blob
instead.
theano/scan_module/scan_perform.pyx
浏览文件 @
9950ce08
...
@@ -62,7 +62,7 @@ import copy
...
@@ -62,7 +62,7 @@ import copy
def get_version():
def get_version():
return 0.27
8
return 0.27
9
@cython.boundscheck(False)
@cython.boundscheck(False)
def perform(
def perform(
...
...
theano/scan_module/scan_perform_ext.py
浏览文件 @
9950ce08
...
@@ -11,7 +11,7 @@ _logger = logging.getLogger('theano.scan_module.scan_perform')
...
@@ -11,7 +11,7 @@ _logger = logging.getLogger('theano.scan_module.scan_perform')
_logger
.
setLevel
(
logging
.
WARN
)
_logger
.
setLevel
(
logging
.
WARN
)
version
=
0.2
78
# must match constant returned in function get_version()
version
=
0.2
80
# must match constant returned in function get_version()
need_reload
=
False
need_reload
=
False
...
@@ -52,11 +52,8 @@ except ImportError:
...
@@ -52,11 +52,8 @@ except ImportError:
_logger
.
info
(
"Compiling C code for scan"
)
_logger
.
info
(
"Compiling C code for scan"
)
dirname
=
'scan_perform'
dirname
=
'scan_perform'
# We use a .txt extension, as otherwise the file doesn't get
# included when we create a package to send to PyPI.
# This happens even if we ask to include *.c files.
cfile
=
os
.
path
.
join
(
theano
.
__path__
[
0
],
'scan_module'
,
cfile
=
os
.
path
.
join
(
theano
.
__path__
[
0
],
'scan_module'
,
'scan_perform.c
.txt
'
)
'scan_perform.c'
)
code
=
open
(
cfile
)
.
read
()
code
=
open
(
cfile
)
.
read
()
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
if
not
os
.
path
.
exists
(
loc
):
if
not
os
.
path
.
exists
(
loc
):
...
...
theano/sparse/basic.py
浏览文件 @
9950ce08
...
@@ -1795,9 +1795,9 @@ class AddSD(gof.op.Op):
...
@@ -1795,9 +1795,9 @@ class AddSD(gof.op.Op):
}
}
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const npy_int32 * __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * __restrict__ indices = (npy_int32*)
%(_indices)
s->data
;
const npy_int32 * __restrict__ indices = (npy_int32*)
PyArray_DATA(
%(_indices)
s)
;
const dtype_
%(_data)
s* __restrict__ data = (dtype_
%(_data)
s*)
%(_data)
s->data
;
const dtype_
%(_data)
s* __restrict__ data = (dtype_
%(_data)
s*)
PyArray_DATA(
%(_data)
s)
;
dtype_
%(y)
s* ydata = (dtype_
%(y)
s*)PyArray_DATA(
%(y)
s);
dtype_
%(y)
s* ydata = (dtype_
%(y)
s*)PyArray_DATA(
%(y)
s);
dtype_
%(z)
s* zdata = (dtype_
%(z)
s*)PyArray_DATA(
%(z)
s);
dtype_
%(z)
s* zdata = (dtype_
%(z)
s*)PyArray_DATA(
%(z)
s);
...
@@ -2983,10 +2983,10 @@ class StructuredDotGradCSC(gof.Op):
...
@@ -2983,10 +2983,10 @@ class StructuredDotGradCSC(gof.Op):
if (PyArray_NDIM(
%(_indices)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indices)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if( PyArray_DIMS(
%(_d)
s)[1] != PyArray_DIMS(
%(_g)
s)[1])
if( PyArray_DIMS(
%(_d)
s)[1] != PyArray_DIMS(
%(_g)
s)[1])
...
@@ -2996,29 +2996,29 @@ class StructuredDotGradCSC(gof.Op):
...
@@ -2996,29 +2996,29 @@ class StructuredDotGradCSC(gof.Op):
|| (PyArray_DIMS(
%(_zout)
s)[0] != PyArray_DIMS(
%(_indices)
s)[0]))
|| (PyArray_DIMS(
%(_zout)
s)[0] != PyArray_DIMS(
%(_indices)
s)[0]))
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_g)
s)->type_num
);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_g)
s)
);
}
}
{ //makes it compile even though labels jump over variable definitions.
{ //makes it compile even though labels jump over variable definitions.
npy_intp nnz = PyArray_DIMS(
%(_indices)
s)[0];
npy_intp nnz = PyArray_DIMS(
%(_indices)
s)[0];
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1; //TODO: error checking with this
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1; //TODO: error checking with this
npy_intp Sindices =
%(_indices)
s->strides
[0]/PyArray_DESCR(
%(_indices)
s)->elsize;
npy_intp Sindices =
PyArray_STRIDES(
%(_indices)
s)
[0]/PyArray_DESCR(
%(_indices)
s)->elsize;
npy_intp Sindptr =
%(_indptr)
s->strides
[0]/PyArray_DESCR(
%(_indptr)
s)->elsize;
npy_intp Sindptr =
PyArray_STRIDES(
%(_indptr)
s)
[0]/PyArray_DESCR(
%(_indptr)
s)->elsize;
const npy_intp Sd1 =
%(_d)
s->strides
[1]/PyArray_DESCR(
%(_d)
s)->elsize;
const npy_intp Sd1 =
PyArray_STRIDES(
%(_d)
s)
[1]/PyArray_DESCR(
%(_d)
s)->elsize;
const npy_intp Sg1 =
%(_g)
s->strides
[1]/PyArray_DESCR(
%(_g)
s)->elsize;
const npy_intp Sg1 =
PyArray_STRIDES(
%(_g)
s)
[1]/PyArray_DESCR(
%(_g)
s)->elsize;
const npy_intp K = PyArray_DIMS(
%(_d)
s)[1];
const npy_intp K = PyArray_DIMS(
%(_d)
s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
// loop over columns
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
for (npy_int32 j = 0; j < N; ++j)
{
{
// extract j-th row of dense matrix
// extract j-th row of dense matrix
const dtype_
%(_d)
s* __restrict__ d_row = (dtype_
%(_d)
s*)(
%(_d)
s->data +
%(_d)
s->strides
[0] * j);
const dtype_
%(_d)
s* __restrict__ d_row = (dtype_
%(_d)
s*)(
PyArray_BYTES(
%(_d)
s) + PyArray_STRIDES(
%(_d)
s)
[0] * j);
if(j >= PyArray_DIMS(
%(_d)
s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G");
%(fail)
s;}
if(j >= PyArray_DIMS(
%(_d)
s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G");
%(fail)
s;}
// for each non-null value in the sparse column
// for each non-null value in the sparse column
...
@@ -3028,7 +3028,7 @@ class StructuredDotGradCSC(gof.Op):
...
@@ -3028,7 +3028,7 @@ class StructuredDotGradCSC(gof.Op):
npy_int32 i = indices[i_idx * Sindices];
npy_int32 i = indices[i_idx * Sindices];
// extract corresponding row in gradient
// extract corresponding row in gradient
const dtype_
%(_g)
s* __restrict__ g_row = (dtype_
%(_g)
s*)(
%(_g)
s->data +
%(_g)
s->strides
[0] * i);
const dtype_
%(_g)
s* __restrict__ g_row = (dtype_
%(_g)
s*)(
PyArray_BYTES(
%(_g)
s) + PyArray_STRIDES(
%(_g)
s)
[0] * i);
double ip = 0.0;
double ip = 0.0;
// make sure that row index is not bigger than actual number of rows
// make sure that row index is not bigger than actual number of rows
...
@@ -3044,7 +3044,7 @@ class StructuredDotGradCSC(gof.Op):
...
@@ -3044,7 +3044,7 @@ class StructuredDotGradCSC(gof.Op):
}
}
// write resulting gradient to sparse output
// write resulting gradient to sparse output
((dtype_
%(_zout)
s* __restrict__)(
%(_zout)
s->data + i_idx *
%(_zout)
s->strides
[0]))[0] = ip;
((dtype_
%(_zout)
s* __restrict__)(
PyArray_BYTES(
%(_zout)
s) + i_idx * PyArray_STRIDES(
%(_zout)
s)
[0]))[0] = ip;
}
}
}
}
}
}
...
@@ -3119,10 +3119,10 @@ class StructuredDotGradCSR(gof.Op):
...
@@ -3119,10 +3119,10 @@ class StructuredDotGradCSR(gof.Op):
if (PyArray_NDIM(
%(_indices)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indices)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if( PyArray_DIMS(
%(_d)
s)[1] != PyArray_DIMS(
%(_g)
s)[1])
if( PyArray_DIMS(
%(_d)
s)[1] != PyArray_DIMS(
%(_g)
s)[1])
...
@@ -3132,7 +3132,7 @@ class StructuredDotGradCSR(gof.Op):
...
@@ -3132,7 +3132,7 @@ class StructuredDotGradCSR(gof.Op):
|| (PyArray_DIMS(
%(_zout)
s)[0] != PyArray_DIMS(
%(_indices)
s)[0]))
|| (PyArray_DIMS(
%(_zout)
s)[0] != PyArray_DIMS(
%(_indices)
s)[0]))
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_g)
s)->type_num
);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_g)
s)
);
}
}
{ //makes it compile even though labels jump over variable definitions.
{ //makes it compile even though labels jump over variable definitions.
...
@@ -3140,16 +3140,16 @@ class StructuredDotGradCSR(gof.Op):
...
@@ -3140,16 +3140,16 @@ class StructuredDotGradCSR(gof.Op):
// extract number of rows
// extract number of rows
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1; //TODO: error checking with this
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1; //TODO: error checking with this
npy_intp Sindices =
%(_indices)
s->strides
[0]/PyArray_DESCR(
%(_indices)
s)->elsize;
npy_intp Sindices =
PyArray_STRIDES(
%(_indices)
s)
[0]/PyArray_DESCR(
%(_indices)
s)->elsize;
npy_intp Sindptr =
%(_indptr)
s->strides
[0]/PyArray_DESCR(
%(_indptr)
s)->elsize;
npy_intp Sindptr =
PyArray_STRIDES(
%(_indptr)
s)
[0]/PyArray_DESCR(
%(_indptr)
s)->elsize;
const npy_intp Sd1 =
%(_d)
s->strides
[1]/PyArray_DESCR(
%(_d)
s)->elsize;
const npy_intp Sd1 =
PyArray_STRIDES(
%(_d)
s)
[1]/PyArray_DESCR(
%(_d)
s)->elsize;
const npy_intp Sg1 =
%(_g)
s->strides
[1]/PyArray_DESCR(
%(_g)
s)->elsize;
const npy_intp Sg1 =
PyArray_STRIDES(
%(_g)
s)
[1]/PyArray_DESCR(
%(_g)
s)->elsize;
const npy_intp K = PyArray_DIMS(
%(_d)
s)[1];
const npy_intp K = PyArray_DIMS(
%(_d)
s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
// loop over columns of sparse matrix
// loop over columns of sparse matrix
for (npy_int32 i = 0; i < N; ++i)
for (npy_int32 i = 0; i < N; ++i)
...
@@ -3161,11 +3161,11 @@ class StructuredDotGradCSR(gof.Op):
...
@@ -3161,11 +3161,11 @@ class StructuredDotGradCSR(gof.Op):
npy_int32 j = indices[j_idx * Sindices];
npy_int32 j = indices[j_idx * Sindices];
// extract j-th row of dense matrix
// extract j-th row of dense matrix
const dtype_
%(_d)
s* __restrict__ d_row = (dtype_
%(_d)
s*)(
%(_d)
s->data +
%(_d)
s->strides
[0] * j);
const dtype_
%(_d)
s* __restrict__ d_row = (dtype_
%(_d)
s*)(
PyArray_BYTES(
%(_d)
s) + PyArray_STRIDES(
%(_d)
s)
[0] * j);
if(j >= PyArray_DIMS(
%(_d)
s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G");
%(fail)
s;}
if(j >= PyArray_DIMS(
%(_d)
s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G");
%(fail)
s;}
// extract corresponding row in gradient
// extract corresponding row in gradient
const dtype_
%(_g)
s* __restrict__ g_row = (dtype_
%(_g)
s*)(
%(_g)
s->data +
%(_g)
s->strides
[0] * i);
const dtype_
%(_g)
s* __restrict__ g_row = (dtype_
%(_g)
s*)(
PyArray_BYTES(
%(_g)
s) + PyArray_STRIDES(
%(_g)
s)
[0] * i);
double ip = 0.0;
double ip = 0.0;
// make sure that row index is not bigger than actual number of rows
// make sure that row index is not bigger than actual number of rows
...
@@ -3181,7 +3181,7 @@ class StructuredDotGradCSR(gof.Op):
...
@@ -3181,7 +3181,7 @@ class StructuredDotGradCSR(gof.Op):
}
}
// write resulting gradient to sparse output
// write resulting gradient to sparse output
((dtype_
%(_zout)
s* __restrict__)(
%(_zout)
s->data + j_idx *
%(_zout)
s->strides
[0]))[0] = ip;
((dtype_
%(_zout)
s* __restrict__)(
PyArray_BYTES(
%(_zout)
s) + j_idx * PyArray_STRIDES(
%(_zout)
s)
[0]))[0] = ip;
}
}
}
}
}
}
...
...
theano/sparse/opt.py
浏览文件 @
9950ce08
...
@@ -142,19 +142,19 @@ class StructuredDotCSC(gof.Op):
...
@@ -142,19 +142,19 @@ class StructuredDotCSC(gof.Op):
if (PyArray_NDIM(
%(a_nrows)
s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0");
%(fail)
s;}
if (PyArray_NDIM(
%(a_nrows)
s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0");
%(fail)
s;}
if (PyArray_NDIM(
%(b)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)
s;}
if (PyArray_NDIM(
%(b)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_val)
s)->type_num
!=
%(typenum_a_val)
s) {
if (PyArray_
TYPE(
%(a_val)
s)
!=
%(typenum_a_val)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val");
%(fail)
s;}
if (PyArray_
DESCR(
%(b)
s)->type_num
!=
%(typenum_b)
s) {
if (PyArray_
TYPE(
%(b)
s)
!=
%(typenum_b)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ind)
s)->type_num
!= NPY_INT32) {
if (PyArray_
TYPE(
%(a_ind)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ptr)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(a_ptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_nrows)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(a_nrows)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32");
%(fail)
s;}
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
...
@@ -164,13 +164,13 @@ class StructuredDotCSC(gof.Op):
...
@@ -164,13 +164,13 @@ class StructuredDotCSC(gof.Op):
{PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows");
%(fail)
s;}
if ((!
%(z)
s)
if ((!
%(z)
s)
|| (PyArray_DIMS(
%(z)
s)[0] != ((npy_int32 *)
%(a_nrows)
s->data
)[0])
|| (PyArray_DIMS(
%(z)
s)[0] != ((npy_int32 *)
PyArray_DATA(
%(a_nrows)
s)
)[0])
|| (PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(b)
s)[1])
|| (PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(b)
s)[1])
)
)
{
{
{Py_XDECREF(
%(z)
s);}
{Py_XDECREF(
%(z)
s);}
npy_intp dims[] = {0, 0};
npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)
%(a_nrows)
s->data
)[0];
dims[0] = ((npy_int32 *)
PyArray_DATA(
%(a_nrows)
s)
)[0];
dims[1] = PyArray_DIMS(
%(b)
s)[1];
dims[1] = PyArray_DIMS(
%(b)
s)[1];
%(z)
s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
%(typenum_z)
s);
%(z)
s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
%(typenum_z)
s);
}
}
...
@@ -182,19 +182,19 @@ class StructuredDotCSC(gof.Op):
...
@@ -182,19 +182,19 @@ class StructuredDotCSC(gof.Op):
npy_intp K = PyArray_DIMS(
%(b)
s)[0];
npy_intp K = PyArray_DIMS(
%(b)
s)[0];
// strides tell you how many bytes to skip to go to next column/row entry
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm =
%(z)
s->strides
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szm =
PyArray_STRIDES(
%(z)
s)
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szn =
%(z)
s->strides
[1] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szn =
PyArray_STRIDES(
%(z)
s)
[1] / PyArray_DESCR(
%(z)
s)->elsize;
//npy_intp Sbm =
%(b)
s->strides
[0] / PyArray_DESCR(
%(b)
s)->elsize;
//npy_intp Sbm =
PyArray_STRIDES(
%(b)
s)
[0] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sbn =
%(b)
s->strides
[1] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sbn =
PyArray_STRIDES(
%(b)
s)
[1] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sval =
%(a_val)
s->strides
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sval =
PyArray_STRIDES(
%(a_val)
s)
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sind =
%(a_ind)
s->strides
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sind =
PyArray_STRIDES(
%(a_ind)
s)
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sptr =
%(a_ptr)
s->strides
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
npy_intp Sptr =
PyArray_STRIDES(
%(a_ptr)
s)
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
// pointers to access actual data in the arrays passed as params.
// pointers to access actual data in the arrays passed as params.
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
%(z)
s->data
;
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
PyArray_DATA(
%(z)
s)
;
const dtype_
%(a_val)
s* __restrict__ Dval = (dtype_
%(a_val)
s*)
%(a_val)
s->data
;
const dtype_
%(a_val)
s* __restrict__ Dval = (dtype_
%(a_val)
s*)
PyArray_DATA(
%(a_val)
s)
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
%(a_ind)
s->data
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
PyArray_DATA(
%(a_ind)
s)
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
%(a_ptr)
s->data
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
PyArray_DATA(
%(a_ptr)
s)
;
//npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
//npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
...
@@ -218,7 +218,7 @@ class StructuredDotCSC(gof.Op):
...
@@ -218,7 +218,7 @@ class StructuredDotCSC(gof.Op):
for (npy_int32 k = 0; k < K; ++k)
for (npy_int32 k = 0; k < K; ++k)
{
{
// get pointer to k-th row of dense matrix
// get pointer to k-th row of dense matrix
const dtype_
%(b)
s* __restrict__ bk = (dtype_
%(b)
s*)(
%(b)
s->data +
%(b)
s->strides
[0] * k);
const dtype_
%(b)
s* __restrict__ bk = (dtype_
%(b)
s*)(
PyArray_BYTES(
%(b)
s) + PyArray_STRIDES(
%(b)
s)
[0] * k);
// loop over sparse column indices through index pointer array
// loop over sparse column indices through index pointer array
// (amounts to looping over rows M of sparse matrix)
// (amounts to looping over rows M of sparse matrix)
...
@@ -229,7 +229,7 @@ class StructuredDotCSC(gof.Op):
...
@@ -229,7 +229,7 @@ class StructuredDotCSC(gof.Op):
const dtype_
%(a_val)
s Amk = Dval[m_idx * Sval]; // actual value at that location
const dtype_
%(a_val)
s Amk = Dval[m_idx * Sval]; // actual value at that location
// pointer to m-th row of the output matrix Z
// pointer to m-th row of the output matrix Z
dtype_
%(z)
s* __restrict__ zm = (dtype_
%(z)
s*)(
%(z)
s->data +
%(z)
s->strides
[0] * m);
dtype_
%(z)
s* __restrict__ zm = (dtype_
%(z)
s*)(
PyArray_BYTES(
%(z)
s) + PyArray_STRIDES(
%(z)
s)
[0] * m);
//RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint?
//RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint?
if (m >= PyArray_DIMS(
%(z)
s)[0])
if (m >= PyArray_DIMS(
%(z)
s)[0])
...
@@ -330,10 +330,10 @@ class StructuredDotCSR(gof.Op):
...
@@ -330,10 +330,10 @@ class StructuredDotCSR(gof.Op):
if (PyArray_NDIM(
%(a_ptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(a_ptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(b)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)
s;}
if (PyArray_NDIM(
%(b)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ind)
s)->type_num
!= NPY_INT32) {
if (PyArray_
TYPE(
%(a_ind)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ptr)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(a_ptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
...
@@ -358,19 +358,19 @@ class StructuredDotCSR(gof.Op):
...
@@ -358,19 +358,19 @@ class StructuredDotCSR(gof.Op):
npy_intp K = PyArray_DIMS(
%(b)
s)[0];
npy_intp K = PyArray_DIMS(
%(b)
s)[0];
// strides tell you how many bytes to skip to go to next column/row entry
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm =
%(z)
s->strides
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szm =
PyArray_STRIDES(
%(z)
s)
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szn =
%(z)
s->strides
[1] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szn =
PyArray_STRIDES(
%(z)
s)
[1] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Sbm =
%(b)
s->strides
[0] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sbm =
PyArray_STRIDES(
%(b)
s)
[0] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sbn =
%(b)
s->strides
[1] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sbn =
PyArray_STRIDES(
%(b)
s)
[1] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sval =
%(a_val)
s->strides
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sval =
PyArray_STRIDES(
%(a_val)
s)
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sind =
%(a_ind)
s->strides
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sind =
PyArray_STRIDES(
%(a_ind)
s)
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sptr =
%(a_ptr)
s->strides
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
npy_intp Sptr =
PyArray_STRIDES(
%(a_ptr)
s)
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
// pointers to access actual data in the arrays passed as params.
// pointers to access actual data in the arrays passed as params.
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
%(z)
s->data
;
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
PyArray_DATA(
%(z)
s)
;
const dtype_
%(a_val)
s* __restrict__ Dval = (dtype_
%(a_val)
s*)
%(a_val)
s->data
;
const dtype_
%(a_val)
s* __restrict__ Dval = (dtype_
%(a_val)
s*)
PyArray_DATA(
%(a_val)
s)
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
%(a_ind)
s->data
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
PyArray_DATA(
%(a_ind)
s)
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
%(a_ptr)
s->data
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
PyArray_DATA(
%(a_ptr)
s)
;
//npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
//npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
...
@@ -393,7 +393,7 @@ class StructuredDotCSR(gof.Op):
...
@@ -393,7 +393,7 @@ class StructuredDotCSR(gof.Op):
for (npy_int64 m = 0; m < M; ++m)
for (npy_int64 m = 0; m < M; ++m)
{
{
// pointer to m-th row of the output matrix Z
// pointer to m-th row of the output matrix Z
dtype_
%(z)
s* __restrict__ zm = (dtype_
%(z)
s*)(
%(z)
s->data +
%(z)
s->strides
[0] * m);
dtype_
%(z)
s* __restrict__ zm = (dtype_
%(z)
s*)(
PyArray_BYTES(
%(z)
s) + PyArray_STRIDES(
%(z)
s)
[0] * m);
// loop over sparse rows indices through index pointer array
// loop over sparse rows indices through index pointer array
// (amounts to looping over cols k of sparse matrix)
// (amounts to looping over cols k of sparse matrix)
...
@@ -403,7 +403,7 @@ class StructuredDotCSR(gof.Op):
...
@@ -403,7 +403,7 @@ class StructuredDotCSR(gof.Op):
const dtype_
%(a_val)
s Amk = Dval[k_idx * Sval]; // actual value at that location
const dtype_
%(a_val)
s Amk = Dval[k_idx * Sval]; // actual value at that location
// get pointer to k-th row of dense matrix
// get pointer to k-th row of dense matrix
const dtype_
%(b)
s* __restrict__ bk = (dtype_
%(b)
s*)(
%(b)
s->data +
%(b)
s->strides
[0] * k);
const dtype_
%(b)
s* __restrict__ bk = (dtype_
%(b)
s*)(
PyArray_BYTES(
%(b)
s) + PyArray_STRIDES(
%(b)
s)
[0] * k);
// loop over final dimension (cols of dense matrix) and perform dot product
// loop over final dimension (cols of dense matrix) and perform dot product
for(npy_int32 n = 0; n < N; ++n)
for(npy_int32 n = 0; n < N; ++n)
...
@@ -566,25 +566,25 @@ class UsmmCscDense(gof.Op):
...
@@ -566,25 +566,25 @@ class UsmmCscDense(gof.Op):
if (PyArray_NDIM(
%(x_nrows)
s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0");
%(fail)
s;}
if (PyArray_NDIM(
%(x_nrows)
s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0");
%(fail)
s;}
if (PyArray_NDIM(
%(y)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2");
%(fail)
s;}
if (PyArray_NDIM(
%(y)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2");
%(fail)
s;}
if (PyArray_
DESCR(
%(x_val)
s)->type_num
!=
%(typenum_x_val)
s) {
if (PyArray_
TYPE(
%(x_val)
s)
!=
%(typenum_x_val)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x_val");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x_val");
%(fail)
s;}
if (PyArray_
DESCR(
%(y)
s)->type_num
!=
%(typenum_y)
s) {
if (PyArray_
TYPE(
%(y)
s)
!=
%(typenum_y)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y");
%(fail)
s;}
if (PyArray_
DESCR(
%(z)
s)->type_num
!=
%(typenum_z)
s) {
if (PyArray_
TYPE(
%(z)
s)
!=
%(typenum_z)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for z");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for z");
%(fail)
s;}
if (PyArray_
DESCR(
%(alpha)
s)->type_num
!=
%(typenum_alpha)
s) {
if (PyArray_
TYPE(
%(alpha)
s)
!=
%(typenum_alpha)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for alpha");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for alpha");
%(fail)
s;}
if (PyArray_
DESCR(
%(x_ind)
s)->type_num
!= NPY_INT32) {
if (PyArray_
TYPE(
%(x_ind)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "x_ind dtype not INT32");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "x_ind dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(x_ptr)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(x_ptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_ptr dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "x_ptr dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(x_nrows)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(x_nrows)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_nrows dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "x_nrows dtype not INT32");
%(fail)
s;}
if (PyArray_DIMS(
%(x_val)
s)[0] != PyArray_DIMS(
%(x_ind)
s)[0])
if (PyArray_DIMS(
%(x_val)
s)[0] != PyArray_DIMS(
%(x_ind)
s)[0])
...
@@ -593,7 +593,7 @@ class UsmmCscDense(gof.Op):
...
@@ -593,7 +593,7 @@ class UsmmCscDense(gof.Op):
if (PyArray_DIMS(
%(x_ptr)
s)[0] != PyArray_DIMS(
%(y)
s)[0]+1)
if (PyArray_DIMS(
%(x_ptr)
s)[0] != PyArray_DIMS(
%(y)
s)[0]+1)
{PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows");
%(fail)
s;}
if (PyArray_DIMS(
%(z)
s)[0] != ((npy_int32 *)
%(x_nrows)
s->data
)[0] || PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(y)
s)[1])
if (PyArray_DIMS(
%(z)
s)[0] != ((npy_int32 *)
PyArray_DATA(
%(x_nrows)
s)
)[0] || PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(y)
s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size.");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size.");
%(fail)
s;}
if (PyArray_SIZE(
%(alpha)
s) != 1)
if (PyArray_SIZE(
%(alpha)
s) != 1)
...
@@ -621,13 +621,13 @@ class UsmmCscDense(gof.Op):
...
@@ -621,13 +621,13 @@ class UsmmCscDense(gof.Op):
Py_INCREF(
%(zn)
s);
Py_INCREF(
%(zn)
s);
}
}
else if (!
%(zn)
s
else if (!
%(zn)
s
|| (PyArray_DIMS(
%(zn)
s)[0] != ((npy_int32 *)
%(x_nrows)
s->data
)[0])
|| (PyArray_DIMS(
%(zn)
s)[0] != ((npy_int32 *)
PyArray_DATA(
%(x_nrows)
s)
)[0])
|| (PyArray_DIMS(
%(zn)
s)[1] != PyArray_DIMS(
%(y)
s)[1])
|| (PyArray_DIMS(
%(zn)
s)[1] != PyArray_DIMS(
%(y)
s)[1])
)
)
{
{
{Py_XDECREF(
%(zn)
s);}
{Py_XDECREF(
%(zn)
s);}
npy_intp dims[] = {0, 0};
npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)
%(x_nrows)
s->data
)[0];
dims[0] = ((npy_int32 *)
PyArray_DATA(
%(x_nrows)
s)
)[0];
dims[1] = PyArray_DIMS(
%(y)
s)[1];
dims[1] = PyArray_DIMS(
%(y)
s)[1];
%(zn)
s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
%(typenum_zn)
s);
%(zn)
s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
%(typenum_zn)
s);
}
}
...
@@ -639,17 +639,17 @@ class UsmmCscDense(gof.Op):
...
@@ -639,17 +639,17 @@ class UsmmCscDense(gof.Op):
npy_intp K = PyArray_DIMS(
%(y)
s)[0];
npy_intp K = PyArray_DIMS(
%(y)
s)[0];
// pointers to access actual data in the arrays passed as params.
// pointers to access actual data in the arrays passed as params.
const dtype_
%(x_val)
s* __restrict__ Dval = (dtype_
%(x_val)
s*)
%(x_val)
s->data
;
const dtype_
%(x_val)
s* __restrict__ Dval = (dtype_
%(x_val)
s*)
PyArray_DATA(
%(x_val)
s)
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
%(x_ind)
s->data
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
PyArray_DATA(
%(x_ind)
s)
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
%(x_ptr)
s->data
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
PyArray_DATA(
%(x_ptr)
s)
;
const dtype_
%(alpha)
s alpha = ((dtype_
%(alpha)
s*)
%(alpha)
s->data
)[0];
const dtype_
%(alpha)
s alpha = ((dtype_
%(alpha)
s*)
PyArray_DATA(
%(alpha)
s)
)[0];
npy_intp Sz =
%(z)
s->strides
[1] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Sz =
PyArray_STRIDES(
%(z)
s)
[1] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szn =
%(zn)
s->strides
[1] / PyArray_DESCR(
%(zn)
s)->elsize;
npy_intp Szn =
PyArray_STRIDES(
%(zn)
s)
[1] / PyArray_DESCR(
%(zn)
s)->elsize;
npy_intp Sval =
%(x_val)
s->strides
[0] / PyArray_DESCR(
%(x_val)
s)->elsize;
npy_intp Sval =
PyArray_STRIDES(
%(x_val)
s)
[0] / PyArray_DESCR(
%(x_val)
s)->elsize;
npy_intp Sind =
%(x_ind)
s->strides
[0] / PyArray_DESCR(
%(x_ind)
s)->elsize;
npy_intp Sind =
PyArray_STRIDES(
%(x_ind)
s)
[0] / PyArray_DESCR(
%(x_ind)
s)->elsize;
npy_intp Sptr =
%(x_ptr)
s->strides
[0] / PyArray_DESCR(
%(x_ptr)
s)->elsize;
npy_intp Sptr =
PyArray_STRIDES(
%(x_ptr)
s)
[0] / PyArray_DESCR(
%(x_ptr)
s)->elsize;
npy_intp Sy =
%(y)
s->strides
[1] / PyArray_DESCR(
%(y)
s)->elsize;
npy_intp Sy =
PyArray_STRIDES(
%(y)
s)
[1] / PyArray_DESCR(
%(y)
s)->elsize;
if (!(
%(inplace)
s))
if (!(
%(inplace)
s))
...
@@ -669,14 +669,14 @@ class UsmmCscDense(gof.Op):
...
@@ -669,14 +669,14 @@ class UsmmCscDense(gof.Op):
const dtype_
%(x_val)
s Amk = alpha * Dval[m_idx * Sval]; // actual value at that location
const dtype_
%(x_val)
s Amk = alpha * Dval[m_idx * Sval]; // actual value at that location
dtype_
%(y)
s* y_row = (dtype_
%(y)
s*)(
%(y)
s->data +
%(y)
s->strides
[0] * k);
dtype_
%(y)
s* y_row = (dtype_
%(y)
s*)(
PyArray_BYTES(
%(y)
s) + PyArray_STRIDES(
%(y)
s)
[0] * k);
// axpy expects pointer to the beginning of memory arrays,
// axpy expects pointer to the beginning of memory arrays,
// so when the stride is negative, we need to get the
// so when the stride is negative, we need to get the
// last element
// last element
if (Sy < 0)
if (Sy < 0)
y_row += (K - 1) * Sy;
y_row += (K - 1) * Sy;
dtype_
%(zn)
s* z_row = (dtype_
%(zn)
s*)(
%(zn)
s->data +
%(zn)
s->strides
[0] * m);
dtype_
%(zn)
s* z_row = (dtype_
%(zn)
s*)(
PyArray_BYTES(
%(zn)
s) + PyArray_STRIDES(
%(zn)
s)
[0] * m);
if (Szn < 0)
if (Szn < 0)
z_row += (N - 1) * Szn;
z_row += (N - 1) * Szn;
...
@@ -775,16 +775,16 @@ class CSMGradC(gof.Op):
...
@@ -775,16 +775,16 @@ class CSMGradC(gof.Op):
if (PyArray_NDIM(
%(b_ind)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(b_ind)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(b_ptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(b_ptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ind)
s)->type_num
!= NPY_INT32) {
if (PyArray_
TYPE(
%(a_ind)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ptr)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(a_ptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(b_ind)
s)->type_num
!= NPY_INT32) {
if (PyArray_
TYPE(
%(b_ind)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(b_ptr)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(b_ptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32");
%(fail)
s;}
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
...
@@ -807,28 +807,28 @@ class CSMGradC(gof.Op):
...
@@ -807,28 +807,28 @@ class CSMGradC(gof.Op):
{
{
// sparse array has size MxK, dense KxN, output MxN
// sparse array has size MxK, dense KxN, output MxN
npy_intp M = PyArray_DIMS(
%(a_ptr)
s)[0] - 1;
npy_intp M = PyArray_DIMS(
%(a_ptr)
s)[0] - 1;
npy_intp a_dim_0 = ((npy_int32 *)
%(a_dim)
s->data
)[0];
npy_intp a_dim_0 = ((npy_int32 *)
PyArray_DATA(
%(a_dim)
s)
)[0];
npy_intp a_dim_1 = ((npy_int32 *)
%(a_dim)
s->data
)[1];
npy_intp a_dim_1 = ((npy_int32 *)
PyArray_DATA(
%(a_dim)
s)
)[1];
npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0;
npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0;
// strides tell you how many bytes to skip to go to next column/row entry
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Sz =
%(z)
s->strides
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Sz =
PyArray_STRIDES(
%(z)
s)
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Sa_val =
%(a_val)
s->strides
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sa_val =
PyArray_STRIDES(
%(a_val)
s)
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sa_ind =
%(a_ind)
s->strides
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sa_ind =
PyArray_STRIDES(
%(a_ind)
s)
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sa_ptr =
%(a_ptr)
s->strides
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
npy_intp Sa_ptr =
PyArray_STRIDES(
%(a_ptr)
s)
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
npy_intp Sb_val =
%(b_val)
s->strides
[0] / PyArray_DESCR(
%(b_val)
s)->elsize;
npy_intp Sb_val =
PyArray_STRIDES(
%(b_val)
s)
[0] / PyArray_DESCR(
%(b_val)
s)->elsize;
npy_intp Sb_ind =
%(b_ind)
s->strides
[0] / PyArray_DESCR(
%(b_ind)
s)->elsize;
npy_intp Sb_ind =
PyArray_STRIDES(
%(b_ind)
s)
[0] / PyArray_DESCR(
%(b_ind)
s)->elsize;
npy_intp Sb_ptr =
%(b_ptr)
s->strides
[0] / PyArray_DESCR(
%(b_ptr)
s)->elsize;
npy_intp Sb_ptr =
PyArray_STRIDES(
%(b_ptr)
s)
[0] / PyArray_DESCR(
%(b_ptr)
s)->elsize;
// pointers to access actual data in the arrays passed as params.
// pointers to access actual data in the arrays passed as params.
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
%(z)
s->data
;
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
PyArray_DATA(
%(z)
s)
;
const dtype_
%(a_val)
s* __restrict__ Da_val = (dtype_
%(a_val)
s*)
%(a_val)
s->data
;
const dtype_
%(a_val)
s* __restrict__ Da_val = (dtype_
%(a_val)
s*)
PyArray_DATA(
%(a_val)
s)
;
const npy_int32 * __restrict__ Da_ind = (npy_int32*)
%(a_ind)
s->data
;
const npy_int32 * __restrict__ Da_ind = (npy_int32*)
PyArray_DATA(
%(a_ind)
s)
;
const npy_int32 * __restrict__ Da_ptr = (npy_int32*)
%(a_ptr)
s->data
;
const npy_int32 * __restrict__ Da_ptr = (npy_int32*)
PyArray_DATA(
%(a_ptr)
s)
;
const dtype_
%(b_val)
s* __restrict__ Db_val = (dtype_
%(b_val)
s*)
%(b_val)
s->data
;
const dtype_
%(b_val)
s* __restrict__ Db_val = (dtype_
%(b_val)
s*)
PyArray_DATA(
%(b_val)
s)
;
const npy_int32 * __restrict__ Db_ind = (npy_int32*)
%(b_ind)
s->data
;
const npy_int32 * __restrict__ Db_ind = (npy_int32*)
PyArray_DATA(
%(b_ind)
s)
;
const npy_int32 * __restrict__ Db_ptr = (npy_int32*)
%(b_ptr)
s->data
;
const npy_int32 * __restrict__ Db_ptr = (npy_int32*)
PyArray_DATA(
%(b_ptr)
s)
;
npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
...
@@ -937,10 +937,10 @@ class MulSDCSC(gof.Op):
...
@@ -937,10 +937,10 @@ class MulSDCSC(gof.Op):
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
%(fail)
s;}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if (!
%(_zout)
s ||
if (!
%(_zout)
s ||
...
@@ -949,7 +949,7 @@ class MulSDCSC(gof.Op):
...
@@ -949,7 +949,7 @@ class MulSDCSC(gof.Op):
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_b)
s)->type_num
);
PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_b)
s)
);
if (!
%(_zout)
s)
if (!
%(_zout)
s)
{
{
PyErr_SetString(PyExc_MemoryError,
PyErr_SetString(PyExc_MemoryError,
...
@@ -963,13 +963,13 @@ class MulSDCSC(gof.Op):
...
@@ -963,13 +963,13 @@ class MulSDCSC(gof.Op):
//TODO: error checking with this
//TODO: error checking with this
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
%(_data)
s->data
;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
PyArray_DATA(
%(_data)
s)
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
%(_zout)
s->data
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
PyArray_DATA(
%(_zout)
s)
;
const npy_intp Sb =
%(_b)
s->strides
[0];
const npy_intp Sb =
PyArray_STRIDES(
%(_b)
s)
[0];
// loop over columns
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
for (npy_int32 j = 0; j < N; ++j)
...
@@ -981,7 +981,7 @@ class MulSDCSC(gof.Op):
...
@@ -981,7 +981,7 @@ class MulSDCSC(gof.Op):
npy_int32 i = indices[i_idx];
npy_int32 i = indices[i_idx];
// extract i-th row of dense matrix
// extract i-th row of dense matrix
const dtype_
%(_b)
s* __restrict__ b_row = (dtype_
%(_b)
s*)(
%(_b)
s->data
+ Sb * i);
const dtype_
%(_b)
s* __restrict__ b_row = (dtype_
%(_b)
s*)(
PyArray_BYTES(
%(_b)
s)
+ Sb * i);
// write resulting gradient to sparse output
// write resulting gradient to sparse output
zout[i_idx] = data[i_idx] * b_row[j];
zout[i_idx] = data[i_idx] * b_row[j];
...
@@ -1053,10 +1053,10 @@ class MulSDCSR(gof.Op):
...
@@ -1053,10 +1053,10 @@ class MulSDCSR(gof.Op):
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
%(fail)
s;}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if (!
%(_zout)
s ||
if (!
%(_zout)
s ||
...
@@ -1065,7 +1065,7 @@ class MulSDCSR(gof.Op):
...
@@ -1065,7 +1065,7 @@ class MulSDCSR(gof.Op):
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_b)
s)->type_num
);
PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_b)
s)
);
if (!
%(_zout)
s)
if (!
%(_zout)
s)
{
{
PyErr_SetString(PyExc_MemoryError,
PyErr_SetString(PyExc_MemoryError,
...
@@ -1079,19 +1079,19 @@ class MulSDCSR(gof.Op):
...
@@ -1079,19 +1079,19 @@ class MulSDCSR(gof.Op):
//TODO: error checking with this
//TODO: error checking with this
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
%(_data)
s->data
;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
PyArray_DATA(
%(_data)
s)
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
%(_zout)
s->data
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
PyArray_DATA(
%(_zout)
s)
;
const npy_intp Sb =
%(_b)
s->strides
[0];
const npy_intp Sb =
PyArray_STRIDES(
%(_b)
s)
[0];
// loop over columns
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
for (npy_int32 j = 0; j < N; ++j)
{
{
// extract i-th row of dense matrix
// extract i-th row of dense matrix
const dtype_
%(_b)
s* __restrict__ b_row = (dtype_
%(_b)
s*)(
%(_b)
s->data
+ Sb * j);
const dtype_
%(_b)
s* __restrict__ b_row = (dtype_
%(_b)
s*)(
PyArray_BYTES(
%(_b)
s)
+ Sb * j);
// for each non-null value in the sparse column
// for each non-null value in the sparse column
for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
...
@@ -1209,10 +1209,10 @@ class MulSVCSR(gof.Op):
...
@@ -1209,10 +1209,10 @@ class MulSVCSR(gof.Op):
%(fail)
s;
%(fail)
s;
}
}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if (!
%(_zout)
s
if (!
%(_zout)
s
...
@@ -1221,7 +1221,7 @@ class MulSVCSR(gof.Op):
...
@@ -1221,7 +1221,7 @@ class MulSVCSR(gof.Op):
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_b)
s)->type_num
);
PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_b)
s)
);
}
}
{ //makes it compile even though labels jump over variable definitions.
{ //makes it compile even though labels jump over variable definitions.
...
@@ -1229,15 +1229,15 @@ class MulSVCSR(gof.Op):
...
@@ -1229,15 +1229,15 @@ class MulSVCSR(gof.Op):
//TODO: error checking with this
//TODO: error checking with this
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
%(_data)
s->data
;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
PyArray_DATA(
%(_data)
s)
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
const dtype_
%(_b)
s* __restrict__ Db = (dtype_
%(_b)
s*)
%(_b)
s->data
;
const dtype_
%(_b)
s* __restrict__ Db = (dtype_
%(_b)
s*)
PyArray_DATA(
%(_b)
s)
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
%(_zout)
s->data
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
PyArray_DATA(
%(_zout)
s)
;
const npy_intp Sb =
%(_b)
s->strides
[0] / PyArray_DESCR(
%(_b)
s)->elsize;
const npy_intp Sb =
PyArray_STRIDES(
%(_b)
s)
[0] / PyArray_DESCR(
%(_b)
s)->elsize;
// loop over rows
// loop over rows
for (npy_int32 j = 0; j < N; ++j)
for (npy_int32 j = 0; j < N; ++j)
...
@@ -1359,10 +1359,10 @@ class StructuredAddSVCSR(gof.Op):
...
@@ -1359,10 +1359,10 @@ class StructuredAddSVCSR(gof.Op):
%(fail)
s;
%(fail)
s;
}
}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if (!
%(_zout)
s
if (!
%(_zout)
s
...
@@ -1371,7 +1371,7 @@ class StructuredAddSVCSR(gof.Op):
...
@@ -1371,7 +1371,7 @@ class StructuredAddSVCSR(gof.Op):
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_b)
s)->type_num
);
PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_b)
s)
);
if (!
%(_zout)
s)
if (!
%(_zout)
s)
{
{
PyErr_SetString(PyExc_MemoryError,
PyErr_SetString(PyExc_MemoryError,
...
@@ -1385,15 +1385,15 @@ class StructuredAddSVCSR(gof.Op):
...
@@ -1385,15 +1385,15 @@ class StructuredAddSVCSR(gof.Op):
//TODO: error checking with this
//TODO: error checking with this
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
%(_data)
s->data
;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
PyArray_DATA(
%(_data)
s)
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
const dtype_
%(_b)
s* __restrict__ Db = (dtype_
%(_b)
s*)
%(_b)
s->data
;
const dtype_
%(_b)
s* __restrict__ Db = (dtype_
%(_b)
s*)
PyArray_DATA(
%(_b)
s)
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
%(_zout)
s->data
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
PyArray_DATA(
%(_zout)
s)
;
const npy_intp Sb =
%(_b)
s->strides
[0] / PyArray_DESCR(
%(_b)
s)->elsize;
const npy_intp Sb =
PyArray_STRIDES(
%(_b)
s)
[0] / PyArray_DESCR(
%(_b)
s)->elsize;
// loop over columns
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
for (npy_int32 j = 0; j < N; ++j)
...
@@ -1575,17 +1575,17 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
...
@@ -1575,17 +1575,17 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
if (PyArray_NDIM(
%(y)
s) != 2) {
if (PyArray_NDIM(
%(y)
s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2");
%(fail)
s;}
if (PyArray_
DESCR(
%(x)
s)->type_num
!=
%(typenum_x)
s) {
if (PyArray_
TYPE(
%(x)
s)
!=
%(typenum_x)
s) {
PyErr_SetString(PyExc_NotImplementedError,
PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for x");
"Invalid type for x");
%(fail)
s;}
%(fail)
s;}
if (PyArray_
DESCR(
%(y)
s)->type_num
!=
%(typenum_y)
s) {
if (PyArray_
TYPE(
%(y)
s)
!=
%(typenum_y)
s) {
PyErr_SetString(PyExc_NotImplementedError,
PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for y");
"Invalid type for y");
%(fail)
s;}
%(fail)
s;}
if (PyArray_
DESCR(
%(p_data)
s)->type_num
!=
%(typenum_p)
s) {
if (PyArray_
TYPE(
%(p_data)
s)
!=
%(typenum_p)
s) {
PyErr_SetString(PyExc_NotImplementedError,
PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for pattern");
"Invalid type for pattern");
%(fail)
s;}
%(fail)
s;}
...
@@ -1595,7 +1595,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1595,7 +1595,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
"x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed.");
"x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed.");
%(fail)
s;}
%(fail)
s;}
if (PyArray_DIMS(
%(y)
s)[0] != ((npy_int32 *)
%(p_ncols)
s->data
)[0] ||
if (PyArray_DIMS(
%(y)
s)[0] != ((npy_int32 *)
PyArray_DATA(
%(p_ncols)
s)
)[0] ||
PyArray_DIMS(
%(x)
s)[0] != (PyArray_DIMS(
%(p_ptr)
s)[0] - 1))
PyArray_DIMS(
%(x)
s)[0] != (PyArray_DIMS(
%(p_ptr)
s)[0] - 1))
{PyErr_SetString(PyExc_NotImplementedError,
{PyErr_SetString(PyExc_NotImplementedError,
"The dimension of the pattern and the output must match");
%(fail)
s;}
"The dimension of the pattern and the output must match");
%(fail)
s;}
...
@@ -1603,7 +1603,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1603,7 +1603,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
// Allocate output
// Allocate output
if (!
%(z_data)
s
if (!
%(z_data)
s
|| (PyArray_DIMS(
%(z_data)
s)[0] != PyArray_DIMS(
%(p_data)
s)[0])
|| (PyArray_DIMS(
%(z_data)
s)[0] != PyArray_DIMS(
%(p_data)
s)[0])
|| (PyArray_
DESCR(
%(z_data)
s)->type_num
!=
%(typenum_zd)
s)
|| (PyArray_
TYPE(
%(z_data)
s)
!=
%(typenum_zd)
s)
|| !(PyArray_ISCONTIGUOUS(
%(z_data)
s)))
|| !(PyArray_ISCONTIGUOUS(
%(z_data)
s)))
{
{
{Py_XDECREF(
%(z_data)
s);}
{Py_XDECREF(
%(z_data)
s);}
...
@@ -1614,7 +1614,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1614,7 +1614,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
}
}
if (!
%(z_ind)
s
if (!
%(z_ind)
s
|| (PyArray_DIMS(
%(z_ind)
s)[0] != PyArray_DIMS(
%(p_ind)
s)[0])
|| (PyArray_DIMS(
%(z_ind)
s)[0] != PyArray_DIMS(
%(p_ind)
s)[0])
|| (PyArray_
DESCR(
%(z_ind)
s)->type_num
!=
%(typenum_zi)
s)
|| (PyArray_
TYPE(
%(z_ind)
s)
!=
%(typenum_zi)
s)
|| !(PyArray_ISCONTIGUOUS(
%(z_ind)
s)))
|| !(PyArray_ISCONTIGUOUS(
%(z_ind)
s)))
{
{
{Py_XDECREF(
%(z_ind)
s);}
{Py_XDECREF(
%(z_ind)
s);}
...
@@ -1625,7 +1625,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1625,7 +1625,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
}
}
if (!
%(z_ptr)
s
if (!
%(z_ptr)
s
|| (PyArray_DIMS(
%(z_ptr)
s)[0] != PyArray_DIMS(
%(p_ptr)
s)[0])
|| (PyArray_DIMS(
%(z_ptr)
s)[0] != PyArray_DIMS(
%(p_ptr)
s)[0])
|| (PyArray_
DESCR(
%(z_ptr)
s)->type_num
!=
%(typenum_zp)
s)
|| (PyArray_
TYPE(
%(z_ptr)
s)
!=
%(typenum_zp)
s)
|| !(PyArray_ISCONTIGUOUS(
%(z_ptr)
s)))
|| !(PyArray_ISCONTIGUOUS(
%(z_ptr)
s)))
{
{
{Py_XDECREF(
%(z_ptr)
s);}
{Py_XDECREF(
%(z_ptr)
s);}
...
@@ -1642,23 +1642,23 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1642,23 +1642,23 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
npy_intp K = PyArray_DIMS(
%(y)
s)[1];
npy_intp K = PyArray_DIMS(
%(y)
s)[1];
// pointers to access actual data in the arrays passed as params.
// pointers to access actual data in the arrays passed as params.
const dtype_
%(x)
s* __restrict__ Dx = (dtype_
%(x)
s*)
%(x)
s->data
;
const dtype_
%(x)
s* __restrict__ Dx = (dtype_
%(x)
s*)
PyArray_DATA(
%(x)
s)
;
const dtype_
%(y)
s* __restrict__ Dy = (dtype_
%(y)
s*)
%(y)
s->data
;
const dtype_
%(y)
s* __restrict__ Dy = (dtype_
%(y)
s*)
PyArray_DATA(
%(y)
s)
;
const dtype_
%(p_data)
s* __restrict__ Dpd = (dtype_
%(p_data)
s*)
%(p_data)
s->data
;
const dtype_
%(p_data)
s* __restrict__ Dpd = (dtype_
%(p_data)
s*)
PyArray_DATA(
%(p_data)
s)
;
const dtype_
%(p_ind)
s* __restrict__ Dpi = (dtype_
%(p_ind)
s*)
%(p_ind)
s->data
;
const dtype_
%(p_ind)
s* __restrict__ Dpi = (dtype_
%(p_ind)
s*)
PyArray_DATA(
%(p_ind)
s)
;
const dtype_
%(p_ptr)
s* __restrict__ Dpp = (dtype_
%(p_ptr)
s*)
%(p_ptr)
s->data
;
const dtype_
%(p_ptr)
s* __restrict__ Dpp = (dtype_
%(p_ptr)
s*)
PyArray_DATA(
%(p_ptr)
s)
;
dtype_
%(z_data)
s* __restrict__ Dzd = (dtype_
%(z_data)
s*)
%(z_data)
s->data
;
dtype_
%(z_data)
s* __restrict__ Dzd = (dtype_
%(z_data)
s*)
PyArray_DATA(
%(z_data)
s)
;
dtype_
%(z_ind)
s* __restrict__ Dzi = (dtype_
%(z_ind)
s*)
%(z_ind)
s->data
;
dtype_
%(z_ind)
s* __restrict__ Dzi = (dtype_
%(z_ind)
s*)
PyArray_DATA(
%(z_ind)
s)
;
dtype_
%(z_ptr)
s* __restrict__ Dzp = (dtype_
%(z_ptr)
s*)
%(z_ptr)
s->data
;
dtype_
%(z_ptr)
s* __restrict__ Dzp = (dtype_
%(z_ptr)
s*)
PyArray_DATA(
%(z_ptr)
s)
;
const npy_intp Sdx =
%(x)
s->strides
[1]/PyArray_DESCR(
%(x)
s)->elsize;
const npy_intp Sdx =
PyArray_STRIDES(
%(x)
s)
[1]/PyArray_DESCR(
%(x)
s)->elsize;
const npy_intp Sdy =
%(y)
s->strides
[1]/PyArray_DESCR(
%(y)
s)->elsize;
const npy_intp Sdy =
PyArray_STRIDES(
%(y)
s)
[1]/PyArray_DESCR(
%(y)
s)->elsize;
const npy_intp Sdpd =
%(p_data)
s->strides
[0] / PyArray_DESCR(
%(p_data)
s)->elsize;
const npy_intp Sdpd =
PyArray_STRIDES(
%(p_data)
s)
[0] / PyArray_DESCR(
%(p_data)
s)->elsize;
const npy_intp Sdpi =
%(p_ind)
s->strides
[0] / PyArray_DESCR(
%(p_ind)
s)->elsize;
const npy_intp Sdpi =
PyArray_STRIDES(
%(p_ind)
s)
[0] / PyArray_DESCR(
%(p_ind)
s)->elsize;
const npy_intp Sdpp =
%(p_ptr)
s->strides
[0] / PyArray_DESCR(
%(p_ptr)
s)->elsize;
const npy_intp Sdpp =
PyArray_STRIDES(
%(p_ptr)
s)
[0] / PyArray_DESCR(
%(p_ptr)
s)->elsize;
const npy_intp Sdzd =
%(z_data)
s->strides
[0] / PyArray_DESCR(
%(z_data)
s)->elsize;
const npy_intp Sdzd =
PyArray_STRIDES(
%(z_data)
s)
[0] / PyArray_DESCR(
%(z_data)
s)->elsize;
const npy_intp Sdzi =
%(z_ind)
s->strides
[0] / PyArray_DESCR(
%(z_ind)
s)->elsize;
const npy_intp Sdzi =
PyArray_STRIDES(
%(z_ind)
s)
[0] / PyArray_DESCR(
%(z_ind)
s)->elsize;
const npy_intp Sdzp =
%(z_ptr)
s->strides
[0] / PyArray_DESCR(
%(z_ptr)
s)->elsize;
const npy_intp Sdzp =
PyArray_STRIDES(
%(z_ptr)
s)
[0] / PyArray_DESCR(
%(z_ptr)
s)->elsize;
memcpy(Dzi, Dpi, PyArray_DIMS(
%(p_ind)
s)[0]*sizeof(dtype_
%(p_ind)
s));
memcpy(Dzi, Dpi, PyArray_DIMS(
%(p_ind)
s)[0]*sizeof(dtype_
%(p_ind)
s));
memcpy(Dzp, Dpp, PyArray_DIMS(
%(p_ptr)
s)[0]*sizeof(dtype_
%(p_ptr)
s));
memcpy(Dzp, Dpp, PyArray_DIMS(
%(p_ptr)
s)[0]*sizeof(dtype_
%(p_ptr)
s));
...
@@ -1667,9 +1667,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1667,9 +1667,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K
const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K
const dtype_
%(x)
s* x_row = (dtype_
%(x)
s*)(
%(x)
s->data +
%(x)
s->strides
[0] * m);
const dtype_
%(x)
s* x_row = (dtype_
%(x)
s*)(
PyArray_BYTES(
%(x)
s) + PyArray_STRIDES(
%(x)
s)
[0] * m);
const dtype_
%(y)
s* y_col = (dtype_
%(y)
s*)(
%(y)
s->data +
%(y)
s->strides
[0] * n);
const dtype_
%(y)
s* y_col = (dtype_
%(y)
s*)(
PyArray_BYTES(
%(y)
s) + PyArray_STRIDES(
%(y)
s)
[0] * n);
Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] *
%(cdot)
s((int*)&K, (const
%(conv_type)
s*)x_row, (int*)&Sdx, (const
%(conv_type)
s*)y_col, (int*)&Sdy);
Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] *
%(cdot)
s((int*)&K, (const
%(conv_type)
s*)x_row, (int*)&Sdx, (const
%(conv_type)
s*)y_col, (int*)&Sdy);
}
}
...
...
theano/tensor/basic.py
浏览文件 @
9950ce08
...
@@ -3905,7 +3905,7 @@ class Reshape(Op):
...
@@ -3905,7 +3905,7 @@ class Reshape(Op):
}
}
Py_XDECREF(
%(z)
s);
Py_XDECREF(
%(z)
s);
%(z)
s = (PyArrayObject *) PyArray_Newshape(
%(x)
s, &newshape,
%(z)
s = (PyArrayObject *) PyArray_Newshape(
%(x)
s, &newshape,
PyArray
_CORDER);
NPY
_CORDER);
if (!
%(z)
s)
if (!
%(z)
s)
{
{
//The error message should have been set by PyArray_Newshape
//The error message should have been set by PyArray_Newshape
...
...
theano/tensor/elemwise.py
浏览文件 @
9950ce08
...
@@ -336,7 +336,7 @@ class DimShuffle(Op):
...
@@ -336,7 +336,7 @@ class DimShuffle(Op):
'PyArray_UpdateFlags(
%(res)
s, NPY_ARRAY_UPDATE_ALL)'
,
'PyArray_UpdateFlags(
%(res)
s, NPY_ARRAY_UPDATE_ALL)'
,
#we are making a view in both inplace and non-inplace cases
#we are making a view in both inplace and non-inplace cases
"""
"""
#if NPY_
VERSION <= 0x01000009
#if NPY_
API_VERSION < 0x00000007
PyArray_BASE(
%(res)
s) = (PyObject*)
%(basename)
s;
PyArray_BASE(
%(res)
s) = (PyObject*)
%(basename)
s;
#else
#else
PyArray_SetBaseObject(
%(res)
s, (PyObject*)
%(basename)
s);
PyArray_SetBaseObject(
%(res)
s, (PyObject*)
%(basename)
s);
...
...
theano/tensor/nnet/nnet.py
浏览文件 @
9950ce08
...
@@ -118,14 +118,14 @@ class SoftmaxWithBias(gof.Op):
...
@@ -118,14 +118,14 @@ class SoftmaxWithBias(gof.Op):
PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
%(fail)
s;
%(fail)
s;
}
}
if ((PyArray_
DESCR(
%(x)
s)->type_num
!= NPY_DOUBLE) &&
if ((PyArray_
TYPE(
%(x)
s)
!= NPY_DOUBLE) &&
(PyArray_
DESCR(
%(x)
s)->type_num
!= NPY_FLOAT))
(PyArray_
TYPE(
%(x)
s)
!= NPY_FLOAT))
{
{
PyErr_SetString(PyExc_TypeError, "not a float");
PyErr_SetString(PyExc_TypeError, "not a float");
%(fail)
s;
%(fail)
s;
}
}
if ((PyArray_
DESCR(
%(b)
s)->type_num
!= NPY_DOUBLE) &&
if ((PyArray_
TYPE(
%(b)
s)
!= NPY_DOUBLE) &&
(PyArray_
DESCR(
%(b)
s)->type_num
!= NPY_FLOAT))
(PyArray_
TYPE(
%(b)
s)
!= NPY_FLOAT))
{
{
PyErr_SetString(PyExc_TypeError, "b not float");
PyErr_SetString(PyExc_TypeError, "b not float");
%(fail)
s;
%(fail)
s;
...
@@ -264,15 +264,15 @@ class SoftmaxGrad(gof.Op):
...
@@ -264,15 +264,15 @@ class SoftmaxGrad(gof.Op):
dy
,
sm
=
inp
dy
,
sm
=
inp
dx
,
=
out
dx
,
=
out
return
'''
return
'''
if ((PyArray_
DESCR(
%(dy)
s)->type_num
!= NPY_DOUBLE) &&
if ((PyArray_
TYPE(
%(dy)
s)
!= NPY_DOUBLE) &&
(PyArray_
DESCR(
%(dy)
s)->type_num
!= NPY_FLOAT))
(PyArray_
TYPE(
%(dy)
s)
!= NPY_FLOAT))
{
{
PyErr_SetString(PyExc_TypeError,
PyErr_SetString(PyExc_TypeError,
"types should be float or float64");
"types should be float or float64");
%(fail)
s;
%(fail)
s;
}
}
if ((PyArray_
DESCR(
%(sm)
s)->type_num
!= NPY_DOUBLE) &&
if ((PyArray_
TYPE(
%(sm)
s)
!= NPY_DOUBLE) &&
(PyArray_
DESCR(
%(sm)
s)->type_num
!= NPY_FLOAT))
(PyArray_
TYPE(
%(sm)
s)
!= NPY_FLOAT))
{
{
PyErr_SetString(PyExc_TypeError,
PyErr_SetString(PyExc_TypeError,
"types should be float or float64");
"types should be float or float64");
...
@@ -395,23 +395,23 @@ class Softmax(gof.Op):
...
@@ -395,23 +395,23 @@ class Softmax(gof.Op):
#TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
#TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
init_decl
=
"""
init_decl
=
"""
npy_intp* Nx =
%(x)
s->dimensions
;
npy_intp* Nx =
PyArray_DIMS(
%(x)
s)
;
if (
%(x)
s->nd
!= 2)
if (
PyArray_NDIM(
%(x)
s)
!= 2)
{
{
PyErr_SetString(PyExc_ValueError, "not a 2d tensor");
PyErr_SetString(PyExc_ValueError, "not a 2d tensor");
%(fail)
s;
%(fail)
s;
}
}
if ((
%(x)
s->descr->type_num != PyArray
_DOUBLE) &&
if ((
PyArray_TYPE(
%(x)
s) != NPY
_DOUBLE) &&
(
%(x)
s->descr->type_num != PyArray
_FLOAT))
(
PyArray_TYPE(
%(x)
s) != NPY
_FLOAT))
{
{
PyErr_SetString(PyExc_TypeError, "not a float");
PyErr_SetString(PyExc_TypeError, "not a float");
%(fail)
s;
%(fail)
s;
}
}
if ((NULL ==
%(sm)
s)
if ((NULL ==
%(sm)
s)
|| (
%(sm)
s->dimensions[0] !=
%(x)
s->dimensions
[0])
|| (
PyArray_DIMS(
%(sm)
s)[0] != PyArray_DIMS(
%(x)
s)
[0])
|| (
%(sm)
s->dimensions[1] !=
%(x)
s->dimensions
[1]))
|| (
PyArray_DIMS(
%(sm)
s)[1] != PyArray_DIMS(
%(x)
s)
[1]))
{
{
if (NULL !=
%(sm)
s) Py_XDECREF(
%(sm)
s);
if (NULL !=
%(sm)
s) Py_XDECREF(
%(sm)
s);
%(sm)
s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(
%(x)
s),
%(sm)
s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(
%(x)
s),
...
@@ -431,13 +431,13 @@ class Softmax(gof.Op):
...
@@ -431,13 +431,13 @@ class Softmax(gof.Op):
double sum = 0.0;
double sum = 0.0;
bool discount_max = false;
bool discount_max = false;
const dtype_
%(x)
s* __restrict__ x_i = (dtype_
%(x)
s*)(
%(x)
s->data +
%(x)
s->strides
[0] * i);
const dtype_
%(x)
s* __restrict__ x_i = (dtype_
%(x)
s*)(
PyArray_BYTES(
%(x)
s) + PyArray_STRIDES(
%(x)
s)
[0] * i);
dtype_
%(sm)
s* __restrict__ sm_i = (dtype_
%(sm)
s*)(
%(sm)
s->data +
%(sm)
s->strides
[0] * i);
dtype_
%(sm)
s* __restrict__ sm_i = (dtype_
%(sm)
s*)(
PyArray_BYTES(
%(sm)
s) + PyArray_STRIDES(
%(sm)
s)
[0] * i);
"""
"""
inside_row_loop
=
"""
inside_row_loop
=
"""
npy_intp Sx =
%(x)
s->strides
[1]/sizeof(dtype_
%(x)
s);
npy_intp Sx =
PyArray_STRIDES(
%(x)
s)
[1]/sizeof(dtype_
%(x)
s);
npy_intp Ssm =
%(sm)
s->strides
[1]/sizeof(dtype_
%(sm)
s);
npy_intp Ssm =
PyArray_STRIDES(
%(sm)
s)
[1]/sizeof(dtype_
%(sm)
s);
size_t row_max_j=0;
size_t row_max_j=0;
dtype_
%(sm)
s row_max = x_i[0];
dtype_
%(sm)
s row_max = x_i[0];
...
@@ -1018,15 +1018,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
...
@@ -1018,15 +1018,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
y_idx_type
=
node
.
inputs
[
2
]
.
type
.
dtype_specs
()[
1
]
y_idx_type
=
node
.
inputs
[
2
]
.
type
.
dtype_specs
()[
1
]
return
"""
return
"""
if ((PyArray_
DESCR(
%(dnll)
s)->type_num
!= NPY_DOUBLE) &&
if ((PyArray_
TYPE(
%(dnll)
s)
!= NPY_DOUBLE) &&
(PyArray_
DESCR(
%(dnll)
s)->type_num
!= NPY_FLOAT))
(PyArray_
TYPE(
%(dnll)
s)
!= NPY_FLOAT))
{
{
PyErr_SetString(PyExc_TypeError,
PyErr_SetString(PyExc_TypeError,
"dnll type should be float32 or float64");
"dnll type should be float32 or float64");
%(fail)
s;
%(fail)
s;
}
}
if ((PyArray_
DESCR(
%(sm)
s)->type_num
!= NPY_DOUBLE) &&
if ((PyArray_
TYPE(
%(sm)
s)
!= NPY_DOUBLE) &&
(PyArray_
DESCR(
%(sm)
s)->type_num
!= NPY_FLOAT))
(PyArray_
TYPE(
%(sm)
s)
!= NPY_FLOAT))
{
{
PyErr_SetString(PyExc_TypeError,
PyErr_SetString(PyExc_TypeError,
"sm type should be float32 or float64");
"sm type should be float32 or float64");
...
...
theano/tensor/subtensor.py
浏览文件 @
9950ce08
...
@@ -9,14 +9,13 @@ _logger = logging.getLogger("theano.tensor.subtensor")
...
@@ -9,14 +9,13 @@ _logger = logging.getLogger("theano.tensor.subtensor")
import
numpy
import
numpy
import
theano
import
theano
from
theano.compat.six
import
StringIO
from
theano.gradient
import
DisconnectedType
from
theano.gradient
import
DisconnectedType
from
theano
import
gof
from
theano
import
gof
from
theano.gof
import
Apply
,
Constant
,
hashtype
,
Op
,
Type
,
MethodNotDefined
from
theano.gof
import
Apply
,
Constant
,
hashtype
,
Op
,
Type
,
MethodNotDefined
from
theano.gof.python25
import
maxsize
from
theano.gof.python25
import
maxsize
from
theano.printing
import
pprint
from
theano.printing
import
pprint
from
theano
import
scalar
as
scal
from
theano
import
scalar
as
scal
from
theano.tensor.basic
import
(
addbroadcast
,
clip
,
sum
,
exp
,
from
theano.tensor.basic
import
(
addbroadcast
,
clip
,
ARange
,
TensorType
)
ARange
,
TensorType
)
from
theano.tensor.elemwise
import
DimShuffle
from
theano.tensor.elemwise
import
DimShuffle
from
theano.tensor.type_other
import
NoneConst
,
SliceType
,
make_slice
from
theano.tensor.type_other
import
NoneConst
,
SliceType
,
make_slice
...
@@ -533,57 +532,29 @@ class Subtensor(Op):
...
@@ -533,57 +532,29 @@ class Subtensor(Op):
return
{
return
{
"c_prefix"
:
"PyArray"
,
"c_prefix"
:
"PyArray"
,
"update_flags"
:
(
"PyArray_UpdateFlags(
%(view_name)
s,"
" NPY_ARRAY_C_CONTIGUOUS|"
"NPY_ARRAY_F_CONTIGUOUS);"
),
"set_data"
:
"PyArray_set_data"
,
"set_dim"
:
"PyArray_set_dim"
,
"set_stride"
:
"PyArray_set_stride"
,
"strides_mul"
:
1
,
"strides_mul"
:
1
,
"view_name"
:
"xview"
}
}
@staticmethod
@staticmethod
def
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
idx_list
,
def
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
idx_list
,
view_ndim
,
c_prefix
=
None
,
c_prefix
=
None
,
update_flags
=
None
,
set_data
=
None
,
set_dim
=
None
,
set_stride
=
None
,
strides_mul
=
None
,
strides_mul
=
None
,
view_name
=
None
):
):
"""
"""
The parameters c_prefix, update_flags, set_data, set_dim,
The parameters c_prefix are there to allow reusing this
set_stride and strides_mul are there to allow reusing this
function on PyArray and CudaNdarray object.
function on PyArray and CudaNdarray object.
This fct take as input the x,
"""
"""
default_args
=
Subtensor
.
default_helper_c_code_args
()
default_args
=
Subtensor
.
default_helper_c_code_args
()
if
update_flags
is
None
:
update_flags
=
default_args
[
'update_flags'
]
if
set_data
is
None
:
set_data
=
default_args
[
'set_data'
]
if
set_dim
is
None
:
set_dim
=
default_args
[
'set_dim'
]
if
set_stride
is
None
:
set_stride
=
default_args
[
'set_stride'
]
if
strides_mul
is
None
:
if
strides_mul
is
None
:
strides_mul
=
default_args
[
'strides_mul'
]
strides_mul
=
default_args
[
'strides_mul'
]
if
c_prefix
is
None
:
if
c_prefix
is
None
:
c_prefix
=
default_args
[
'c_prefix'
]
c_prefix
=
default_args
[
'c_prefix'
]
if
view_name
is
None
:
view_name
=
default_args
[
'view_name'
]
#update_flags may depend on view_name
update_flags
=
update_flags
%
locals
()
#
#
# two arrays are created in C code:
# two arrays are created in C code:
# is_slice: len == ndim, 0 means int, 1 means slice
# is_slice: len == ndim, 0 means int, 1 means slice
...
@@ -657,12 +628,7 @@ class Subtensor(Op):
...
@@ -657,12 +628,7 @@ class Subtensor(Op):
x
,
=
inputs
[:
1
]
x
,
=
inputs
[:
1
]
z
,
=
outputs
z
,
=
outputs
xview
=
view_name
rval
=
"""
rval
=
"""
#define PyArray_set_dim(obj, idx, d) PyArray_DIMS(obj)[idx]=d
#define PyArray_set_stride(obj, idx, d) PyArray_STRIDES(obj)[idx]=d
#define PyArray_set_data(obj, ptr, base) PyArray_BYTES(obj)=ptr
// The subtensor is created by iterating over the dimensions
// The subtensor is created by iterating over the dimensions
// and updating stride, shape, and data pointers
// and updating stride, shape, and data pointers
...
@@ -674,32 +640,10 @@ class Subtensor(Op):
...
@@ -674,32 +640,10 @@ class Subtensor(Op):
int inner_ii = 0; // the current dimension of zview
int inner_ii = 0; // the current dimension of zview
int outer_ii = 0; // current dimension of z
int outer_ii = 0; // current dimension of z
char* ptr = (char*)
%(c_prefix)
s_BYTES(
%(xview)
s);
// Argument of the view
ssize_t xview_offset = 0;
if ((
%(c_prefix)
s_DIMS(
%(xview)
s) ==
%(c_prefix)
s_DIMS(
%(x)
s))
ssize_t xview_dims[
%(view_ndim)
s];
&& (
%(c_prefix)
s_DIMS(
%(x)
s) != NULL))
ssize_t xview_strides[
%(view_ndim)
s];
{
PyErr_Format(PyExc_ValueError, "x and
%(xview)
s"
"(with
%%
d dims) have the same dimensions"
" pointers:
%%
p and
%%
p",
%(c_prefix)
s_NDIM(
%(x)
s),
%(c_prefix)
s_DIMS(
%(xview)
s),
%(c_prefix)
s_DIMS(
%(x)
s));
Py_XDECREF(
%(xview)
s);
%(fail)
s;
}
if (
%(c_prefix)
s_STRIDES(
%(xview)
s) ==
%(c_prefix)
s_STRIDES(
%(x)
s)
&& (
%(c_prefix)
s_DIMS(
%(x)
s) != NULL))
{
PyErr_Format(PyExc_ValueError, "x and
%(xview)
s"
"(with
%%
d dims) have the same strides"
" pointers:
%%
p and
%%
p",
%(c_prefix)
s_NDIM(
%(x)
s),
%(c_prefix)
s_STRIDES(
%(xview)
s),
%(c_prefix)
s_STRIDES(
%(x)
s));
Py_XDECREF(
%(xview)
s);
%(fail)
s;
}
for (; outer_ii <
%(len_is_slice)
s; ++outer_ii)
for (; outer_ii <
%(len_is_slice)
s; ++outer_ii)
{
{
...
@@ -719,10 +663,8 @@ class Subtensor(Op):
...
@@ -719,10 +663,8 @@ class Subtensor(Op):
// PySlice_GetIndicesEx in python source
// PySlice_GetIndicesEx in python source
if (!step)
if (!step)
{
{
Py_DECREF(
%(xview)
s);
PyErr_Format(PyExc_ValueError,
PyErr_Format(PyExc_ValueError,
"slice step cannot be zero");
"slice step cannot be zero");
Py_XDECREF(
%(xview)
s);
%(fail)
s;
%(fail)
s;
}
}
...
@@ -771,11 +713,10 @@ class Subtensor(Op):
...
@@ -771,11 +713,10 @@ class Subtensor(Op):
assert (slicelength <= length);
assert (slicelength <= length);
ptr
+=
%(c_prefix)
s_STRIDES(
%(x)
s)[outer_ii] * start *
xview_offset
+=
%(c_prefix)
s_STRIDES(
%(x)
s)[outer_ii] * start *
%(strides_mul)
s;
%(strides_mul)
s;
%(set_dim)
s(
%(xview)
s, inner_ii, slicelength);
xview_dims[inner_ii] = slicelength;
%(set_stride)
s(
%(xview)
s, inner_ii,
xview_strides[inner_ii] =
%(c_prefix)
s_STRIDES(
%(x)
s)[outer_ii] * step;
%(c_prefix)
s_STRIDES(
%(x)
s)[outer_ii] * step);
inner_ii += 1;
inner_ii += 1;
spec_pos += 3;
spec_pos += 3;
...
@@ -788,46 +729,41 @@ class Subtensor(Op):
...
@@ -788,46 +729,41 @@ class Subtensor(Op):
{
{
if (idx <
%(c_prefix)
s_DIMS(
%(x)
s)[outer_ii])
if (idx <
%(c_prefix)
s_DIMS(
%(x)
s)[outer_ii])
{
{
ptr
+=
%(c_prefix)
s_STRIDES(
%(x)
s)[outer_ii] * idx *
xview_offset
+=
%(c_prefix)
s_STRIDES(
%(x)
s)[outer_ii] * idx *
%(strides_mul)
s;
%(strides_mul)
s;
}
}
else
else
{
{
PyErr_Format(PyExc_IndexError,"index out of bounds");
PyErr_Format(PyExc_IndexError,"index out of bounds");
Py_XDECREF(
%(xview)
s);
%(fail)
s;
%(fail)
s;
}
}
}
}
else
else
{
{
PyErr_Format(PyExc_IndexError,"index out of bounds");
PyErr_Format(PyExc_IndexError,"index out of bounds");
Py_XDECREF(
%(xview)
s);
%(fail)
s;
%(fail)
s;
}
}
spec_pos += 1;
spec_pos += 1;
}
}
}
}
%(set_data)
s(
%(xview)
s, ptr, (PyObject*)NULL);
assert (inner_ii <=
%(view_ndim)
s);
assert (inner_ii <=
%(c_prefix)
s_NDIM(
%(xview)
s));
while (inner_ii <
%(view_ndim)
s)
while (inner_ii <
%(c_prefix)
s_NDIM(
%(xview)
s))
{
{
assert (outer_ii <
%(c_prefix)
s_NDIM(
%(x)
s));
assert (outer_ii <
%(c_prefix)
s_NDIM(
%(x)
s));
%(set_dim)
s(
%(xview)
s, inner_ii,
xview_dims[inner_ii] =
%(c_prefix)
s_DIMS(
%(x)
s)[outer_ii];
%(c_prefix)
s_DIMS(
%(x)
s)[outer_ii]);
xview_strides[inner_ii] =
%(c_prefix)
s_STRIDES(
%(x)
s)[outer_ii];
%(set_stride)
s(
%(xview)
s, inner_ii,
%(c_prefix)
s_STRIDES(
%(x)
s)[outer_ii]);
inner_ii += 1;
inner_ii += 1;
outer_ii += 1;
outer_ii += 1;
}
}
%(update_flags)
s
"""
%
locals
()
"""
%
locals
()
# print rval
# print rval
return
rval
return
rval
@staticmethod
@staticmethod
def
helper_c_code_cache_version
():
def
helper_c_code_cache_version
():
return
(
5
,)
return
(
6
,)
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
# DEBUG
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
# DEBUG
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
theano
.
tensor
.
TensorType
):
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
theano
.
tensor
.
TensorType
):
...
@@ -838,36 +774,45 @@ class Subtensor(Op):
...
@@ -838,36 +774,45 @@ class Subtensor(Op):
view_ndim
=
node
.
outputs
[
0
]
.
ndim
view_ndim
=
node
.
outputs
[
0
]
.
ndim
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
decl
=
"PyArrayObject * xview = NULL;"
get_xview
=
self
.
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
self
.
idx_list
,
view_ndim
)
build_view
=
"""
build_view
=
"""
//TODO: give this Op a second output so that this view can be cached
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
//TODO: alternatively, fix the memory leak on failure
Py_INCREF(PyArray_DESCR(
%(x)
s));
Py_INCREF(PyArray_DESCR(
%(x)
s));
PyArrayObject *
xview = (PyArrayObject*)PyArray_NewFromDescr(
xview = (PyArrayObject*)PyArray_NewFromDescr(
&PyArray_Type,
&PyArray_Type,
PyArray_DESCR(
%(x)
s),
PyArray_DESCR(
%(x)
s),
%(view_ndim)
s,
%(view_ndim)
s,
PyArray_DIMS(
%(x)
s)
,
xview_dims
,
PyArray_STRIDES(
%(x)
s)
,
xview_strides
,
PyArray_
DATA(
%(x)
s)
,
PyArray_
BYTES(
%(x)
s) + xview_offset
,
%(x)
s->flags
,
PyArray_FLAGS(
%(x)
s)
,
NULL);
NULL);
assert (PyArray_NDIM(xview) ==
%(view_ndim)
s);
if (!xview)
if (!xview)
{
{
%(fail)
s;
%(fail)
s;
}
}
"""
%
locals
()
"""
%
locals
()
get_xview
=
self
.
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
self
.
idx_list
)
finish_view
=
"""
finish_view
=
"""
if (
%(z)
s) Py_DECREF(
%(z)
s);
//This is needed for NumPy 1.5, but not 1.7.2
PyArray_UpdateFlags(xview, NPY_ARRAY_C_CONTIGUOUS| NPY_ARRAY_F_CONTIGUOUS);
Py_XDECREF(
%(z)
s);
Py_INCREF(py_
%(x)
s);
Py_INCREF(py_
%(x)
s);
#if NPY_API_VERSION < 0x00000007
PyArray_BASE(xview) = py_
%(x)
s;
PyArray_BASE(xview) = py_
%(x)
s;
#else
PyArray_SetBaseObject(xview, py_
%(x)
s);
#endif
assert(py_
%(x)
s == (PyObject*)
%(x)
s);
assert(py_
%(x)
s == (PyObject*)
%(x)
s);
%(z)
s = xview;
%(z)
s = xview;
"""
%
locals
()
"""
%
locals
()
return
build_view
+
"{"
+
get_xview
+
"}"
+
finish_view
return
decl
+
get_xview
+
build_view
+
finish_view
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
hv
=
self
.
helper_c_code_cache_version
()
hv
=
self
.
helper_c_code_cache_version
()
...
@@ -1150,6 +1095,9 @@ class IncSubtensor(Op):
...
@@ -1150,6 +1095,9 @@ class IncSubtensor(Op):
(
x
,
y
)
+
inputs
,
(
x
,
y
)
+
inputs
,
[
x
.
type
()])
[
x
.
type
()])
def
decl_view
(
self
):
return
"PyArrayObject * zview = NULL;"
def
perform
(
self
,
node
,
inputs
,
out_
):
def
perform
(
self
,
node
,
inputs
,
out_
):
out
,
=
out_
out
,
=
out_
x
,
y
=
inputs
[:
2
]
x
,
y
=
inputs
[:
2
]
...
@@ -1237,16 +1185,28 @@ class IncSubtensor(Op):
...
@@ -1237,16 +1185,28 @@ class IncSubtensor(Op):
}
}
else
else
{
{
if (
%(z)
s) Py_
DECREF(
%(z)
s);
Py_X
DECREF(
%(z)
s);
%(z)
s =
%(copy_of_x)
s;
%(z)
s =
%(copy_of_x)
s;
}
}
"""
%
locals
()
"""
%
locals
()
# get info needed to make zview: a view of %(z)s
helper_args
=
self
.
get_helper_c_code_args
()
get_zview
=
Subtensor
.
helper_c_code
(
node
=
node
,
name
=
name
,
inputs
=
outputs
[:
1
]
+
inputs
[
2
:],
outputs
=
outputs
,
sub
=
sub
,
idx_list
=
self
.
idx_list
,
view_ndim
=
view_ndim
,
**
helper_args
)
#Make a view on the output, as we will write into it.
alloc_zview
=
self
.
make_view_array
(
z
,
view_ndim
)
alloc_zview
=
self
.
make_view_array
(
z
,
view_ndim
)
# On GPU, it takes two steps to make a view
link_zview
=
self
.
link_view_array
(
z
,
fail
)
#Make a first view on the output, as we will write into it.
build_view
=
"""
build_view
=
"""
//TODO: give this Op a second output so that this view can be cached
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
//TODO: alternatively, fix the memory leak on failure
...
@@ -1255,21 +1215,7 @@ class IncSubtensor(Op):
...
@@ -1255,21 +1215,7 @@ class IncSubtensor(Op):
{
{
%(fail)
s;
%(fail)
s;
}
}
%(link_zview)
s;
"""
%
locals
()
"""
%
locals
()
# make zview actually a view of %(z)s
helper_args
=
self
.
get_helper_c_code_args
()
helper_args
[
'view_name'
]
=
'zview'
get_zview
=
self
.
define_set_data
()
+
\
Subtensor
.
helper_c_code
(
node
=
node
,
name
=
name
,
inputs
=
outputs
[:
1
]
+
inputs
[
2
:],
outputs
=
outputs
,
sub
=
sub
,
idx_list
=
self
.
idx_list
,
**
helper_args
)
copy_into
=
self
.
copy_into
(
"zview"
,
y
)
copy_into
=
self
.
copy_into
(
"zview"
,
y
)
...
@@ -1289,12 +1235,12 @@ class IncSubtensor(Op):
...
@@ -1289,12 +1235,12 @@ class IncSubtensor(Op):
%(add_to_zview)
s
%(add_to_zview)
s
}
}
"""
%
locals
()
"""
%
locals
()
return
(
self
.
decl_view
()
+
return
(
copy_input_if_necessary
copy_input_if_necessary
+
+
build_view
get_zview
+
+
"{"
+
get_zview
+
"}"
build_view
+
+
make_modification
make_modification
+
+
"Py_DECREF(zview);"
"Py_DECREF(zview);"
)
)
def
do_type_checking
(
self
,
node
):
def
do_type_checking
(
self
,
node
):
...
@@ -1344,16 +1290,18 @@ class IncSubtensor(Op):
...
@@ -1344,16 +1290,18 @@ class IncSubtensor(Op):
"""
"""
return
"""Py_INCREF(PyArray_DESCR(
%(x)
s));
return
"""Py_INCREF(PyArray_DESCR(
%(x)
s));
PyArrayObject * zview =
zview = (PyArrayObject*)PyArray_NewFromDescr(
(PyArrayObject*)PyArray_NewFromDescr(
&PyArray_Type,
&PyArray_Type,
PyArray_DESCR(
%(x)
s),
PyArray_DESCR(
%(x)
s),
%(view_ndim)
s,
%(view_ndim)
s,
PyArray_DIMS(
%(x)
s),
xview_dims, //PyArray_DIMS(
%(x)
s),
PyArray_STRIDES(
%(x)
s),
xview_strides, //PyArray_STRIDES(
%(x)
s),
PyArray_DATA(
%(x)
s),
PyArray_BYTES(
%(x)
s) + xview_offset, //PyArray_DATA(
%(x)
s),
%(x)
s->flags,
PyArray_FLAGS(
%(x)
s),
NULL)"""
%
locals
()
NULL);
//This is needed for NumPy 1.5, but not 1.7.2
PyArray_UpdateFlags(zview, NPY_ARRAY_C_CONTIGUOUS| NPY_ARRAY_F_CONTIGUOUS);
"""
%
locals
()
def
get_helper_c_code_args
(
self
):
def
get_helper_c_code_args
(
self
):
""" Return a dictionary of arguments to pass to helper_c_code."""
""" Return a dictionary of arguments to pass to helper_c_code."""
...
@@ -1369,24 +1317,6 @@ class IncSubtensor(Op):
...
@@ -1369,24 +1317,6 @@ class IncSubtensor(Op):
"""
"""
return
"""PyArray_CopyInto(
%(view)
s,
%(source)
s)"""
%
locals
()
return
"""PyArray_CopyInto(
%(view)
s,
%(source)
s)"""
%
locals
()
def
define_set_data
(
self
):
""" Returns C code used to define any macros used in the
set data argument to the helper C code. """
return
""
def
link_view_array
(
self
,
x
,
fail
):
""" Returns code to complete making zview a view of x"""
# On CPU there is nothing to do, make_view_array already did this
return
""
def
set_view_base
(
self
,
x
,
fail
):
""" Returns code to make zview be a correct view of x,
after helper_c_code is done messing with x"""
# On CPU there is nothing to do
return
""
def
add_to_zview
(
self
,
x
,
fail
):
def
add_to_zview
(
self
,
x
,
fail
):
""" Return C code to add x to zview. Should DECREF zview if the
""" Return C code to add x to zview. Should DECREF zview if the
add fails."""
add fails."""
...
@@ -1567,7 +1497,7 @@ class AdvancedSubtensor1(Op):
...
@@ -1567,7 +1497,7 @@ class AdvancedSubtensor1(Op):
output_name
=
output_names
[
0
]
output_name
=
output_names
[
0
]
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
return
"""
return
"""
PyObject *indices;
Py
Array
Object *indices;
int i_type = PyArray_TYPE(
%(i_name)
s);
int i_type = PyArray_TYPE(
%(i_name)
s);
if (i_type != NPY_INTP) {
if (i_type != NPY_INTP) {
// Cast
%(i_name)
s to NPY_INTP (expected by PyArray_TakeFrom),
// Cast
%(i_name)
s to NPY_INTP (expected by PyArray_TakeFrom),
...
@@ -1602,13 +1532,13 @@ class AdvancedSubtensor1(Op):
...
@@ -1602,13 +1532,13 @@ class AdvancedSubtensor1(Op):
%(fail)
s;
%(fail)
s;
}
}
}
}
indices = PyArray_Cast(
%(i_name)
s, NPY_INTP);
indices =
(PyArrayObject*)
PyArray_Cast(
%(i_name)
s, NPY_INTP);
if (indices == NULL) {
if (indices == NULL) {
%(fail)
s;
%(fail)
s;
}
}
}
}
else {
else {
indices =
(PyObject *)
%(i_name)
s;
indices =
%(i_name)
s;
Py_INCREF(indices);
Py_INCREF(indices);
}
}
if (
%(output_name)
s != NULL) {
if (
%(output_name)
s != NULL) {
...
@@ -1637,7 +1567,7 @@ class AdvancedSubtensor1(Op):
...
@@ -1637,7 +1567,7 @@ class AdvancedSubtensor1(Op):
}
}
}
}
%(output_name)
s = (PyArrayObject*)PyArray_TakeFrom(
%(output_name)
s = (PyArrayObject*)PyArray_TakeFrom(
%(a_name)
s, indices, 0,
%(output_name)
s, NPY_RAISE);
%(a_name)
s,
(PyObject*)
indices, 0,
%(output_name)
s, NPY_RAISE);
Py_DECREF(indices);
Py_DECREF(indices);
if (
%(output_name)
s == NULL)
%(fail)
s;
if (
%(output_name)
s == NULL)
%(fail)
s;
"""
%
locals
()
"""
%
locals
()
...
...
theano/tensor/tests/test_blas.py
浏览文件 @
9950ce08
#from nose.plugins.skip import SkipTest
from
copy
import
copy
#import traceback
from
unittest
import
TestCase
import
itertools
import
sys
import
theano.tensor
as
T
from
theano
import
tensor
from
theano.compat
import
PY3
,
exc_message
from
theano.gof.python25
import
product
as
itertools_product
from
theano.gof.python25
import
any
from
theano.printing
import
pp
import
numpy
import
numpy
import
theano
from
numpy
import
(
arange
,
array
,
common_type
,
complex64
,
complex128
,
float32
,
from
numpy
import
(
arange
,
array
,
common_type
,
complex64
,
complex128
,
float32
,
float64
,
newaxis
,
shape
,
transpose
,
zeros
)
float64
,
newaxis
,
shape
,
transpose
,
zeros
)
from
numpy.testing
import
assert_array_almost_equal
from
numpy.testing
import
assert_array_almost_equal
#from numpy.testing import dec
#from numpy.testing.noseclasses import KnownFailureTest
import
theano
import
theano.tensor
as
T
from
theano
import
tensor
,
Param
,
shared
,
config
from
theano.compat
import
exc_message
from
theano.gof.python25
import
product
as
itertools_product
from
theano.gof.python25
import
any
from
theano.printing
import
pp
from
theano.tensor.blas
import
(
_dot22
,
_dot22scalar
,
res_is_a
,
_as_scalar
,
from
theano.tensor.blas
import
(
_dot22
,
_dot22scalar
,
res_is_a
,
_as_scalar
,
_is_real_matrix
,
_gemm_canonicalize
,
_is_real_matrix
,
_gemm_canonicalize
,
_factor_canonicalized
,
Gemm
,
Gemv
,
_factor_canonicalized
,
Gemm
,
Gemv
,
gemm_inplace
,
gemm_no_inplace
,
gemm_inplace
,
gemm_no_inplace
,
InconsistencyError
,
Ger
,
ger
,
ger_destructive
)
InconsistencyError
,
Ger
,
ger
,
ger_destructive
)
from
unittest
import
TestCase
from
theano.tests
import
unittest_tools
from
theano.tests
import
unittest_tools
from
copy
import
copy
,
deepcopy
from
theano
import
Param
,
shared
,
config
from
test_basic
import
(
_approx_eq
,
as_tensor_variable
,
inplace_func
,
from
test_basic
import
(
_approx_eq
,
as_tensor_variable
,
inplace_func
,
compile
,
inplace
)
compile
,
inplace
)
#, constant, eval_outputs)
#, constant, eval_outputs)
...
@@ -361,11 +353,8 @@ class t_gemm(TestCase):
...
@@ -361,11 +353,8 @@ class t_gemm(TestCase):
z
=
tz
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)
z
=
tz
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)
z
[:,
:,
i
]
=
z_i
z
[:,
:,
i
]
=
z_i
self
.
assertTrue
(
unittest_tools
.
assert_allclose
(
z_after
[:,
:,
i
],
_approx_eq
(
z_after
[:,
:,
i
],
tz
.
get_value
(
borrow
=
True
)[:,
:,
i
])
tz
.
get_value
(
borrow
=
True
)[:,
:,
i
]),
(
z_orig
[:,
:,
i
],
z_after
[:,
:,
i
],
z
[:,
:,
i
],
z_after
[:,
:,
i
]
-
z
[:,
:,
i
]))
tz_i
=
gemm_no_inplace
(
tz
[:,
:,
i
],
ta
,
tx
[
tz_i
=
gemm_no_inplace
(
tz
[:,
:,
i
],
ta
,
tx
[
:,
:,
i
],
ty
[:,
:,
i
],
tb
)
:,
:,
i
],
ty
[:,
:,
i
],
tb
)
...
@@ -374,11 +363,8 @@ class t_gemm(TestCase):
...
@@ -374,11 +363,8 @@ class t_gemm(TestCase):
mode
=
compile
.
Mode
(
optimizer
=
None
,
linker
=
l
))
mode
=
compile
.
Mode
(
optimizer
=
None
,
linker
=
l
))
for
j
in
xrange
(
3
):
for
j
in
xrange
(
3
):
g_i
()
g_i
()
self
.
assertTrue
(
unittest_tools
.
assert_allclose
(
z_after
[:,
:,
i
],
_approx_eq
(
z_after
[:,
:,
i
],
tz
.
get_value
(
borrow
=
True
)[:,
:,
i
])
tz
.
get_value
(
borrow
=
True
)[:,
:,
i
]),
(
z_orig
[:,
:,
i
],
z_after
[:,
:,
i
],
z
[:,
:,
i
],
z_after
[:,
:,
i
]
-
z
[:,
:,
i
]))
t
(
C
,
A
,
B
)
t
(
C
,
A
,
B
)
t
(
C
.
transpose
((
1
,
0
,
2
)),
A
,
B
)
t
(
C
.
transpose
((
1
,
0
,
2
)),
A
,
B
)
...
...
theano/tensor/tests/test_inc_subtensor.py
浏览文件 @
9950ce08
...
@@ -54,7 +54,7 @@ class Test_inc_subtensor(unittest.TestCase):
...
@@ -54,7 +54,7 @@ class Test_inc_subtensor(unittest.TestCase):
else
:
else
:
expected_result
[:,
:
val_sl2_end
]
+=
val_inc
expected_result
[:,
:
val_sl2_end
]
+=
val_inc
self
.
assertTrue
(
numpy
.
array_equal
(
result
,
expected_result
)
)
utt
.
assert_allclose
(
result
,
expected_result
)
def
test_wrong_dims
(
self
):
def
test_wrong_dims
(
self
):
a
=
tt
.
matrix
()
a
=
tt
.
matrix
()
...
@@ -122,7 +122,7 @@ class Test_inc_subtensor(unittest.TestCase):
...
@@ -122,7 +122,7 @@ class Test_inc_subtensor(unittest.TestCase):
else
:
else
:
expected_result
[:,
sl3
,
:
val_sl2_end
]
+=
val_inc
expected_result
[:,
sl3
,
:
val_sl2_end
]
+=
val_inc
self
.
assertTrue
(
numpy
.
array_equal
(
result
,
expected_result
)
)
utt
.
assert_allclose
(
result
,
expected_result
)
def
test_grad_inc_set
(
self
):
def
test_grad_inc_set
(
self
):
def
inc_slice
(
*
s
):
def
inc_slice
(
*
s
):
...
...
theano/tensor/type.py
浏览文件 @
9950ce08
...
@@ -446,8 +446,9 @@ class TensorType(Type):
...
@@ -446,8 +446,9 @@ class TensorType(Type):
%(fail)
s
%(fail)
s
}
}
// We expect
%(type_num)
s
// We expect
%(type_num)
s
type_num_
%(name)
s = ((PyArrayObject*)py_
%(name)
s)->descr->type_num;
type_num_
%(name)
s = PyArray_TYPE((PyArrayObject*) py_
%(name)
s);
if (!PyArray_ISALIGNED(py_
%(name)
s)) {
if (!PyArray_ISALIGNED((PyArrayObject*) py_
%(name)
s)) {
PyArrayObject * tmp = (PyArrayObject*) py_
%(name)
s;
PyErr_Format(PyExc_NotImplementedError,
PyErr_Format(PyExc_NotImplementedError,
"expected an aligned array of type
%%
ld "
"expected an aligned array of type
%%
ld "
"(
%(type_num)
s), got non-aligned array of type
%%
ld"
"(
%(type_num)
s), got non-aligned array of type
%%
ld"
...
@@ -456,19 +457,19 @@ class TensorType(Type):
...
@@ -456,19 +457,19 @@ class TensorType(Type):
" and 3 last strides
%%
ld
%%
ld,
%%
ld.",
" and 3 last strides
%%
ld
%%
ld,
%%
ld.",
(long int)
%(type_num)
s,
(long int)
%(type_num)
s,
(long int) type_num_
%(name)
s,
(long int) type_num_
%(name)
s,
(long int) PyArray_NDIM(
py_
%(name)
s
),
(long int) PyArray_NDIM(
tmp
),
(long int) PyArray_NDIM(
py_
%(name)
s
) >= 3 ?
(long int) PyArray_NDIM(
tmp
) >= 3 ?
PyArray_DIMS(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s
)-3] : -1,
PyArray_DIMS(
tmp)[PyArray_NDIM(tmp
)-3] : -1,
(long int) PyArray_NDIM(
py_
%(name)
s
) >= 2 ?
(long int) PyArray_NDIM(
tmp
) >= 2 ?
PyArray_DIMS(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s
)-2] : -1,
PyArray_DIMS(
tmp)[PyArray_NDIM(tmp
)-2] : -1,
(long int) PyArray_NDIM(
py_
%(name)
s
) >= 1 ?
(long int) PyArray_NDIM(
tmp
) >= 1 ?
PyArray_DIMS(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s
)-1] : -1,
PyArray_DIMS(
tmp)[PyArray_NDIM(tmp
)-1] : -1,
(long int) PyArray_NDIM(
py_
%(name)
s
) >= 3 ?
(long int) PyArray_NDIM(
tmp
) >= 3 ?
PyArray_STRIDES(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s
)-3] : -1,
PyArray_STRIDES(
tmp)[PyArray_NDIM(tmp
)-3] : -1,
(long int) PyArray_NDIM(
py_
%(name)
s
) >= 2 ?
(long int) PyArray_NDIM(
tmp
) >= 2 ?
PyArray_STRIDES(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s
)-2] : -1,
PyArray_STRIDES(
tmp)[PyArray_NDIM(tmp
)-2] : -1,
(long int) PyArray_NDIM(
py_
%(name)
s
) >= 1 ?
(long int) PyArray_NDIM(
tmp
) >= 1 ?
PyArray_STRIDES(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s
)-1] : -1
PyArray_STRIDES(
tmp)[PyArray_NDIM(tmp
)-1] : -1
);
);
%(fail)
s
%(fail)
s
}
}
...
@@ -508,7 +509,7 @@ class TensorType(Type):
...
@@ -508,7 +509,7 @@ class TensorType(Type):
{Py_XINCREF(py_
%(name)
s);}
{Py_XINCREF(py_
%(name)
s);}
if (
!PyArray_ISALIGNED(
py_
%(name)
s)) {
if (
%(name)
s && !PyArray_ISALIGNED((PyArrayObject*)
py_
%(name)
s)) {
PyErr_Format(PyExc_NotImplementedError,
PyErr_Format(PyExc_NotImplementedError,
"c_sync: expected an aligned array of type
%%
ld "
"c_sync: expected an aligned array of type
%%
ld "
"(
%(type_num)
s), got non-aligned array of type
%%
ld"
"(
%(type_num)
s), got non-aligned array of type
%%
ld"
...
@@ -517,19 +518,19 @@ class TensorType(Type):
...
@@ -517,19 +518,19 @@ class TensorType(Type):
" and 3 last strides
%%
ld
%%
ld,
%%
ld.",
" and 3 last strides
%%
ld
%%
ld,
%%
ld.",
(long int)
%(type_num)
s,
(long int)
%(type_num)
s,
(long int) type_num_
%(name)
s,
(long int) type_num_
%(name)
s,
(long int) PyArray_NDIM(
py_
%(name)
s),
(long int) PyArray_NDIM(
%(name)
s),
(long int) PyArray_NDIM(
py_
%(name)
s) >= 3 ?
(long int) PyArray_NDIM(
%(name)
s) >= 3 ?
PyArray_DIMS(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s)-3] : -1,
PyArray_DIMS(
%(name)
s)[PyArray_NDIM(
%(name)
s)-3] : -1,
(long int) PyArray_NDIM(
py_
%(name)
s) >= 2 ?
(long int) PyArray_NDIM(
%(name)
s) >= 2 ?
PyArray_DIMS(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s)-2] : -1,
PyArray_DIMS(
%(name)
s)[PyArray_NDIM(
%(name)
s)-2] : -1,
(long int) PyArray_NDIM(
py_
%(name)
s) >= 1 ?
(long int) PyArray_NDIM(
%(name)
s) >= 1 ?
PyArray_DIMS(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s)-1] : -1,
PyArray_DIMS(
%(name)
s)[PyArray_NDIM(
%(name)
s)-1] : -1,
(long int) PyArray_NDIM(
py_
%(name)
s) >= 3 ?
(long int) PyArray_NDIM(
%(name)
s) >= 3 ?
PyArray_STRIDES(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s)-3] : -1,
PyArray_STRIDES(
%(name)
s)[PyArray_NDIM(
%(name)
s)-3] : -1,
(long int) PyArray_NDIM(
py_
%(name)
s) >= 2 ?
(long int) PyArray_NDIM(
%(name)
s) >= 2 ?
PyArray_STRIDES(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s)-2] : -1,
PyArray_STRIDES(
%(name)
s)[PyArray_NDIM(
%(name)
s)-2] : -1,
(long int) PyArray_NDIM(
py_
%(name)
s) >= 1 ?
(long int) PyArray_NDIM(
%(name)
s) >= 1 ?
PyArray_STRIDES(
py_
%(name)
s)[PyArray_NDIM(py_
%(name)
s)-1] : -1
PyArray_STRIDES(
%(name)
s)[PyArray_NDIM(
%(name)
s)-1] : -1
);
);
%(fail)
s
%(fail)
s
}
}
...
@@ -555,7 +556,7 @@ class TensorType(Type):
...
@@ -555,7 +556,7 @@ class TensorType(Type):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
scalar_version
=
scal
.
Scalar
(
self
.
dtype
)
.
c_code_cache_version
()
scalar_version
=
scal
.
Scalar
(
self
.
dtype
)
.
c_code_cache_version
()
if
scalar_version
:
if
scalar_version
:
return
(
1
0
,)
+
scalar_version
return
(
1
1
,)
+
scalar_version
else
:
else
:
return
()
return
()
...
...
theano/tests/test_tutorial.py
浏览文件 @
9950ce08
...
@@ -919,9 +919,9 @@ class Fibby(theano.Op):
...
@@ -919,9 +919,9 @@ class Fibby(theano.Op):
if (!
%(y)
s)
if (!
%(y)
s)
%(fail)
s;
%(fail)
s;
{//New scope needed to make compilation work
{//New scope needed to make compilation work
dtype_
%(y)
s * y = (dtype_
%(y)
s*)
%(y)
s->data
;
dtype_
%(y)
s * y = (dtype_
%(y)
s*)
PyArray_DATA(
%(y)
s)
;
dtype_
%(x)
s * x = (dtype_
%(x)
s*)
%(x)
s->data
;
dtype_
%(x)
s * x = (dtype_
%(x)
s*)
PyArray_DATA(
%(x)
s)
;
for (int i = 2; i <
%(x)
s->dimensions
[0]; ++i)
for (int i = 2; i <
PyArray_DIMS(
%(x)
s)
[0]; ++i)
y[i] = y[i-1]*y[i-2] + x[i];
y[i] = y[i-1]*y[i-2] + x[i];
}
}
"""
%
locals
()
"""
%
locals
()
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论