Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
4185ef0c
提交
4185ef0c
authored
10月 26, 2016
作者:
notoraptor
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
New update.
Code-comment removed from test_corr. Code factorized in alt_gemm_template.c All tests passed.
上级
bb25e997
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
68 行增加
和
52 行删除
+68
-52
alt_gemm_template.c
theano/tensor/alt_gemm_template.c
+68
-51
test_corr.py
theano/tensor/nnet/tests/test_corr.py
+0
-1
没有找到文件。
theano/tensor/alt_gemm_template.c
浏览文件 @
4185ef0c
...
...
@@ -22,14 +22,15 @@ void alt_numpy_scale_matrix_inplace_%(float_type)s(%(float_type)s scalar, PyArra
}
while
(
get_next
(
iterator
));
NpyIter_Deallocate
(
iterator
);
}
/*Matrix+Matrix function. Compute (coeffA * matrixA) + (coeffB * matrixB)
* Remark: This function actually sums a C-contiguous matrix (alpha*op(A)*op(B)) with a F-contiguous matrix (beta*C)
* (see gemm implementation at next function for more details) */
/*Matrix+Matrix function.
* Computes (scalar1 * matrix1) + (scalar2 * matrix2)
* and puts the result into matrix2:
* matrix2 = (scalar1 * matrix1) + (scalar2 * matrix2) */
void
alt_numpy_matrix_extended_sum_inplace_
%
(
float_type
)
s
(
const
%
(
float_type
)
s
*
ALPHA
,
PyArrayObject
*
A
,
const
%
(
float_type
)
s
*
BETA
,
PyArrayObject
*
B
const
%
(
float_type
)
s
*
scalar1
,
PyArrayObject
*
matrix1
,
const
%
(
float_type
)
s
*
scalar2
,
PyArrayObject
*
matrix2
)
{
PyArrayObject
*
op
[
2
]
=
{
A
,
B
};
PyArrayObject
*
op
[
2
]
=
{
matrix1
,
matrix2
};
npy_uint32
op_flags
[
2
]
=
{
NPY_ITER_READONLY
,
NPY_ITER_READWRITE
};
npy_uint32
flags
=
0
;
NpyIter
*
iterators
=
NpyIter_MultiNew
(
...
...
@@ -40,9 +41,9 @@ void alt_numpy_matrix_extended_sum_inplace_%(float_type)s(
NpyIter_IterNextFunc
*
get_next
=
NpyIter_GetIterNext
(
iterators
,
NULL
);
char
**
data_ptr_array
=
NpyIter_GetDataPtrArray
(
iterators
);
do
{
%
(
float_type
)
s
*
from_
A
=
(
%
(
float_type
)
s
*
)
data_ptr_array
[
0
];
%
(
float_type
)
s
*
from_
B
=
(
%
(
float_type
)
s
*
)
data_ptr_array
[
1
];
*
from_
B
=
(
*
ALPHA
)
*
(
*
from_A
)
+
(
*
BETA
)
*
(
*
from_B
);
%
(
float_type
)
s
*
from_
matrix1
=
(
%
(
float_type
)
s
*
)
data_ptr_array
[
0
];
%
(
float_type
)
s
*
from_
matrix2
=
(
%
(
float_type
)
s
*
)
data_ptr_array
[
1
];
*
from_
matrix2
=
(
*
scalar1
)
*
(
*
from_matrix1
)
+
(
*
scalar2
)
*
(
*
from_matrix2
);
}
while
(
get_next
(
iterators
));
NpyIter_Deallocate
(
iterators
);
}
...
...
@@ -75,61 +76,77 @@ void %(name)s(
%
(
float_type
)
s
*
C
,
const
int
*
LDC
)
{
if
(
*
M
<
0
||
*
N
<
0
||
*
K
<
0
||
*
LDA
<
0
||
*
LDB
<
0
||
*
LDC
<
0
)
alt_fatal_error
(
"The integer arguments passed to %(name)s must all be at least 0."
);
/* NB: it seems that matrix+matrix and scalar*matrix functions
* defined above do not allocate iterator for a matrix with 0
* content, that is a matrix whose nrow*ncol == 0. As these
* functions actually work with M*N matrices (op(A)*op(B) and/or C),
* I think that we could just return if M or N is null. */
/* If M or N is null, there is nothing to do with C,
* as C should contain M*N == 0 items. */
if
(
*
M
==
0
||
*
N
==
0
)
return
;
int
nrowa
,
ncola
,
nrowb
,
ncolb
;
int
is_A_transposable
=
alt_trans_to_bool
(
TRANSA
);
int
is_B_transposable
=
alt_trans_to_bool
(
TRANSB
);
if
(
is_A_transposable
)
{
nrowa
=
*
K
;
ncola
=
*
M
;
int
to_transpose_A
=
alt_trans_to_bool
(
TRANSA
);
int
to_transpose_B
=
alt_trans_to_bool
(
TRANSB
);
if
(
to_transpose_A
)
{
nrowa
=
*
K
;
ncola
=
*
M
;
}
else
{
nrowa
=
*
M
;
ncola
=
*
K
;
nrowa
=
*
M
;
ncola
=
*
K
;
}
if
(
is_B_transposable
)
{
nrowb
=
*
N
;
ncolb
=
*
K
;
if
(
to_transpose_B
)
{
nrowb
=
*
N
;
ncolb
=
*
K
;
}
else
{
nrowb
=
*
K
;
ncolb
=
*
N
;
nrowb
=
*
K
;
ncolb
=
*
N
;
}
int
computation_flags
;
void
*
computation_pointer
;
npy_intp
*
computation_strides
;
npy_intp
computation_dims
[
2
]
=
{
*
N
,
*
M
};
npy_intp
default_computation_strides
[
2
]
=
{(
*
LDC
)
*
%
(
float_size
)
d
,
%
(
float_size
)
d
};
if
(
*
BETA
==
0
)
{
/*C is never read, just written, so it will be directly used
* to store the result of ALPHA*op(A)*op(B). */
computation_flags
=
NPY_ARRAY_C_CONTIGUOUS
|
NPY_ARRAY_WRITEABLE
;
computation_pointer
=
C
;
computation_strides
=
default_computation_strides
;
}
else
{
/* C will be read, so the must allocate a new memory buffer
* to compute op(A)*op(B). */
computation_flags
=
0
;
computation_pointer
=
NULL
;
computation_strides
=
NULL
;
}
/* The memory buffer used to compute op(A)*op(B) (either C or
* new allocated buffer) will be considered as C-contiguous because
* the 3rd parameter of PyArray_MatrixProduct2 (used below)
* expects a C-contiguous array.
* Also, to avoid some memory copy, transposition conditions
* for A and B will be reversed, so that the buffer will contain
* C-contiguous opB_transposed * opA_transposed (N*M matrix).
* After that, the code that uses the buffer (either the code calling
* this function, or this function if BETA != 0) just has to
* consider the buffer as a F-contiguous M*N matrix, so that
* it will get the transposed of op_B_transposed * op_A_transposed,
* that is op_A * op_B (M*N matrix) as expected. */
PyObject
*
opA_transposed
=
alt_op_without_copy_
%
(
float_type
)
s
(
!
to_transpose_A
,
A
,
nrowa
,
ncola
,
*
LDA
);
PyObject
*
opB_transposed
=
alt_op_without_copy_
%
(
float_type
)
s
(
!
to_transpose_B
,
B
,
nrowb
,
ncolb
,
*
LDB
);
PyObject
*
opB_trans_dot_opA_trans
=
PyArray_New
(
&
PyArray_Type
,
2
,
computation_dims
,
%
(
npy_float
)
s
,
computation_strides
,
computation_pointer
,
0
,
computation_flags
,
NULL
);
PyArray_MatrixProduct2
(
opB_transposed
,
opA_transposed
,
(
PyArrayObject
*
)
opB_trans_dot_opA_trans
);
if
(
*
BETA
==
0
)
{
/*C is never read, just written.
* matrix_C will be created as C-contiguous because
* the 3rd parameter of PyArray_MatrixProduct2 (used below)
* expects a C-contiguous array.
* Also, to avoid some memory copy, transposition conditions
* for A and B will be reversed, so that C will contain
* C-contiguous op_B_transposed * op_A_transposed (N*M matrix).
* As the calling code will consider C as F-contiguous M*N matrix,
* it will get the transposed of op_B_transposed * op_A_transposed,
* that is op_A * op_B (M*N matrix) as expected. */
npy_intp
dims_C
[
2
]
=
{
*
N
,
*
M
};
npy_intp
strides_C
[
2
]
=
{(
*
M
)
*
%
(
float_size
)
d
,
%
(
float_size
)
d
};
PyObject
*
matrix_C
=
PyArray_New
(
&
PyArray_Type
,
2
,
dims_C
,
%
(
npy_float
)
s
,
strides_C
,
C
,
0
,
NPY_ARRAY_C_CONTIGUOUS
|
NPY_ARRAY_WRITEABLE
,
NULL
);
PyObject
*
op_A_transposed
=
alt_op_without_copy_
%
(
float_type
)
s
(
!
is_A_transposable
,
A
,
nrowa
,
ncola
,
*
LDA
);
PyObject
*
op_B_transposed
=
alt_op_without_copy_
%
(
float_type
)
s
(
!
is_B_transposable
,
B
,
nrowb
,
ncolb
,
*
LDB
);
PyArray_MatrixProduct2
(
op_B_transposed
,
op_A_transposed
,
(
PyArrayObject
*
)
matrix_C
);
if
(
*
ALPHA
!=
1
.
0
)
alt_numpy_scale_matrix_inplace_
%
(
float_type
)
s
(
*
ALPHA
,
(
PyArrayObject
*
)
matrix_C
);
Py_XDECREF
(
op_B_transposed
);
Py_XDECREF
(
op_A_transposed
);
Py_XDECREF
(
matrix_C
);
alt_numpy_scale_matrix_inplace_
%
(
float_type
)
s
(
*
ALPHA
,
(
PyArrayObject
*
)
opB_trans_dot_opA_trans
);
}
else
{
/*
C is read, so we must consider it as F-contiguous, as we do for A and B.
*/
/*
C is read, so we must consider it as F-contiguous.
*/
npy_intp
dims_C
[
2
]
=
{
*
M
,
*
N
};
npy_intp
strides_C
[
2
]
=
{
%
(
float_size
)
d
,
(
*
LDC
)
*
%
(
float_size
)
d
};
PyObject
*
matrix_C
=
PyArray_New
(
&
PyArray_Type
,
2
,
dims_C
,
%
(
npy_float
)
s
,
strides_C
,
C
,
0
,
NPY_ARRAY_F_CONTIGUOUS
|
NPY_ARRAY_WRITEABLE
,
NULL
);
PyObject
*
op_A
=
alt_op_without_copy_
%
(
float_type
)
s
(
is_A_transposable
,
A
,
nrowa
,
ncola
,
*
LDA
);
PyObject
*
op_B
=
alt_op_without_copy_
%
(
float_type
)
s
(
is_B_transposable
,
B
,
nrowb
,
ncolb
,
*
LDB
);
PyArrayObject
*
op_A_times_op_B
=
(
PyArrayObject
*
)
PyArray_MatrixProduct
(
op_A
,
op_B
);
alt_numpy_matrix_extended_sum_inplace_
%
(
float_type
)
s
(
ALPHA
,
op_A_times_op_B
,
BETA
,
(
PyArrayObject
*
)
matrix_C
);
/*C is already F-contiguous, thus no conversion needed for output.*/
Py_XDECREF
(
op_A_times_op_B
);
Py_XDECREF
(
op_B
);
Py_XDECREF
(
op_A
);
/* Hack: if we use alt_op_without_copy with the right parameters,
* it will returns the transposed version of opB_trans_dot_opA_trans. */
PyObject
*
opA_dot_opB
=
alt_op_without_copy_
%
(
float_type
)
s
(
0
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
((
PyArrayObject
*
)
opB_trans_dot_opA_trans
),
*
M
,
*
N
,
*
M
);
alt_numpy_matrix_extended_sum_inplace_
%
(
float_type
)
s
(
ALPHA
,
(
PyArrayObject
*
)
opA_dot_opB
,
BETA
,
(
PyArrayObject
*
)
matrix_C
);
Py_XDECREF
(
opA_dot_opB
);
Py_XDECREF
(
matrix_C
);
}
Py_XDECREF
(
opB_trans_dot_opA_trans
);
Py_XDECREF
(
opB_transposed
);
Py_XDECREF
(
opA_transposed
);
}
theano/tensor/nnet/tests/test_corr.py
浏览文件 @
4185ef0c
...
...
@@ -130,7 +130,6 @@ class TestCorr2D(utt.InferShapeTester):
icol
:
icol
+
dil_fil_shape2d
[
1
]:
filter_dilation
[
1
]]
*
filter2d
[::
-
1
,
::
-
1
]
)
.
sum
()
# utt.assert_allclose(theano_output, ref_output)
utt
.
assert_allclose
(
ref_output
,
theano_output
)
# TEST GRADIENT
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论