Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
1f6b0c73
提交
1f6b0c73
authored
2月 20, 2012
作者:
nouiz
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #437 from lamblin/blas_double_strides
Make blas functions work with all stride patterns
上级
b96c7d5c
3316ba1e
全部展开
显示空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
243 行增加
和
87 行删除
+243
-87
blas.py
theano/sandbox/cuda/blas.py
+83
-56
cuda_ndarray.cu
theano/sandbox/cuda/cuda_ndarray.cu
+0
-0
cuda_ndarray.cuh
theano/sandbox/cuda/cuda_ndarray.cuh
+21
-12
test_blas.py
theano/sandbox/cuda/tests/test_blas.py
+86
-7
blas.py
theano/tensor/blas.py
+10
-4
blas_c.py
theano/tensor/blas_c.py
+0
-0
blas_scipy.py
theano/tensor/blas_scipy.py
+6
-1
test_blas.py
theano/tensor/tests/test_blas.py
+0
-0
test_blas_c.py
theano/tensor/tests/test_blas_c.py
+31
-6
test_blas_scipy.py
theano/tensor/tests/test_blas_scipy.py
+6
-1
没有找到文件。
theano/sandbox/cuda/blas.py
浏览文件 @
1f6b0c73
...
@@ -192,7 +192,7 @@ class GpuGemm(GpuOp):
...
@@ -192,7 +192,7 @@ class GpuGemm(GpuOp):
return
Apply
(
self
,
[
z
,
a
,
x
,
y
,
b
],
[
z
.
type
()])
return
Apply
(
self
,
[
z
,
a
,
x
,
y
,
b
],
[
z
.
type
()])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
3
,)
return
(
4
,)
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
#z_out = alpha * dot(x,y) + beta * z_in
#z_out = alpha * dot(x,y) + beta * z_in
...
@@ -200,6 +200,7 @@ class GpuGemm(GpuOp):
...
@@ -200,6 +200,7 @@ class GpuGemm(GpuOp):
#not inplace version, we copy z_in to z_out.
#not inplace version, we copy z_in to z_out.
z_in
,
a
,
x
,
y
,
b
=
inputs
z_in
,
a
,
x
,
y
,
b
=
inputs
z_out
,
=
outputs
z_out
,
=
outputs
inplace
=
int
(
self
.
inplace
)
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
sio
=
StringIO
.
StringIO
()
sio
=
StringIO
.
StringIO
()
...
@@ -215,39 +216,50 @@ class GpuGemm(GpuOp):
...
@@ -215,39 +216,50 @@ class GpuGemm(GpuOp):
: (REAL)(((double*)
%(b)
s->data)[0]);
: (REAL)(((double*)
%(b)
s->data)[0]);
#undef REAL
#undef REAL
"""
if (
%(inplace)
s
if
self
.
inplace
:
&& (CudaNdarray_HOST_STRIDES(
%(z_in)
s)[0] >= 0)
print
>>
sio
,
"""
&& (CudaNdarray_HOST_STRIDES(
%(z_in)
s)[1] >= 0)
&& ((CudaNdarray_HOST_DIMS(
%(z_in)
s)[0] <= 1)
|| (CudaNdarray_HOST_STRIDES(
%(z_in)
s)[0] == 1)
|| (CudaNdarray_HOST_DIMS(
%(z_in)
s)[1] <= 1)
|| (CudaNdarray_HOST_STRIDES(
%(z_in)
s)[1] == 1)))
{
// The input has an appropriate layout, we work inplace
Py_XDECREF(
%(z_out)
s);
Py_XDECREF(
%(z_out)
s);
%(z_out)
s =
%(z_in)
s;
%(z_out)
s =
%(z_in)
s;
Py_INCREF(
%(z_out)
s);
Py_INCREF(
%(z_out)
s);
"""
}
else
:
else if (
%(z_out)
s
print
>>
sio
,
"""
&& (
%(z_out)
s->nd == 2)
if (!
%(z_out)
s
&& (CudaNdarray_HOST_DIMS(
%(z_out)
s)[0]
|| (
%(z_out)
s->nd != 2)
== CudaNdarray_HOST_DIMS(
%(z_in)
s)[0])
|| (CudaNdarray_HOST_DIMS(
%(z_out)
s)[0] != CudaNdarray_HOST_DIMS(
%(z_in)
s)[0])
&& (CudaNdarray_HOST_DIMS(
%(z_out)
s)[1]
|| (CudaNdarray_HOST_DIMS(
%(z_out)
s)[1] != CudaNdarray_HOST_DIMS(
%(z_in)
s)[1])
== CudaNdarray_HOST_DIMS(
%(z_in)
s)[1])
)
&& (CudaNdarray_HOST_STRIDES(
%(z_out)
s)[0] >= 0)
&& (CudaNdarray_HOST_STRIDES(
%(z_out)
s)[1] >= 0)
&& ((CudaNdarray_HOST_DIMS(
%(z_out)
s)[0] <= 1)
|| (CudaNdarray_HOST_STRIDES(
%(z_out)
s)[0] == 1)
|| (CudaNdarray_HOST_DIMS(
%(z_out)
s)[1] <= 1)
|| (CudaNdarray_HOST_STRIDES(
%(z_out)
s)[1] == 1)))
{
{
Py_XDECREF(
%(z_out)
s);
// The existing output has an appropriate layout,
%(z_out)
s = (CudaNdarray*)CudaNdarray_Copy(
%(z_in)
s);
// copy the input data into it, then work inplace
if (!
%(z_out)
s
)
if (CudaNdarray_CopyFromCudaNdarray(
%(z_out)
s,
%(z_in)
s)
)
{
{
%(fail)
s;
%(fail)
s;
}
}
}
}
else
else
{
{
if (CudaNdarray_CopyFromCudaNdarray(
%(z_out)
s,
%(z_in)
s))
// Copy the input, use the copy as output
Py_XDECREF(
%(z_out)
s);
%(z_out)
s = (CudaNdarray*)CudaNdarray_Copy(
%(z_in)
s);
if (!
%(z_out)
s)
{
{
%(fail)
s;
%(fail)
s;
}
}
}
}
"""
print
>>
sio
,
"""
if (CudaNdarray_gemm(
%(name)
s_a,
%(x)
s,
%(y)
s,
%(name)
s_b,
%(z_out)
s))
if (CudaNdarray_gemm(
%(name)
s_a,
%(x)
s,
%(y)
s,
%(name)
s_b,
%(z_out)
s))
{
{
%(fail)
s;
%(fail)
s;
...
@@ -294,7 +306,7 @@ class GpuGemv(GpuOp):
...
@@ -294,7 +306,7 @@ class GpuGemv(GpuOp):
return
Apply
(
self
,
[
z
,
a
,
x
,
y
,
b
],
[
z
.
type
()])
return
Apply
(
self
,
[
z
,
a
,
x
,
y
,
b
],
[
z
.
type
()])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
1
,)
return
(
2
,)
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
#z_out = alpha * dot(x,y) + beta * z_in
#z_out = alpha * dot(x,y) + beta * z_in
...
@@ -302,44 +314,46 @@ class GpuGemv(GpuOp):
...
@@ -302,44 +314,46 @@ class GpuGemv(GpuOp):
#not inplace version, we copy z_in to z_out.
#not inplace version, we copy z_in to z_out.
z_in
,
a
,
x
,
y
,
b
=
inputs
z_in
,
a
,
x
,
y
,
b
=
inputs
z_out
,
=
outputs
z_out
,
=
outputs
inplace
=
int
(
self
.
inplace
)
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
sio
=
StringIO
.
StringIO
()
sio
=
StringIO
.
StringIO
()
print
>>
sio
,
"""
print
>>
sio
,
"""
float
%(name)
s_alpha = ((dtype_
%(a)
s*)(
%(a)
s->data))[0];
float
%(name)
s_alpha = ((dtype_
%(a)
s*)(
%(a)
s->data))[0];
float
%(name)
s_beta = ((dtype_
%(b)
s*)(
%(b)
s->data))[0];
float
%(name)
s_beta = ((dtype_
%(b)
s*)(
%(b)
s->data))[0];
"""
if
self
.
inplace
:
if (
%(inplace)
s
print
>>
sio
,
"""
&& ((CudaNdarray_HOST_STRIDES(
%(z_in)
s)[0] > 0)
|| ((CudaNdarray_HOST_STRIDES(
%(z_in)
s)[0] == 0)
&& (CudaNdarray_HOST_DIMS(
%(z_in)
s)[0] == 1))))
{
// Work inplace on the input
Py_XDECREF(
%(z_out)
s);
Py_XDECREF(
%(z_out)
s);
%(z_out)
s =
%(z_in)
s;
%(z_out)
s =
%(z_in)
s;
Py_INCREF(
%(z_out)
s);
Py_INCREF(
%(z_out)
s);
"""
}
else
:
else if (
%(z_out)
s
print
>>
sio
,
"""
&& ((CudaNdarray_HOST_STRIDES(
%(z_out)
s)[0] > 0)
if (!
%(z_out)
s
|| ((CudaNdarray_HOST_STRIDES(
%(z_out)
s)[0] == 0)
|| (
%(z_out)
s->nd != 1)
&& (CudaNdarray_HOST_DIMS(
%(z_out)
s)[0] == 1))))
|| (CudaNdarray_HOST_DIMS(
%(z_out)
s)[0] != CudaNdarray_HOST_DIMS(
%(z_in)
s)[0])
)
{
{
Py_XDECREF(
%(z_out)
s);
// Work on the output
%(z_out)
s = (CudaNdarray*)CudaNdarray_Copy(
%(z_in)
s);
if (CudaNdarray_CopyFromCudaNdarray(
%(z_out)
s,
%(z_in)
s))
if (!
%(z_out)
s)
{
{
%(fail)
s;
%(fail)
s;
}
}
}
}
else
else
{
{
if (CudaNdarray_CopyFromCudaNdarray(
%(z_out)
s,
%(z_in)
s))
// Copy
Py_XDECREF(
%(z_out)
s);
%(z_out)
s = (CudaNdarray*)CudaNdarray_Copy(
%(z_in)
s);
if (!
%(z_out)
s)
{
{
%(fail)
s;
%(fail)
s;
}
}
}
}
"""
print
>>
sio
,
"""
if (CudaNdarray_sgemv(
%(name)
s_alpha,
%(x)
s,
%(y)
s,
%(name)
s_beta,
%(z_out)
s))
if (CudaNdarray_sgemv(
%(name)
s_alpha,
%(x)
s,
%(y)
s,
%(name)
s_beta,
%(z_out)
s))
{
{
%(fail)
s;
%(fail)
s;
...
@@ -385,7 +399,7 @@ class GpuGer(GpuOp):
...
@@ -385,7 +399,7 @@ class GpuGer(GpuOp):
return
Apply
(
self
,
[
z
,
a
,
x
,
y
],
[
z
.
type
()])
return
Apply
(
self
,
[
z
,
a
,
x
,
y
],
[
z
.
type
()])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
1
,)
return
(
2
,)
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
#z_out = alpha * dot(x,y) + beta * z_in
#z_out = alpha * dot(x,y) + beta * z_in
...
@@ -393,44 +407,57 @@ class GpuGer(GpuOp):
...
@@ -393,44 +407,57 @@ class GpuGer(GpuOp):
#not inplace version, we copy z_in to z_out.
#not inplace version, we copy z_in to z_out.
z_in
,
a
,
x
,
y
=
inputs
z_in
,
a
,
x
,
y
=
inputs
z_out
,
=
outputs
z_out
,
=
outputs
inplace
=
int
(
self
.
inplace
)
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
sio
=
StringIO
.
StringIO
()
sio
=
StringIO
.
StringIO
()
print
>>
sio
,
"""
print
>>
sio
,
"""
float
%(name)
s_alpha = ((dtype_
%(a)
s*)(
%(a)
s->data))[0];
float
%(name)
s_alpha = ((dtype_
%(a)
s*)(
%(a)
s->data))[0];
"""
if
self
.
inplace
:
if (
%(inplace)
s
print
>>
sio
,
"""
&& (CudaNdarray_HOST_STRIDES(
%(z_in)
s)[0] >= 0)
&& (CudaNdarray_HOST_STRIDES(
%(z_in)
s)[1] >= 0)
&& ((CudaNdarray_HOST_DIMS(
%(z_in)
s)[0] <= 1)
|| (CudaNdarray_HOST_STRIDES(
%(z_in)
s)[0] == 1)
|| (CudaNdarray_HOST_DIMS(
%(z_in)
s)[1] <= 1)
|| (CudaNdarray_HOST_STRIDES(
%(z_in)
s)[1] == 1)))
{
// The input has an appropriate layout, we work inplace
Py_XDECREF(
%(z_out)
s);
Py_XDECREF(
%(z_out)
s);
%(z_out)
s =
%(z_in)
s;
%(z_out)
s =
%(z_in)
s;
Py_INCREF(
%(z_out)
s);
Py_INCREF(
%(z_out)
s);
"""
}
else
:
else if (
%(z_out)
s
print
>>
sio
,
"""
&& (
%(z_out)
s->nd == 2)
if (!
%(z_out)
s
&& (CudaNdarray_HOST_DIMS(
%(z_out)
s)[0]
|| (
%(z_out)
s->nd != 2)
== CudaNdarray_HOST_DIMS(
%(z_in)
s)[0])
|| (CudaNdarray_HOST_DIMS(
%(z_out)
s)[0] != CudaNdarray_HOST_DIMS(
%(z_in)
s)[0])
&& (CudaNdarray_HOST_DIMS(
%(z_out)
s)[1]
|| (CudaNdarray_HOST_DIMS(
%(z_out)
s)[1] != CudaNdarray_HOST_DIMS(
%(z_in)
s)[1])
== CudaNdarray_HOST_DIMS(
%(z_in)
s)[1])
)
&& (CudaNdarray_HOST_STRIDES(
%(z_out)
s)[0] >= 0)
&& (CudaNdarray_HOST_STRIDES(
%(z_out)
s)[1] >= 0)
&& ((CudaNdarray_HOST_DIMS(
%(z_out)
s)[0] <= 1)
|| (CudaNdarray_HOST_STRIDES(
%(z_out)
s)[0] == 1)
|| (CudaNdarray_HOST_DIMS(
%(z_out)
s)[1] <= 1)
|| (CudaNdarray_HOST_STRIDES(
%(z_out)
s)[1] == 1)))
{
{
Py_XDECREF(
%(z_out)
s);
// The existing output has an appropriate layout,
%(z_out)
s = (CudaNdarray*)CudaNdarray_Copy(
%(z_in)
s);
// copy the input data into it, then work inplace
if (!
%(z_out)
s
)
if (CudaNdarray_CopyFromCudaNdarray(
%(z_out)
s,
%(z_in)
s)
)
{
{
%(fail)
s;
%(fail)
s;
}
}
}
}
else
else
{
{
if (CudaNdarray_CopyFromCudaNdarray(
%(z_out)
s,
%(z_in)
s))
// Copy the input, use the copy as output
Py_XDECREF(
%(z_out)
s);
%(z_out)
s = (CudaNdarray*)CudaNdarray_Copy(
%(z_in)
s);
if (!
%(z_out)
s)
{
{
%(fail)
s;
%(fail)
s;
}
}
}
}
"""
print
>>
sio
,
"""
if (CudaNdarray_sger(
%(name)
s_alpha,
%(x)
s,
%(y)
s,
%(z_out)
s))
if (CudaNdarray_sger(
%(name)
s_alpha,
%(x)
s,
%(y)
s,
%(z_out)
s))
{
{
%(fail)
s;
%(fail)
s;
...
...
theano/sandbox/cuda/cuda_ndarray.cu
浏览文件 @
1f6b0c73
差异被折叠。
点击展开。
theano/sandbox/cuda/cuda_ndarray.cuh
浏览文件 @
1f6b0c73
...
@@ -81,7 +81,7 @@ struct CudaNdarray
...
@@ -81,7 +81,7 @@ struct CudaNdarray
//device pointers (allocated by cudaMalloc)
//device pointers (allocated by cudaMalloc)
int
dev_structure_fresh
;
mutable
int
dev_structure_fresh
;
//dev_structure should be accessed via macros, otherwise may not be synchronized
//dev_structure should be accessed via macros, otherwise may not be synchronized
int
*
dev_structure
;
//dim0, dim1, ..., stride0, stride1, ...
int
*
dev_structure
;
//dim0, dim1, ..., stride0, stride1, ...
real
*
devdata
;
//pointer to data element [0,..,0].
real
*
devdata
;
//pointer to data element [0,..,0].
...
@@ -154,11 +154,11 @@ CudaNdarray_set_stride(CudaNdarray * self, int idx, int s);
...
@@ -154,11 +154,11 @@ CudaNdarray_set_stride(CudaNdarray * self, int idx, int s);
*
*
* This means: recalculate the log2dims and transfer structure to the card
* This means: recalculate the log2dims and transfer structure to the card
*/
*/
DllExport
int
cnda_copy_structure_to_device
(
CudaNdarray
*
self
);
DllExport
int
cnda_copy_structure_to_device
(
const
CudaNdarray
*
self
);
DllExport
const
int
*
CudaNdarray_DEV_DIMS
(
CudaNdarray
*
self
);
DllExport
const
int
*
CudaNdarray_DEV_DIMS
(
const
CudaNdarray
*
self
);
DllExport
const
int
*
CudaNdarray_DEV_STRIDES
(
CudaNdarray
*
self
);
DllExport
const
int
*
CudaNdarray_DEV_STRIDES
(
const
CudaNdarray
*
self
);
DllExport
const
int
*
CudaNdarray_DEV_LOG2DIMS
(
CudaNdarray
*
self
);
DllExport
const
int
*
CudaNdarray_DEV_LOG2DIMS
(
const
CudaNdarray
*
self
);
DllExport
float
*
CudaNdarray_DEV_DATA
(
const
CudaNdarray
*
self
);
DllExport
float
*
CudaNdarray_DEV_DATA
(
const
CudaNdarray
*
self
);
/**
/**
...
@@ -229,13 +229,22 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i
...
@@ -229,13 +229,22 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i
return
-
1
;
return
-
1
;
}
}
assert
(
size
>
0
);
if
(
size
<
0
)
{
PyErr_Format
(
PyExc_AssertionError
,
"size (%i) < 0"
,
size
);
return
-
1
;
}
self
->
devdata
=
(
float
*
)
device_malloc
(
size
*
sizeof
(
real
));
self
->
devdata
=
(
float
*
)
device_malloc
(
size
*
sizeof
(
real
));
if
(
!
self
->
devdata
)
if
(
size
&&
!
self
->
devdata
)
{
{
CudaNdarray_set_nd
(
self
,
-
1
);
CudaNdarray_set_nd
(
self
,
-
1
);
self
->
data_allocated
=
0
;
self
->
data_allocated
=
0
;
self
->
devdata
=
0
;
self
->
devdata
=
0
;
PyErr_SetString
(
PyExc_RuntimeError
,
"Could not allocate memory on device"
);
return
-
1
;
return
-
1
;
}
}
if
(
0
)
if
(
0
)
...
@@ -283,7 +292,7 @@ DllExport PyObject * CudaNdarray_DeepCopy(CudaNdarray * self, PyObject * memo);
...
@@ -283,7 +292,7 @@ DllExport PyObject * CudaNdarray_DeepCopy(CudaNdarray * self, PyObject * memo);
/**
/**
* Return an independent copy of self
* Return an independent copy of self
*/
*/
DllExport
PyObject
*
CudaNdarray_Copy
(
CudaNdarray
*
self
);
DllExport
PyObject
*
CudaNdarray_Copy
(
const
CudaNdarray
*
self
);
/**
/**
* Return a new object obtained by summing over the dimensions for which there is a 1 in the mask.
* Return a new object obtained by summing over the dimensions for which there is a 1 in the mask.
...
@@ -302,7 +311,7 @@ DllExport int CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj);
...
@@ -302,7 +311,7 @@ DllExport int CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj);
*
*
* self is reallocated to have the correct dimensions if necessary.
* self is reallocated to have the correct dimensions if necessary.
*/
*/
DllExport
int
CudaNdarray_CopyFromCudaNdarray
(
CudaNdarray
*
self
,
CudaNdarray
*
other
,
bool
unbroadcast
=
false
);
DllExport
int
CudaNdarray_CopyFromCudaNdarray
(
CudaNdarray
*
self
,
const
CudaNdarray
*
other
,
bool
unbroadcast
=
false
);
/**
/**
* Transfer the contents of CudaNdarray `self` to a new numpy ndarray.
* Transfer the contents of CudaNdarray `self` to a new numpy ndarray.
...
@@ -321,7 +330,7 @@ DllExport PyObject * CudaNdarray_IS_C_Contiguous(CudaNdarray * self);
...
@@ -321,7 +330,7 @@ DllExport PyObject * CudaNdarray_IS_C_Contiguous(CudaNdarray * self);
DllExport
int
CudaNdarray_gemm
(
float
alpha
,
const
CudaNdarray
*
A
,
const
CudaNdarray
*
B
,
float
beta
,
CudaNdarray
*
C
);
DllExport
int
CudaNdarray_gemm
(
float
alpha
,
const
CudaNdarray
*
A
,
const
CudaNdarray
*
B
,
float
beta
,
CudaNdarray
*
C
);
DllExport
int
CudaNdarray_sgemv
(
float
alpha
,
const
CudaNdarray
*
A
,
const
CudaNdarray
*
B
,
float
beta
,
CudaNdarray
*
C
);
DllExport
int
CudaNdarray_sgemv
(
float
alpha
,
const
CudaNdarray
*
A
,
const
CudaNdarray
*
B
,
float
beta
,
CudaNdarray
*
C
);
DllExport
int
CudaNdarray_sger
(
float
alpha
,
CudaNdarray
*
x
,
CudaNdarray
*
y
,
CudaNdarray
*
A
);
DllExport
int
CudaNdarray_sger
(
float
alpha
,
const
CudaNdarray
*
x
,
const
CudaNdarray
*
y
,
CudaNdarray
*
A
);
DllExport
int
CudaNdarray_reduce_sum
(
CudaNdarray
*
self
,
CudaNdarray
*
A
);
DllExport
int
CudaNdarray_reduce_sum
(
CudaNdarray
*
self
,
CudaNdarray
*
A
);
DllExport
int
CudaNdarray_reduce_prod
(
CudaNdarray
*
self
,
CudaNdarray
*
A
);
DllExport
int
CudaNdarray_reduce_prod
(
CudaNdarray
*
self
,
CudaNdarray
*
A
);
...
@@ -343,4 +352,4 @@ static void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self);
...
@@ -343,4 +352,4 @@ static void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self);
fill-column:79
fill-column:79
End:
End:
*/
*/
// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:
encoding=utf-8:
textwidth=79 :
// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:textwidth=79 :
theano/sandbox/cuda/tests/test_blas.py
浏览文件 @
1f6b0c73
import
itertools
from
unittest
import
TestCase
from
unittest
import
TestCase
from
theano.compile.pfunc
import
pfunc
from
theano.compile.pfunc
import
pfunc
...
@@ -17,7 +18,7 @@ import theano.sandbox.cuda as tcn
...
@@ -17,7 +18,7 @@ import theano.sandbox.cuda as tcn
from
theano.tensor.signal.downsample
import
DownsampleFactorMax
,
DownsampleFactorMaxGrad
from
theano.tensor.signal.downsample
import
DownsampleFactorMax
,
DownsampleFactorMaxGrad
import
theano.compile.mode
import
theano.compile.mode
from
theano.tensor.tests.test_blas
import
BaseGemv
,
TestGer
from
theano.tensor.tests.test_blas
import
BaseGemv
,
Test
BlasStrides
,
Test
Ger
from
theano.sandbox.cuda.blas
import
gpu_gemv_no_inplace
,
gpu_gemv_inplace
from
theano.sandbox.cuda.blas
import
gpu_gemv_no_inplace
,
gpu_gemv_inplace
from
theano.sandbox.cuda.blas
import
gpu_ger_inplace
,
gpu_ger_no_inplace
from
theano.sandbox.cuda.blas
import
gpu_ger_inplace
,
gpu_ger_no_inplace
...
@@ -32,20 +33,31 @@ else:
...
@@ -32,20 +33,31 @@ else:
def
my_rand
(
*
shape
):
def
my_rand
(
*
shape
):
return
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
return
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
def
transpose
(
cuda_mat
):
# The easiest way to transpose a cuda matrix for now
return
tcn
.
dimshuffle
(
cuda_mat
,
[
1
,
0
])
def
test_dot22
():
def
test_dot22
():
def
cmp
(
a_shp
,
b_shp
):
def
cmp
(
a_shp
,
b_shp
):
a
=
tcn
.
shared_constructor
(
my_rand
(
*
a_shp
),
'a'
)
a0
=
my_rand
(
*
a_shp
)
a
=
tcn
.
shared_constructor
(
a0
,
'a'
)
b
=
tensor
.
fmatrix
()
b
=
tensor
.
fmatrix
()
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
tensor
.
dot
(
a
,
b
))],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
tensor
.
dot
(
a
,
b
))],
mode
=
mode_with_gpu
)
a0
=
a
.
get_value
()
*
1.0
bval
=
my_rand
(
*
b_shp
)
bval
=
my_rand
(
*
b_shp
)
f
(
bval
)
f
(
bval
)
assert
numpy
.
allclose
(
numpy
.
dot
(
a0
,
bval
),
a
.
get_value
())
assert
numpy
.
allclose
(
numpy
.
dot
(
a0
,
bval
),
a
.
get_value
())
# Try with a matrix equal to a0, but with strides in both dims
a
.
set_value
(
a0
)
a
.
set_value
(
a
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)[::
-
1
,
::
-
1
],
borrow
=
True
)
f
(
bval
)
cmp
((
3
,
4
),(
4
,
5
))
cmp
((
3
,
4
),(
4
,
5
))
cmp
((
0
,
4
),(
4
,
5
))
cmp
((
0
,
4
),(
4
,
5
))
cmp
((
3
,
4
),(
4
,
0
))
cmp
((
3
,
4
),(
4
,
0
))
...
@@ -90,7 +102,8 @@ def test_dot22scalar():
...
@@ -90,7 +102,8 @@ def test_dot22scalar():
def
test_gemm
():
def
test_gemm
():
def
cmp
(
a_shp
,
b_shp
):
def
cmp
(
a_shp
,
b_shp
):
a
=
tcn
.
shared_constructor
(
my_rand
(
*
a_shp
),
'a'
)
a0
=
my_rand
(
*
a_shp
)
a
=
tcn
.
shared_constructor
(
a0
,
'a'
)
b
=
tensor
.
fmatrix
(
'b'
)
b
=
tensor
.
fmatrix
(
'b'
)
c
=
tensor
.
fmatrix
(
'c'
)
c
=
tensor
.
fmatrix
(
'c'
)
...
@@ -98,12 +111,19 @@ def test_gemm():
...
@@ -98,12 +111,19 @@ def test_gemm():
f
=
pfunc
([
b
,
c
],
[],
updates
=
[(
a
,
tensor
.
dot
(
a
,
b
)
+
tensor
.
exp
(
c
))],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
,
c
],
[],
updates
=
[(
a
,
tensor
.
dot
(
a
,
b
)
+
tensor
.
exp
(
c
))],
mode
=
mode_with_gpu
)
assert
any
([
node
.
op
==
tcn
.
blas
.
gpu_gemm_inplace
for
node
in
f
.
maker
.
env
.
toposort
()])
assert
any
([
node
.
op
==
tcn
.
blas
.
gpu_gemm_inplace
for
node
in
f
.
maker
.
env
.
toposort
()])
a0
=
a
.
get_value
()
*
1.0
bval
=
my_rand
(
*
b_shp
)
bval
=
my_rand
(
*
b_shp
)
cval
=
my_rand
(
a_shp
[
0
],
b_shp
[
1
])
cval
=
my_rand
(
a_shp
[
0
],
b_shp
[
1
])
f
(
bval
,
cval
)
f
(
bval
,
cval
)
assert
numpy
.
allclose
(
numpy
.
dot
(
a0
,
bval
)
+
numpy
.
exp
(
cval
),
a
.
get_value
())
assert
numpy
.
allclose
(
numpy
.
dot
(
a0
,
bval
)
+
numpy
.
exp
(
cval
),
a
.
get_value
())
# Try with a matrix equal to a0, but with strides in both dims
a
.
set_value
(
a0
)
a
.
set_value
(
a
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)[::
-
1
,
::
-
1
],
borrow
=
True
)
f
(
bval
,
cval
)
cmp
((
3
,
4
),(
4
,
5
))
cmp
((
3
,
4
),(
4
,
5
))
cmp
((
0
,
4
),(
4
,
5
))
cmp
((
0
,
4
),(
4
,
5
))
cmp
((
3
,
4
),(
4
,
0
))
cmp
((
3
,
4
),(
4
,
0
))
...
@@ -114,7 +134,8 @@ def test_gemm():
...
@@ -114,7 +134,8 @@ def test_gemm():
def
test_gemm_no_inplace
():
def
test_gemm_no_inplace
():
def
cmp
(
a_shp
,
b_shp
):
def
cmp
(
a_shp
,
b_shp
):
a
=
tcn
.
shared_constructor
(
my_rand
(
*
a_shp
),
'a'
)
a0
=
my_rand
(
*
a_shp
)
a
=
tcn
.
shared_constructor
(
a0
,
'a'
)
cval
=
my_rand
(
a_shp
[
0
],
b_shp
[
1
])
cval
=
my_rand
(
a_shp
[
0
],
b_shp
[
1
])
c
=
tcn
.
shared_constructor
(
cval
.
copy
(),
'c'
)
c
=
tcn
.
shared_constructor
(
cval
.
copy
(),
'c'
)
...
@@ -123,7 +144,6 @@ def test_gemm_no_inplace():
...
@@ -123,7 +144,6 @@ def test_gemm_no_inplace():
f
=
pfunc
([
b
,
b2
],
[
tensor
.
dot
(
a
,
b2
)
+
c
],
updates
=
[(
a
,
tensor
.
dot
(
a
,
b
)
+
c
)],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
,
b2
],
[
tensor
.
dot
(
a
,
b2
)
+
c
],
updates
=
[(
a
,
tensor
.
dot
(
a
,
b
)
+
c
)],
mode
=
mode_with_gpu
)
a0
=
a
.
get_value
()
*
1.0
assert
any
([
node
.
op
==
tcn
.
blas
.
gpu_gemm_no_inplace
for
node
in
f
.
maker
.
env
.
toposort
()])
assert
any
([
node
.
op
==
tcn
.
blas
.
gpu_gemm_no_inplace
for
node
in
f
.
maker
.
env
.
toposort
()])
bval
=
my_rand
(
*
b_shp
)
bval
=
my_rand
(
*
b_shp
)
bval2
=
my_rand
(
*
b_shp
)
bval2
=
my_rand
(
*
b_shp
)
...
@@ -132,6 +152,13 @@ def test_gemm_no_inplace():
...
@@ -132,6 +152,13 @@ def test_gemm_no_inplace():
assert
numpy
.
allclose
(
numpy
.
dot
(
a0
,
bval
)
+
cval
,
a
.
get_value
())
assert
numpy
.
allclose
(
numpy
.
dot
(
a0
,
bval
)
+
cval
,
a
.
get_value
())
assert
numpy
.
allclose
(
numpy
.
dot
(
a0
,
bval2
)
+
cval
,
rval
)
assert
numpy
.
allclose
(
numpy
.
dot
(
a0
,
bval2
)
+
cval
,
rval
)
# Try with a matrix equal to a0, but with strides in both dims
a
.
set_value
(
a0
)
a
.
set_value
(
a
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)[::
-
1
,
::
-
1
],
borrow
=
True
)
f
(
bval
,
bval2
)
cmp
((
3
,
4
),(
4
,
5
))
cmp
((
3
,
4
),(
4
,
5
))
cmp
((
0
,
4
),(
4
,
5
))
cmp
((
0
,
4
),(
4
,
5
))
cmp
((
3
,
4
),(
4
,
0
))
cmp
((
3
,
4
),(
4
,
0
))
...
@@ -139,6 +166,13 @@ def test_gemm_no_inplace():
...
@@ -139,6 +166,13 @@ def test_gemm_no_inplace():
cmp
((
0
,
4
),(
4
,
0
))
cmp
((
0
,
4
),(
4
,
0
))
cmp
((
0
,
0
),(
0
,
0
))
cmp
((
0
,
0
),(
0
,
0
))
class
TestBlasStridesGpu
(
TestBlasStrides
):
dtype
=
'float32'
shared
=
staticmethod
(
tcn
.
shared_constructor
)
mode
=
mode_with_gpu
def
test_outer
():
def
test_outer
():
x
=
tcn
.
shared_constructor
(
my_rand
(
8
,),
'x'
)
x
=
tcn
.
shared_constructor
(
my_rand
(
8
,),
'x'
)
y
=
tcn
.
shared_constructor
(
my_rand
(
6
,),
'y'
)
y
=
tcn
.
shared_constructor
(
my_rand
(
6
,),
'y'
)
...
@@ -260,6 +294,23 @@ class TestGpuGemv(TestCase, BaseGemv,
...
@@ -260,6 +294,23 @@ class TestGpuGemv(TestCase, BaseGemv,
gemv
=
gpu_gemv_inplace
gemv
=
gpu_gemv_inplace
gemv_inplace
=
gpu_gemv_inplace
gemv_inplace
=
gpu_gemv_inplace
class
TestGpuGemvNoTransfer
(
TestCase
,
BaseGemv
,
unittest_tools
.
TestOptimizationMixin
):
mode
=
mode_with_gpu
dtype
=
'float32'
# Mimic shared constructors registry
@staticmethod
def
shared
(
val
):
try
:
return
tcn
.
shared_constructor
(
val
)
except
TypeError
:
return
theano
.
shared
(
val
)
# In this test, inputs are not always transfered to GPU
gemv
=
gpu_gemv_no_inplace
gemv_inplace
=
gpu_gemv_inplace
class
TestVectorMatrixDot
(
TestCase
):
class
TestVectorMatrixDot
(
TestCase
):
### Tolerance factor used in this tests
### Tolerance factor used in this tests
...
@@ -286,6 +337,14 @@ class TestVectorMatrixDot(TestCase):
...
@@ -286,6 +337,14 @@ class TestVectorMatrixDot(TestCase):
assert
sum
([
node
.
op
is
gpu_gemv_inplace
for
node
in
assert
sum
([
node
.
op
is
gpu_gemv_inplace
for
node
in
gpu_f2
.
maker
.
env
.
toposort
()
])
==
1
gpu_f2
.
maker
.
env
.
toposort
()
])
==
1
# Check double-strided m
m
.
set_value
(
m
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)[::
-
1
,
::
-
1
],
borrow
=
True
)
assert
numpy
.
allclose
(
no_gpu_f
(),
gpu_f
(),
atol
=
self
.
atol
)
assert
numpy
.
allclose
(
no_gpu_f
(),
gpu_f2
(),
atol
=
self
.
atol
)
def
test_dot_mv
(
self
):
def
test_dot_mv
(
self
):
''' Test matrix dot vector '''
''' Test matrix dot vector '''
v
=
theano
.
shared
(
numpy
.
array
(
numpy
.
random
.
rand
(
2
),
dtype
=
'float32'
))
v
=
theano
.
shared
(
numpy
.
array
(
numpy
.
random
.
rand
(
2
),
dtype
=
'float32'
))
...
@@ -365,6 +424,26 @@ class TestGpuGer(TestGer):
...
@@ -365,6 +424,26 @@ class TestGpuGer(TestGer):
self
.
ger
=
gpu_ger_inplace
self
.
ger
=
gpu_ger_inplace
self
.
gemm
=
tcn
.
blas
.
gpu_gemm_inplace
self
.
gemm
=
tcn
.
blas
.
gpu_gemm_inplace
class
TestGpuGerNoTransfer
(
TestGer
):
@staticmethod
def
shared
(
val
):
try
:
return
tcn
.
shared_constructor
(
val
)
except
TypeError
:
return
theano
.
shared
(
val
)
def
setUp
(
self
):
self
.
mode
=
mode_with_gpu
dtype
=
self
.
dtype
=
'float32'
# optimization isn't dtype-dependent
self
.
A
=
tensor
.
tensor
(
dtype
=
dtype
,
broadcastable
=
(
False
,
False
))
self
.
a
=
tensor
.
tensor
(
dtype
=
dtype
,
broadcastable
=
())
self
.
x
=
tensor
.
tensor
(
dtype
=
dtype
,
broadcastable
=
(
False
,))
self
.
y
=
tensor
.
tensor
(
dtype
=
dtype
,
broadcastable
=
(
False
,))
# data on the gpu make the op always inplace
self
.
ger
=
gpu_ger_inplace
self
.
ger_destructive
=
gpu_ger_inplace
self
.
gemm
=
tcn
.
blas
.
gpu_gemm_inplace
class
TestGpuGer_OpContract
(
TestCase
,
unittest_tools
.
T_OpContractMixin
):
class
TestGpuGer_OpContract
(
TestCase
,
unittest_tools
.
T_OpContractMixin
):
def
setUp
(
self
):
def
setUp
(
self
):
...
...
theano/tensor/blas.py
浏览文件 @
1f6b0c73
...
@@ -496,7 +496,9 @@ class GemmRelated(Op):
...
@@ -496,7 +496,9 @@ class GemmRelated(Op):
if ((Sx[0] < 1) || (Sx[1] < 1) || (Sx[0] MOD type_size) || (Sx[1] MOD type_size)
if ((Sx[0] < 1) || (Sx[1] < 1) || (Sx[0] MOD type_size) || (Sx[1] MOD type_size)
|| ((Sx[0] != type_size) && (Sx[1] != type_size)))
|| ((Sx[0] != type_size) && (Sx[1] != type_size)))
{
{
PyArrayObject * _x_copy = PyArray_GETCONTIGUOUS(
%(_x)
s);
PyArrayObject * _x_copy = (PyArrayObject *) PyArray_Copy(
%(_x)
s);
if (!_x_copy)
%(fail)
s
Py_XDECREF(
%(_x)
s);
Py_XDECREF(
%(_x)
s);
%(_x)
s = _x_copy;
%(_x)
s = _x_copy;
Sx =
%(_x)
s->strides;
Sx =
%(_x)
s->strides;
...
@@ -505,7 +507,9 @@ class GemmRelated(Op):
...
@@ -505,7 +507,9 @@ class GemmRelated(Op):
if ((Sy[0] < 1) || (Sy[1] < 1) || (Sy[0] MOD type_size) || (Sy[1] MOD type_size)
if ((Sy[0] < 1) || (Sy[1] < 1) || (Sy[0] MOD type_size) || (Sy[1] MOD type_size)
|| ((Sy[0] != type_size) && (Sy[1] != type_size)))
|| ((Sy[0] != type_size) && (Sy[1] != type_size)))
{
{
PyArrayObject * _y_copy = PyArray_GETCONTIGUOUS(
%(_y)
s);
PyArrayObject * _y_copy = (PyArrayObject *) PyArray_Copy(
%(_y)
s);
if (!_y_copy)
%(fail)
s
Py_XDECREF(
%(_y)
s);
Py_XDECREF(
%(_y)
s);
%(_y)
s = _y_copy;
%(_y)
s = _y_copy;
Sy =
%(_y)
s->strides;
Sy =
%(_y)
s->strides;
...
@@ -514,7 +518,9 @@ class GemmRelated(Op):
...
@@ -514,7 +518,9 @@ class GemmRelated(Op):
if ((Sz[0] < 1) || (Sz[1] < 1) || (Sz[0] MOD type_size) || (Sz[1] MOD type_size)
if ((Sz[0] < 1) || (Sz[1] < 1) || (Sz[0] MOD type_size) || (Sz[1] MOD type_size)
|| ((Sz[0] != type_size) && (Sz[1] != type_size)))
|| ((Sz[0] != type_size) && (Sz[1] != type_size)))
{
{
PyArrayObject * _z_copy = PyArray_GETCONTIGUOUS(
%(_zout)
s);
PyArrayObject * _z_copy = (PyArrayObject *) PyArray_Copy(
%(_zout)
s);
if (!_z_copy)
%(fail)
s
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = _z_copy;
%(_zout)
s = _z_copy;
Sz =
%(_zout)
s->strides;
Sz =
%(_zout)
s->strides;
...
@@ -649,7 +655,7 @@ class GemmRelated(Op):
...
@@ -649,7 +655,7 @@ class GemmRelated(Op):
self
.
end_switch_typenum
),
''
)
self
.
end_switch_typenum
),
''
)
def
build_gemm_version
(
self
):
def
build_gemm_version
(
self
):
return
(
1
0
,)
return
(
1
2
,)
class
Gemm
(
GemmRelated
):
class
Gemm
(
GemmRelated
):
"""In-place version of matrix-matrix multiplication (with accumulation):
"""In-place version of matrix-matrix multiplication (with accumulation):
...
...
theano/tensor/blas_c.py
浏览文件 @
1f6b0c73
差异被折叠。
点击展开。
theano/tensor/blas_scipy.py
浏览文件 @
1f6b0c73
...
@@ -44,7 +44,12 @@ class ScipyGer(Ger):
...
@@ -44,7 +44,12 @@ class ScipyGer(Ger):
# N.B. some versions of scipy (e.g. mine) don't actually work
# N.B. some versions of scipy (e.g. mine) don't actually work
# in-place on a, even when I tell it to.
# in-place on a, even when I tell it to.
A
=
cA
[
0
]
A
=
cA
[
0
]
if
A
.
flags
[
'C_CONTIGUOUS'
]:
if
A
.
size
==
0
:
# We don't have to do anything, A is empty.
# We need this special case because Numpy considers it
# C-contiguous, wich is confusing.
pass
elif
A
.
flags
[
'C_CONTIGUOUS'
]:
A
=
local_ger
(
calpha
[
0
],
cy
[
0
],
cx
[
0
],
a
=
A
.
T
,
A
=
local_ger
(
calpha
[
0
],
cy
[
0
],
cx
[
0
],
a
=
A
.
T
,
overwrite_a
=
int
(
self
.
destructive
))
.
T
overwrite_a
=
int
(
self
.
destructive
))
.
T
else
:
else
:
...
...
theano/tensor/tests/test_blas.py
浏览文件 @
1f6b0c73
差异被折叠。
点击展开。
theano/tensor/tests/test_blas_c.py
浏览文件 @
1f6b0c73
import
sys
import
sys
import
numpy
import
numpy
from
unittest
import
TestCase
import
theano
import
theano
import
theano.tensor
as
tensor
import
theano.tensor
as
tensor
...
@@ -14,8 +16,7 @@ from theano.tensor.blas import Gemv
...
@@ -14,8 +16,7 @@ from theano.tensor.blas import Gemv
from
theano.tests
import
unittest_tools
from
theano.tests
import
unittest_tools
from
theano.tests.unittest_tools
import
TestOptimizationMixin
from
theano.tests.unittest_tools
import
TestOptimizationMixin
from
test_blas
import
TestCase
from
theano.tensor.tests.test_blas
import
BaseGemv
,
TestBlasStrides
from
test_blas
import
BaseGemv
mode_blas_opt
=
theano
.
compile
.
get_default_mode
()
.
including
(
mode_blas_opt
=
theano
.
compile
.
get_default_mode
()
.
including
(
'BlasOpt'
,
'specialize'
,
'InplaceBlasOpt'
,
'c_blas'
)
'BlasOpt'
,
'specialize'
,
'InplaceBlasOpt'
,
'c_blas'
)
...
@@ -41,7 +42,8 @@ class TestCGer(TestCase, TestOptimizationMixin):
...
@@ -41,7 +42,8 @@ class TestCGer(TestCase, TestOptimizationMixin):
)
)
def
run_f
(
self
,
f
):
def
run_f
(
self
,
f
):
return
f
(
self
.
Aval
,
self
.
xval
,
self
.
yval
)
f
(
self
.
Aval
,
self
.
xval
,
self
.
yval
)
f
(
self
.
Aval
[::
-
1
,
::
-
1
],
self
.
xval
,
self
.
yval
)
def
b
(
self
,
bval
):
def
b
(
self
,
bval
):
return
tensor
.
as_tensor_variable
(
numpy
.
asarray
(
bval
,
dtype
=
self
.
dtype
))
return
tensor
.
as_tensor_variable
(
numpy
.
asarray
(
bval
,
dtype
=
self
.
dtype
))
...
@@ -132,6 +134,10 @@ class TestCGemv(TestCase, TestOptimizationMixin):
...
@@ -132,6 +134,10 @@ class TestCGemv(TestCase, TestOptimizationMixin):
assert
numpy
.
allclose
(
f
(
self
.
xval
,
self
.
Aval
),
assert
numpy
.
allclose
(
f
(
self
.
xval
,
self
.
Aval
),
numpy
.
dot
(
self
.
xval
,
self
.
Aval
))
numpy
.
dot
(
self
.
xval
,
self
.
Aval
))
# Test with negative strides on 2 dims
assert
numpy
.
allclose
(
f
(
self
.
xval
,
self
.
Aval
[::
-
1
,
::
-
1
]),
numpy
.
dot
(
self
.
xval
,
self
.
Aval
[::
-
1
,
::
-
1
]))
def
test_optimizations_mv
(
self
):
def
test_optimizations_mv
(
self
):
''' Test matrix dot vector '''
''' Test matrix dot vector '''
f
=
theano
.
function
([
self
.
A
,
self
.
y
],
f
=
theano
.
function
([
self
.
A
,
self
.
y
],
...
@@ -145,6 +151,10 @@ class TestCGemv(TestCase, TestOptimizationMixin):
...
@@ -145,6 +151,10 @@ class TestCGemv(TestCase, TestOptimizationMixin):
# Assert they produce the same output
# Assert they produce the same output
assert
numpy
.
allclose
(
f
(
self
.
Aval
,
self
.
yval
),
assert
numpy
.
allclose
(
f
(
self
.
Aval
,
self
.
yval
),
numpy
.
dot
(
self
.
Aval
,
self
.
yval
))
numpy
.
dot
(
self
.
Aval
,
self
.
yval
))
# Test with negative strides on 2 dims
assert
numpy
.
allclose
(
f
(
self
.
Aval
[::
-
1
,
::
-
1
],
self
.
yval
),
numpy
.
dot
(
self
.
Aval
[::
-
1
,
::
-
1
],
self
.
yval
))
def
t_gemv1
(
self
,
m_shp
):
def
t_gemv1
(
self
,
m_shp
):
''' test vector2 + dot(matrix, vector1) '''
''' test vector2 + dot(matrix, vector1) '''
...
@@ -164,17 +174,28 @@ class TestCGemv(TestCase, TestOptimizationMixin):
...
@@ -164,17 +174,28 @@ class TestCGemv(TestCase, TestOptimizationMixin):
assert
topo
==
[
CGemv
(
inplace
=
False
)],
topo
assert
topo
==
[
CGemv
(
inplace
=
False
)],
topo
#test the inplace version
#test the inplace version
f
=
theano
.
function
([],
[],
g
=
theano
.
function
([],
[],
updates
=
{
v2
:
v2
+
theano
.
dot
(
m
,
v1
)},
updates
=
{
v2
:
v2
+
theano
.
dot
(
m
,
v1
)},
mode
=
self
.
mode
)
mode
=
self
.
mode
)
# Assert they produce the same output
# Assert they produce the same output
f
()
g
()
assert
numpy
.
allclose
(
v2
.
get_value
(),
assert
numpy
.
allclose
(
v2
.
get_value
(),
numpy
.
dot
(
m
.
get_value
(),
v1
.
get_value
())
+
v2_orig
)
numpy
.
dot
(
m
.
get_value
(),
v1
.
get_value
())
+
v2_orig
)
topo
=
[
n
.
op
for
n
in
f
.
maker
.
env
.
toposort
()]
topo
=
[
n
.
op
for
n
in
g
.
maker
.
env
.
toposort
()]
assert
topo
==
[
CGemv
(
inplace
=
True
)]
assert
topo
==
[
CGemv
(
inplace
=
True
)]
# Do the same tests with a matrix with strides in both dimensions
m
.
set_value
(
m
.
get_value
(
borrow
=
True
)[::
-
1
,
::
-
1
],
borrow
=
True
)
v2
.
set_value
(
v2_orig
)
assert
numpy
.
allclose
(
f
(),
numpy
.
dot
(
m
.
get_value
(),
v1
.
get_value
())
+
v2_orig
)
g
()
assert
numpy
.
allclose
(
v2
.
get_value
(),
numpy
.
dot
(
m
.
get_value
(),
v1
.
get_value
())
+
v2_orig
)
def
test_gemv1
(
self
):
def
test_gemv1
(
self
):
self
.
t_gemv1
((
3
,
2
))
self
.
t_gemv1
((
3
,
2
))
self
.
t_gemv1
((
0
,
2
))
self
.
t_gemv1
((
0
,
2
))
...
@@ -200,6 +221,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
...
@@ -200,6 +221,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
ones_6
=
numpy
.
ones
(
6
,
dtype
=
dtype
)
ones_6
=
numpy
.
ones
(
6
,
dtype
=
dtype
)
f
(
A_val
,
ones_3
,
ones_5
)
f
(
A_val
,
ones_3
,
ones_5
)
f
(
A_val
[::
-
1
,
::
-
1
],
ones_3
,
ones_5
)
self
.
assertRaises
(
ValueError
,
f
,
A_val
,
ones_4
,
ones_5
)
self
.
assertRaises
(
ValueError
,
f
,
A_val
,
ones_4
,
ones_5
)
self
.
assertRaises
(
ValueError
,
f
,
A_val
,
ones_3
,
ones_6
)
self
.
assertRaises
(
ValueError
,
f
,
A_val
,
ones_3
,
ones_6
)
self
.
assertRaises
(
ValueError
,
f
,
A_val
,
ones_4
,
ones_6
)
self
.
assertRaises
(
ValueError
,
f
,
A_val
,
ones_4
,
ones_6
)
...
@@ -217,3 +239,6 @@ class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin):
...
@@ -217,3 +239,6 @@ class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin):
dtype
=
'float64'
dtype
=
'float64'
gemv
=
CGemv
(
inplace
=
False
)
gemv
=
CGemv
(
inplace
=
False
)
gemv_inplace
=
CGemv
(
inplace
=
True
)
gemv_inplace
=
CGemv
(
inplace
=
True
)
class
TestBlasStridesC
(
TestBlasStrides
):
mode
=
mode_blas_opt
theano/tensor/tests/test_blas_scipy.py
浏览文件 @
1f6b0c73
...
@@ -4,7 +4,7 @@ import theano
...
@@ -4,7 +4,7 @@ import theano
import
theano.tensor
as
tensor
import
theano.tensor
as
tensor
from
theano.tensor.blas_scipy
import
ScipyGer
from
theano.tensor.blas_scipy
import
ScipyGer
from
test_blas
import
TestCase
,
gemm_no_inplace
from
test_blas
import
TestCase
,
gemm_no_inplace
,
TestBlasStrides
from
theano.tests.unittest_tools
import
TestOptimizationMixin
from
theano.tests.unittest_tools
import
TestOptimizationMixin
class
TestScipyGer
(
TestCase
,
TestOptimizationMixin
):
class
TestScipyGer
(
TestCase
,
TestOptimizationMixin
):
...
@@ -30,6 +30,7 @@ class TestScipyGer(TestCase, TestOptimizationMixin):
...
@@ -30,6 +30,7 @@ class TestScipyGer(TestCase, TestOptimizationMixin):
def
run_f
(
self
,
f
):
def
run_f
(
self
,
f
):
f
(
self
.
Aval
,
self
.
xval
,
self
.
yval
)
f
(
self
.
Aval
,
self
.
xval
,
self
.
yval
)
f
(
self
.
Aval
[::
-
1
,
::
-
1
],
self
.
xval
[::
-
1
],
self
.
yval
[::
-
1
])
def
b
(
self
,
bval
):
def
b
(
self
,
bval
):
return
tensor
.
as_tensor_variable
(
numpy
.
asarray
(
bval
,
dtype
=
self
.
dtype
))
return
tensor
.
as_tensor_variable
(
numpy
.
asarray
(
bval
,
dtype
=
self
.
dtype
))
...
@@ -55,3 +56,7 @@ class TestScipyGer(TestCase, TestOptimizationMixin):
...
@@ -55,3 +56,7 @@ class TestScipyGer(TestCase, TestOptimizationMixin):
0.2
*
self
.
A
+
0.1
*
tensor
.
outer
(
self
.
x
,
self
.
y
))
0.2
*
self
.
A
+
0.1
*
tensor
.
outer
(
self
.
x
,
self
.
y
))
self
.
assertFunctionContains
(
f
,
gemm_no_inplace
)
self
.
assertFunctionContains
(
f
,
gemm_no_inplace
)
self
.
run_f
(
f
)
#DebugMode tests correctness
self
.
run_f
(
f
)
#DebugMode tests correctness
class
TestBlasStridesScipy
(
TestBlasStrides
):
mode
=
theano
.
compile
.
get_default_mode
()
mode
=
mode
.
including
(
'fast_run'
)
.
excluding
(
'gpu'
,
'c_blas'
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论