Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
ff24c985
提交
ff24c985
authored
7月 23, 2009
作者:
James Bergstra
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
test_elemwise4 passed; modified setting of stride in alloc_contiguous to set…
test_elemwise4 passed; modified setting of stride in alloc_contiguous to set stride 0 for dimensions of size 1
上级
43f97ea3
隐藏空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
132 行增加
和
39 行删除
+132
-39
basic_ops.py
basic_ops.py
+49
-21
test_basic_ops.py
tests/test_basic_ops.py
+26
-1
type.py
type.py
+49
-13
var.py
var.py
+8
-4
没有找到文件。
basic_ops.py
浏览文件 @
ff24c985
...
...
@@ -19,7 +19,7 @@ class HostFromGpu(Op):
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
return
'
<HostFromGpu@
%
i>'
%
id
(
self
)
return
'
HostFromGpu'
def
make_node
(
self
,
x
):
if
not
isinstance
(
x
.
type
,
CudaNdarrayType
):
raise
TypeError
(
x
)
...
...
@@ -36,7 +36,7 @@ class GpuFromHost(Op):
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
return
'
<GpuFromHost@
%
i>'
%
id
(
self
)
return
'
GpuFromHost'
def
make_node
(
self
,
x
):
if
not
isinstance
(
x
.
type
,
tensor
.
TensorType
):
raise
TypeError
(
x
)
...
...
@@ -102,9 +102,21 @@ class GpuElemwise(Op):
if
self
.
nin
>
0
and
len
(
_inputs
)
!=
self
.
nin
:
raise
TypeError
(
'Wrong argument count'
,
(
self
.
nin
,
len
(
_inputs
)))
for
i
in
_inputs
[
1
:]:
if
i
.
type
.
broadcastable
!=
inputs
[
0
]
.
type
.
broadcastable
:
raise
NotImplementedError
(
'different bcastable'
)
otype
=
CudaNdarrayType
(
broadcastable
=
_inputs
[
0
]
.
broadcastable
)
if
i
.
type
.
ndim
!=
inputs
[
0
]
.
type
.
ndim
:
raise
TypeError
(
'different ranks among inputs'
)
# output is broadcastable only along dimensions where all inputs are broadcastable
broadcastable
=
[]
for
d
in
xrange
(
_inputs
[
0
]
.
type
.
ndim
):
bcast_d
=
True
for
i
in
_inputs
:
if
not
i
.
type
.
broadcastable
[
d
]:
bcast_d
=
False
break
broadcastable
.
append
(
bcast_d
)
assert
len
(
broadcastable
)
==
_inputs
[
0
]
.
type
.
ndim
otype
=
CudaNdarrayType
(
broadcastable
=
broadcastable
)
assert
self
.
nout
>
0
return
Apply
(
self
,
_inputs
,
[
otype
()
for
o
in
xrange
(
self
.
nout
)])
def
c_support_code
(
self
):
...
...
@@ -274,37 +286,38 @@ class GpuElemwise(Op):
nout
=
len
(
outputs
)
fail
=
sub
[
'fail'
]
opname
=
str
(
self
.
scalar_op
)
print
>>
sio
,
"""
//std::cerr << "C_CODE
%(opname)
s START
\\
n";
initial_dims
=
','
.
join
(
'1'
for
i
in
xrange
(
nd
))
if
1
or
self
.
scalar_op
==
scalar
.
pow
:
print
>>
sio
,
"""
std::cerr << "C_CODE
%(opname)
s START
\\
n";
//standard elemwise size checks
const int * dims = NULL;
"""
%
locals
()
print
>>
sio
,
"""
int dims[
%(nd)
s] = {
%(initial_dims)
s};
"""
%
locals
()
for
iname
in
inputs
:
print
>>
sio
,
"""
std::cerr << "C_CODE
%(opname)
s checking input
%(iname)
s
\\
n";
if (
%(nd)
s != cnda_
%(iname)
s->nd)
{
PyErr_Format(PyExc_TypeError, "need
%(nd)
s dims, not
%%
i", cnda_
%(iname)
s->nd);
%(fail)
s;
}
"""
%
locals
()
for
iname0
,
iname1
in
zip
(
inputs
[
1
:],
inputs
[:
-
1
]):
print
>>
sio
,
"""
//standard elemwise dim checks
for (int i = 0; i<
%(nd)
s; ++i)
{
if (cnda_
%(iname0)
s->dim[i] != cnda_
%(iname1)
s->dim[i])
dims[i] = (dims[i] == 1) ? cnda_
%(iname)
s->dim[i] : dims[i];
if ((cnda_
%(iname)
s->dim[i] != 1) && (dims[i] != cnda_
%(iname)
s->dim[i]))
{
PyErr_SetString(PyExc_TypeError, "need same dimensions");
std::cerr << "C_CODE
%(opname)
s checking input
%(iname)
s failed
\\
n";
PyErr_Format(PyExc_TypeError, "GpuElemwise input has incompatible dim[
%%
i] ==
%%
i, where output has size
%%
i",
i,
cnda_
%(iname)
s->dim[i],
dims[i]
);
%(fail)
s;
}
}
"""
%
locals
()
iname0
=
inputs
[
0
]
print
>>
sio
,
"""
dims = cnda_
%(iname0)
s->dim;
//unsigned int size = CudaNdarray_SIZE(cnda_
%(iname0)
s);
//std::cerr << "ADD size " << size << "
\\
n";
"""
%
locals
()
for
oname
in
outputs
:
print
>>
sio
,
"""
...
...
@@ -329,13 +342,14 @@ class GpuElemwise(Op):
%(fail)
s;
}
}
//
std::cerr << "ELEMWISE NEW
%(oname)
s nd" << cnda_
%(oname)
s->nd << "
\\
n";
std::cerr << "ELEMWISE NEW
%(oname)
s nd" << cnda_
%(oname)
s->nd << "
\\
n";
//std::cerr << "ELEMWISE NEW
%(oname)
s data" << cnda_
%(oname)
s->devdata << "
\\
n";
"""
%
locals
()
print
>>
sio
,
"""
{
//new block so that failure gotos don't skip over variable initialization
int log2_dims[
%(nd)
s];
std::cerr << "calling callkernel
\\
n";
callkernel_
%(nodename)
s(1, 0, dims, log2_dims
"""
%
locals
()
for
iname
in
inputs
:
...
...
@@ -349,6 +363,7 @@ class GpuElemwise(Op):
print
>>
sio
,
"""
);
std::cerr << "calling callkernel returned
\\
n";
cudaThreadSynchronize();
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
...
...
@@ -462,6 +477,12 @@ class GpuDimShuffle(Op):
#alloc an output
print
>>
sio
,
"""
if (cnda_
%(res)
s)
{
//TODO: re-use previously-allocated stuff
Py_DECREF(cnda_
%(res)
s);
cnda_
%(res)
s = NULL;
}
if (NULL == cnda_
%(res)
s) {
cnda_
%(res)
s = (CudaNdarray*) CudaNdarray_new_null();
if (NULL == cnda_
%(res)
s)
...
...
@@ -493,16 +514,23 @@ class GpuDimShuffle(Op):
#reassign the dimension and strides in the host pointers
for
i
,
o
in
enumerate
(
self
.
new_order
):
if
o
==
'x'
:
assert
node
.
outputs
[
0
]
.
type
.
broadcastable
[
i
]
print
>>
sio
,
"""
cnda_
%(res)
s->dim[
%(i)
s] = 1;
cnda_
%(res)
s->str[
%(i)
s] = 0;
"""
%
locals
()
else
:
assert
not
node
.
outputs
[
0
]
.
type
.
broadcastable
[
i
]
print
>>
sio
,
"""
cnda_
%(res)
s->dim[
%(i)
s] = cnda_
%(input)
s->dim[
%(o)
s];
cnda_
%(res)
s->str[
%(i)
s] = cnda_
%(input)
s->str[
%(o)
s];
"""
%
locals
()
for
i
,
o
in
enumerate
(
self
.
new_order
):
print
>>
sio
,
"""
std::cerr << "GpuDimShuffle " << cnda_
%(res)
s << " str[
%(i)
s] = " << cnda_
%(res)
s->str[
%(i)
s] << "
\\
n";
"""
%
locals
()
# copy the host dims and stride -> device
print
>>
sio
,
"""
if (CudaNdarray_copy_structure_to_device(cnda_
%(res)
s))
...
...
tests/test_basic_ops.py
浏览文件 @
ff24c985
...
...
@@ -87,9 +87,34 @@ def test_elemwise3():
shape
=
(
3
,
4
,
5
,
6
)
a
=
tcn
.
shared_constructor
(
numpy
.
random
.
rand
(
*
shape
),
'a'
)
b
=
tensor
.
dvector
()
b
=
tensor
.
fvector
()
print
b
.
type
print
tensor
.
constant
(
1
)
.
type
print
(
1
+
b
)
.
type
print
(
1
+
b
**
a
)
.
type
print
tensor
.
exp
((
1
+
b
**
a
))
.
type
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
(
a
+
b
)
.
dimshuffle
([
2
,
0
,
3
,
1
])
*
tensor
.
exp
(
1
+
b
**
a
)
.
dimshuffle
([
2
,
0
,
3
,
1
]))])
has_elemwise
=
False
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
>>
sys
.
stderr
,
i
,
node
has_elemwise
=
has_elemwise
or
isinstance
(
node
.
op
,
tensor
.
Elemwise
)
assert
not
has_elemwise
#let debugmode catch errors
f
(
numpy
.
random
.
rand
(
6
))
def test_elemwise4():
    """Test that two vectors can be broadcast to form an outer product
    (by performing a rank-1 matrix update).

    Builds a function whose only effect is the update
    ``a <- a + b.dimshuffle('x', 0) * c.dimshuffle(0, 'x')`` on the shared
    variable ``a``, checks that no ``tensor.Elemwise`` node is left in the
    compiled graph, and then runs the function once.
    """
    shape = (3, 4)
    a = tcn.shared_constructor(numpy.random.rand(*shape), 'a')
    b = tensor.fvector()
    c = tensor.fvector()

    # Both declared inputs must take part in the update: ``b`` gets a leading
    # broadcastable axis and ``c`` a trailing one, so their product has the
    # same rank as ``a``.  (This previously referenced an undefined name ``x``
    # instead of ``c``, which would raise NameError while building the graph.)
    outer = b.dimshuffle('x', 0) * c.dimshuffle(0, 'x')
    f = pfunc([b, c], [], updates=[(a, (a + outer))])

    # Dump the compiled graph to stderr and record whether any node is still a
    # tensor.Elemwise -- presumably that would mean the op was not replaced by
    # its GPU counterpart.
    has_elemwise = False
    for i, node in enumerate(f.maker.env.toposort()):
        print >> sys.stderr, i, node
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise

    #let debugmode catch errors
    # Argument lengths follow ``shape``: b has shape[1] == 4 entries and
    # c has shape[0] == 3 entries.
    f(numpy.random.rand(4), numpy.random.rand(3))
type.py
浏览文件 @
ff24c985
import
sys
,
os
import
sys
,
os
,
StringIO
import
numpy
from
theano
import
Op
,
Type
,
Apply
,
Variable
,
Constant
...
...
@@ -130,10 +130,45 @@ class CudaNdarrayType(Type):
return
"cnda_
%(name)
s = NULL;"
%
locals
()
def
c_extract
(
self
,
name
,
sub
):
return
"""
sio
=
StringIO
.
StringIO
()
fail
=
sub
[
'fail'
]
nd
=
self
.
ndim
print
>>
sio
,
"""
if (CudaNdarray_Check(py_
%(name)
s))
{
cnda_
%(name)
s = (CudaNdarray*)py_
%(name)
s;
std::cerr << "c_extract " << cnda_
%(name)
s << '
\\
n';
if (cnda_
%(name)
s->nd !=
%(nd)
s)
{
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has rank
%%
i, it was supposed to have rank
%(nd)
s", cnda_
%(name)
s->nd);
cnda_
%(name)
s = NULL;
%(fail)
s;
}
std::cerr << "c_extract " << cnda_
%(name)
s << " nd check passed
\\
n";
"""
%
locals
()
for
i
,
b
in
enumerate
(
self
.
broadcastable
):
if
b
:
print
>>
sio
,
"""
if (cnda_
%(name)
s->dim[
%(i)
s] != 1)
{
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has dim
%%
i on broadcastable dimension
%%
i", cnda_
%(name)
s->dim[
%(i)
s],
%(i)
s);
cnda_
%(name)
s = NULL;
%(fail)
s;
}
std::cerr << "c_extract " << cnda_
%(name)
s << "dim check
%(i)
s passed
\\
n";
std::cerr << "c_extract " << cnda_
%(name)
s << "checking bcast
%(i)
s <" << cnda_
%(name)
s->str<< ">
\\
n";
std::cerr << "c_extract " << cnda_
%(name)
s->str[
%(i)
s] << "
\\
n";
if (cnda_
%(name)
s->str[
%(i)
s])
{
std::cerr << "c_extract bad stride detected...
\\
n";
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has a nonzero stride
%%
i on a broadcastable dimension
%%
i", cnda_
%(name)
s->str[
%(i)
s],
%(i)
s);
cnda_
%(name)
s = NULL;
%(fail)
s;
}
std::cerr << "c_extract " << cnda_
%(name)
s << "bcast check
%(i)
s passed
\\
n";
"""
%
locals
()
print
>>
sio
,
"""
assert(cnda_
%(name)
s);
Py_INCREF(py_
%(name)
s);
}
else
...
...
@@ -142,12 +177,19 @@ class CudaNdarrayType(Type):
cnda_
%(name)
s = NULL;
%(fail)
s;
}
"""
%
dict
(
sub
,
name
=
name
,
type_num
=
self
.
dtype_specs
()[
2
])
std::cerr << "c_extract done " << cnda_
%(name)
s << '
\\
n';
"""
%
locals
()
#print sio.getvalue()
return
sio
.
getvalue
()
def
c_cleanup
(
self
,
name
,
sub
):
return
"""
//std::cerr << "cleanup " << py_
%(name)
s << "
\\
n";
Py_XDECREF(py_
%(name)
s);
std::cerr << "cleanup " << py_
%(name)
s << " " << cnda_
%(name)
s << "
\\
n";
if (cnda_
%(name)
s)
{
Py_XDECREF(cnda_
%(name)
s);
}
std::cerr << "cleanup done" << py_
%(name)
s << "
\\
n";
"""
%
locals
()
def
c_sync
(
self
,
name
,
sub
):
...
...
@@ -194,12 +236,6 @@ class CudaNdarrayType(Type):
def
c_code_cache_version
(
self
):
return
()
#do not cache this stuff until it matures
def
c_compiler
(
self
):
return
nvcc_module_compile_str
def
c_compiler
(
self
):
return
nvcc_module_compile_str
var.py
浏览文件 @
ff24c985
...
...
@@ -49,10 +49,14 @@ class CudaNdarraySharedVariable(SharedVariable, _operators):
if
hasattr
(
other
,
'_as_CudaNdarrayVariable'
):
return
other
.
_as_CudaNdarrayVariable
()
if
isinstance
(
other
.
type
,
tensor
.
TensorType
)
and
(
other
.
type
.
dtype
==
self
.
dtype
)
and
(
other
.
broadcastable
==
self
.
broadcastable
):
return
GpuFromHost
()(
other
)
else
:
raise
TypeError
((
other
,
other
.
type
))
if
not
isinstance
(
other
.
type
,
tensor
.
TensorType
):
raise
TypeError
(
'Incompatible type'
,
other
.
type
)
if
(
other
.
type
.
dtype
!=
self
.
dtype
):
raise
TypeError
(
'Incompatible dtype'
,
(
self
.
dtype
,
other
.
type
.
dtype
))
if
(
other
.
type
.
broadcastable
!=
self
.
broadcastable
):
raise
TypeError
(
'Incompatible broadcastable'
,
(
self
.
broadcastable
,
other
.
type
.
broadcastable
))
return
GpuFromHost
()(
other
)
CudaNdarrayType
.
SharedVariable
=
CudaNdarraySharedVariable
def
shared_constructor
(
value
,
name
,
strict
=
False
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论