Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
12ec7339
提交
12ec7339
authored
4月 01, 2014
作者:
abergeron
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1790 from nouiz/mixed
Mixed
上级
66d7f5c7
0860c441
隐藏空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
72 行增加
和
31 行删除
+72
-31
install_ubuntu.txt
doc/install_ubuntu.txt
+4
-15
profiling.py
theano/compile/profiling.py
+13
-4
check_blas.py
theano/misc/check_blas.py
+1
-0
basic_ops.py
theano/sandbox/gpuarray/basic_ops.py
+34
-4
test_basic_ops.py
theano/sandbox/gpuarray/tests/test_basic_ops.py
+14
-5
basic.py
theano/tensor/basic.py
+4
-1
opt.py
theano/tensor/opt.py
+2
-2
没有找到文件。
doc/install_ubuntu.txt
浏览文件 @
12ec7339
...
@@ -3,8 +3,8 @@
...
@@ -3,8 +3,8 @@
Easy Installation of an optimized Theano on Ubuntu
Easy Installation of an optimized Theano on Ubuntu
==================================================
==================================================
These instruction was done for Ubuntu 11.04, 11.10
and 12.04. You can
These instruction was done for Ubuntu 11.04, 11.10
, 12.04, 12.10, 13.04
probably do something similar on older computer.
and 13.10. You can
probably do something similar on older computer.
.. note::
.. note::
...
@@ -49,7 +49,7 @@ probably do something similar on older computer.
...
@@ -49,7 +49,7 @@ probably do something similar on older computer.
Installation steps
Installation steps
~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~
Ubuntu 11.10/12.04/12.10/13.04:
Ubuntu 11.10/12.04/12.10/13.04
/13.10
:
1) ``sudo apt-get install python-numpy python-scipy python-dev python-pip python-nose g++ libopenblas-dev git``
1) ``sudo apt-get install python-numpy python-scipy python-dev python-pip python-nose g++ libopenblas-dev git``
2) ``sudo pip install Theano``
2) ``sudo pip install Theano``
...
@@ -236,15 +236,4 @@ Test GPU configuration
...
@@ -236,15 +236,4 @@ Test GPU configuration
Ubuntu 12.10: default gcc version 4.7.2. gcc 4.4.7, 4.5.4 and 4.6.3 availables.
Ubuntu 12.10: default gcc version 4.7.2. gcc 4.4.7, 4.5.4 and 4.6.3 availables.
Ubuntu 13.10: default gcc version 4.8.1. gcc 4.4.7, 4.6.4 and 4.7.3 availables.
theano/compile/profiling.py
浏览文件 @
12ec7339
...
@@ -507,13 +507,22 @@ class ProfileStats(object):
...
@@ -507,13 +507,22 @@ class ProfileStats(object):
print
>>
file
,
header_str
print
>>
file
,
header_str
atimes
=
[(
topos
=
{}
# Only do the topo once per fct.
atimes
=
[]
for
a
,
t
in
self
.
apply_time
.
items
():
if
a
.
fgraph
not
in
topos
:
topo
=
a
.
fgraph
.
toposort
()
topos
[
a
.
fgraph
]
=
topo
else
:
topo
=
topos
[
a
.
fgraph
]
atimes
.
append
((
t
*
100
/
local_time
,
t
*
100
/
local_time
,
t
,
t
,
a
,
a
,
a
.
fgraph
.
toposort
()
.
index
(
a
),
topo
.
index
(
a
),
self
.
apply_callcount
[
a
])
self
.
apply_callcount
[
a
]))
for
a
,
t
in
self
.
apply_time
.
items
()]
del
topos
atimes
.
sort
()
atimes
.
sort
()
atimes
.
reverse
()
atimes
.
reverse
()
tot
=
0
tot
=
0
...
...
theano/misc/check_blas.py
浏览文件 @
12ec7339
...
@@ -203,6 +203,7 @@ if __name__ == "__main__":
...
@@ -203,6 +203,7 @@ if __name__ == "__main__":
cuda version 5.5 5.0 4.2 4.1 4.0 3.2 3.0 # note
cuda version 5.5 5.0 4.2 4.1 4.0 3.2 3.0 # note
gpu
gpu
K6000/NOECC 0.06s
K20m/ECC 0.07s
K20m/ECC 0.07s
K20/NOECC 0.07s
K20/NOECC 0.07s
M2090 0.19s
M2090 0.19s
...
...
theano/sandbox/gpuarray/basic_ops.py
浏览文件 @
12ec7339
...
@@ -3,12 +3,12 @@ import os
...
@@ -3,12 +3,12 @@ import os
import
numpy
import
numpy
import
theano
import
theano
from
theano
import
Op
,
Type
,
Apply
,
Variable
,
Constant
from
theano
import
Op
,
Apply
from
theano
import
tensor
,
scalar
,
config
from
theano
import
tensor
,
scalar
,
config
from
theano.scalar
import
Scalar
from
theano.scalar
import
Scalar
from
theano.tensor.basic
import
Alloc
from
theano.tensor.basic
import
Alloc
from
theano.gof.python25
import
a
ll
,
a
ny
from
theano.gof.python25
import
any
from
theano.gof.utils
import
MethodNotDefined
from
theano.gof.utils
import
MethodNotDefined
from
theano.compat
import
PY3
from
theano.compat
import
PY3
...
@@ -257,7 +257,7 @@ class GpuFromHost(Op):
...
@@ -257,7 +257,7 @@ class GpuFromHost(Op):
def
R_op
(
self
,
inputs
,
eval_points
):
def
R_op
(
self
,
inputs
,
eval_points
):
ev
,
=
eval_points
ev
,
=
eval_points
if
isintance
(
ev
,
GpuArrayType
):
if
isin
s
tance
(
ev
,
GpuArrayType
):
return
[
host_from_gpu
(
ev
)]
return
[
host_from_gpu
(
ev
)]
else
:
else
:
return
ev
return
ev
...
@@ -317,7 +317,7 @@ class GpuFromCuda(Op):
...
@@ -317,7 +317,7 @@ class GpuFromCuda(Op):
def
R_op
(
self
,
inputs
,
eval_points
):
def
R_op
(
self
,
inputs
,
eval_points
):
ev
,
=
eval_points
ev
,
=
eval_points
if
isintance
(
ev
,
GpuArrayType
):
if
isin
s
tance
(
ev
,
GpuArrayType
):
return
[
cuda_from_gpu
(
ev
)]
return
[
cuda_from_gpu
(
ev
)]
else
:
else
:
return
ev
return
ev
...
@@ -651,6 +651,36 @@ class GpuAlloc(HideC, Alloc):
...
@@ -651,6 +651,36 @@ class GpuAlloc(HideC, Alloc):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
2
,)
return
(
2
,)
def
do_constant_folding
(
self
,
node
):
for
client
in
node
.
outputs
[
0
]
.
clients
:
if
client
[
0
]
==
'output'
:
# If the output is a constant, it will have to be deepcopied
# each time the function is called. So we do not fold.
return
False
elif
(
#The following ops work inplace of their input id 0.
client
[
1
]
==
0
and
isinstance
(
client
[
0
]
.
op
,
(
#Ops that will work inplace on the Alloc. So if they
#get constant_folded, they would copy the
#constant and this is less efficients.
#Not doing the constant folding could also lower
#the peak memory usage, as we the "constant" won't
#always exists.
#theano.tensor.subtensor.AdvancedIncSubtensor,
theano
.
sandbox
.
gpuarray
.
subtensor
.
GpuIncSubtensor
,
#theano.sandbox.gpuarray.subtensor.GpuAdvancedIncSubtensor1,
theano
.
sandbox
.
gpuarray
.
blas
.
GpuGemm
,
theano
.
sandbox
.
gpuarray
.
blas
.
GpuGemv
,
#theano.sandbox.gpuarray.blas.GpuGer, Not Yet implemented
))):
return
False
#If the clients is a transfer, we don't want to fold. We
#let the moving opt finish before deciding what to do.
elif
isinstance
(
client
[
0
]
.
op
,
HostFromGpu
):
return
False
return
True
gpu_alloc
=
GpuAlloc
()
gpu_alloc
=
GpuAlloc
()
...
...
theano/sandbox/gpuarray/tests/test_basic_ops.py
浏览文件 @
12ec7339
...
@@ -32,11 +32,13 @@ if not theano.sandbox.gpuarray.pygpu_activated:
...
@@ -32,11 +32,13 @@ if not theano.sandbox.gpuarray.pygpu_activated:
from
theano.sandbox.gpuarray.type
import
(
GpuArrayType
,
from
theano.sandbox.gpuarray.type
import
(
GpuArrayType
,
gpuarray_shared_constructor
)
gpuarray_shared_constructor
)
from
theano.sandbox.gpuarray.basic_ops
import
(
host_from_gpu
,
gpu_from_host
,
from
theano.sandbox.gpuarray.basic_ops
import
(
gpu_alloc
,
gpu_from_cuda
,
host_from_gpu
,
gpu_from_host
,
cuda_from_gpu
,
HostFromGpu
,
gpu_alloc
,
GpuAlloc
,
GpuFromHost
,
GpuReshape
,
gpu_from_cuda
,
GpuEye
)
cuda_from_gpu
,
HostFromGpu
,
GpuFromHost
,
GpuReshape
,
GpuEye
)
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
utt
.
seed_rng
()
utt
.
seed_rng
()
...
@@ -290,6 +292,13 @@ GpuAllocTester = makeTester(
...
@@ -290,6 +292,13 @@ GpuAllocTester = makeTester(
)
)
class
TestAlloc
(
theano
.
tensor
.
tests
.
test_basic
.
TestAlloc
):
dtype
=
"float32"
mode
=
mode_with_gpu
shared
=
staticmethod
(
gpuarray_shared_constructor
)
allocs
=
[
GpuAlloc
,
GpuAlloc
,
T
.
Alloc
]
def
test_shape
():
def
test_shape
():
x
=
GpuArrayType
(
dtype
=
'float32'
,
broadcastable
=
[
False
,
False
,
False
])()
x
=
GpuArrayType
(
dtype
=
'float32'
,
broadcastable
=
[
False
,
False
,
False
])()
v
=
gpuarray
.
zeros
((
3
,
4
,
5
),
dtype
=
'float32'
)
v
=
gpuarray
.
zeros
((
3
,
4
,
5
),
dtype
=
'float32'
)
...
...
theano/tensor/basic.py
浏览文件 @
12ec7339
...
@@ -1369,6 +1369,9 @@ class MaxAndArgmax(Op):
...
@@ -1369,6 +1369,9 @@ class MaxAndArgmax(Op):
"""
%
locals
()
"""
%
locals
()
ret
=
"""
ret
=
"""
int axis;
int axis;
Py_CLEAR(
%(max)
s);
Py_CLEAR(
%(argmax)
s);//todo pass them as out parameter.
%(axis_code)
s
%(axis_code)
s
%(max)
s = (PyArrayObject*)PyArray_Max(
%(x)
s, axis, NULL);
%(max)
s = (PyArrayObject*)PyArray_Max(
%(x)
s, axis, NULL);
if(
%(max)
s == NULL){
if(
%(max)
s == NULL){
...
@@ -1407,7 +1410,7 @@ class MaxAndArgmax(Op):
...
@@ -1407,7 +1410,7 @@ class MaxAndArgmax(Op):
return
ret
%
locals
()
return
ret
%
locals
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
2
,)
return
(
3
,)
def
infer_shape
(
self
,
node
,
shapes
):
def
infer_shape
(
self
,
node
,
shapes
):
ishape
,
axis_shape
=
shapes
ishape
,
axis_shape
=
shapes
...
...
theano/tensor/opt.py
浏览文件 @
12ec7339
...
@@ -4049,8 +4049,8 @@ def constant_folding(node):
...
@@ -4049,8 +4049,8 @@ def constant_folding(node):
return
rval
return
rval
register_canonicalize
(
constant_folding
,
'fast_compile'
)
register_canonicalize
(
constant_folding
,
'fast_compile'
)
register_stabilize
(
constant_folding
)
register_stabilize
(
constant_folding
,
'fast_compile'
)
register_specialize
(
constant_folding
)
register_specialize
(
constant_folding
,
'fast_compile'
)
def
_is_1
(
expr
):
def
_is_1
(
expr
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论