Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
1f34a482
提交
1f34a482
authored
3月 29, 2017
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Remove tentacles in tensor.
上级
1b22389f
隐藏空白字符变更
内嵌
并排
正在显示
14 个修改的文件
包含
92 行增加
和
278 行删除
+92
-278
test_others.py
theano/gpuarray/tests/test_others.py
+9
-3
test_type.py
theano/gpuarray/tests/test_type.py
+40
-0
basic.py
theano/tensor/basic.py
+0
-16
blas.py
theano/tensor/blas.py
+1
-1
elemwise.py
theano/tensor/elemwise.py
+0
-1
Conv3D.py
theano/tensor/nnet/Conv3D.py
+1
-4
conv.py
theano/tensor/nnet/conv.py
+0
-5
conv3d2d.py
theano/tensor/nnet/conv3d2d.py
+2
-65
test_conv3d2d.py
theano/tensor/nnet/tests/test_conv3d2d.py
+6
-22
subtensor.py
theano/tensor/subtensor.py
+2
-2
shape_opt_cycle.pkl
theano/tensor/tests/shape_opt_cycle.pkl
+0
-0
test_opt.py
theano/tensor/tests/test_opt.py
+18
-121
test_sharedvar.py
theano/tensor/tests/test_sharedvar.py
+10
-34
type.py
theano/tensor/type.py
+3
-4
没有找到文件。
theano/gpuarray/tests/test_others.py
浏览文件 @
1f34a482
from
.config
import
test_ctx_name
from
.config
import
test_ctx_name
,
mode_with_gpu
from
..type
import
get_context
,
GpuArrayType
,
GpuArraySharedVariable
from
..type
import
(
get_context
,
GpuArrayType
,
GpuArraySharedVariable
,
gpuarray_shared_constructor
)
import
pygpu
import
numpy
as
np
from
theano.misc.tests.test_may_share_memory
import
may_share_memory_core
from
theano.misc.pkl_utils
import
dump
,
load
from
theano.tensor.tests.test_opt
import
test_fusion
as
t_fusion
class test_fusion(t_fusion):
    """Re-run the elemwise-fusion tests from theano.tensor.tests.test_opt
    using the GPU compilation mode and the gpuarray shared constructor.

    The base class looks up its shared-variable constructor through
    ``self._shared`` (and its mode through ``self.mode``), so those are the
    attributes that have to be overridden here.
    """

    # Compilation mode consumed by the inherited tests via ``self.mode``.
    mode = mode_with_gpu

    # Historical attribute name, kept so that any code still reading
    # ``test_fusion.shared`` keeps working.
    shared = gpuarray_shared_constructor

    # Attribute actually read by the inherited tests (``self._shared``).
    # ``staticmethod`` stops Python from binding the constructor to the test
    # instance, which would otherwise pass ``self`` as its first argument.
    _shared = staticmethod(gpuarray_shared_constructor)
def
test_may_share_memory
():
ctx
=
get_context
(
test_ctx_name
)
...
...
theano/gpuarray/tests/test_type.py
浏览文件 @
1f34a482
...
...
@@ -9,6 +9,8 @@ from theano import config
from
theano.compile
import
DeepCopyOp
from
theano.misc.pkl_utils
import
CompatUnpickler
from
theano.tensor.tests.test_sharedvar
import
makeSharedTester
from
.config
import
test_ctx_name
from
.test_basic_ops
import
rand_gpuarray
from
..type
import
GpuArrayType
,
gpuarray_shared_constructor
...
...
@@ -76,3 +78,41 @@ def test_unpickle_gpuarray_as_numpy_ndarray_flag0():
assert
np
.
asarray
(
mat
)[
0
]
==
-
42.0
finally
:
config
.
experimental
.
unpickle_gpu_on_cpu
=
oldflag
# Instantiate the generic shared-variable test suite (makeSharedTester) for
# the gpuarray shared constructor.  In this variant the three
# ``*_borrow_true_alias_`` flags are all True and ``set_cast_value_inplace_``
# is False; ``test_shared_options2`` is the counterpart with different flags.
test_shared_options = makeSharedTester(
    shared_constructor_=gpuarray_shared_constructor,
    dtype_=theano.config.floatX,
    get_value_borrow_true_alias_=True,
    shared_borrow_true_alias_=True,
    set_value_borrow_true_alias_=True,
    set_value_inplace_=True,
    set_cast_value_inplace_=False,
    shared_constructor_accept_ndarray_=True,
    # Build a pygpu array in the test context from a host value.
    internal_type_=lambda v: pygpu.array(
        v, context=get_context(test_ctx_name),
        cls=pygpu._array.ndgpuarray),
    # Predicate used by the tester to recognise the internal representation.
    test_internal_type_=lambda a: isinstance(a, pygpu.gpuarray.GpuArray),
    # Symbolic op and its NumPy reference implementation.
    theano_fct_=theano.tensor.exp,
    ref_fct_=numpy.exp,
    # Same conversion as ``internal_type_`` but through ``pygpu.asarray``.
    cast_value_=lambda v: pygpu.asarray(
        v, context=get_context(test_ctx_name),
        cls=pygpu._array.ndgpuarray),
    name='test_shared_options')
# Second instantiation of the generic shared-variable test suite for the
# gpuarray shared constructor.  Compared with ``test_shared_options`` the
# three ``*_borrow_true_alias_`` flags are False and
# ``set_cast_value_inplace_`` is True.
test_shared_options2 = makeSharedTester(
    # The comma after this argument was missing, which made the whole call
    # a SyntaxError.
    shared_constructor_=gpuarray_shared_constructor,
    dtype_=theano.config.floatX,
    get_value_borrow_true_alias_=False,
    shared_borrow_true_alias_=False,
    set_value_borrow_true_alias_=False,
    set_value_inplace_=True,
    set_cast_value_inplace_=True,
    shared_constructor_accept_ndarray_=True,
    # Build a pygpu array in the test context from a host value.
    internal_type_=lambda v: pygpu.array(
        v, context=get_context(test_ctx_name),
        cls=pygpu._array.ndgpuarray),
    # Predicate used by the tester to recognise the internal representation.
    test_internal_type_=lambda a: isinstance(a, pygpu.gpuarray.GpuArray),
    # Symbolic op and its NumPy reference implementation.
    theano_fct_=theano.tensor.exp,
    ref_fct_=numpy.exp,
    # Same conversion as ``internal_type_`` but through ``pygpu.asarray``.
    cast_value_=lambda v: pygpu.asarray(
        v, context=get_context(test_ctx_name),
        cls=pygpu._array.ndgpuarray),
    name='test_shared_options2')
theano/tensor/basic.py
浏览文件 @
1f34a482
...
...
@@ -107,22 +107,6 @@ def __oplist_tag(thing, tag):
thing
.
__oplist_tags
=
tags
if
0
:
# this starts to feel like we're enumerating all the types
# the one place where this is used we should also allow for sparse
# variables
# - JB 20100226
def
as_cuda_or_tensor_variable
(
x
,
name
=
None
,
ndim
=
None
):
"""
Do the same as_tensor_variable,
but do not transfer the value on the gpu.
"""
if
hasattr
(
x
,
'_as_CudaNdarrayVariable'
):
# TODO: pass name and ndim arguments
return
x
.
_as_CudaNdarrayVariable
()
return
as_tensor_variable
(
x
,
name
,
ndim
)
def
as_tensor_variable
(
x
,
name
=
None
,
ndim
=
None
):
"""Return `x`, transformed into a `TensorType`.
...
...
theano/tensor/blas.py
浏览文件 @
1f34a482
...
...
@@ -15,7 +15,7 @@ There are four kinds of BLAS Ops in Theano:
- Python implementations (this file)
- SciPy-based (blas_scipy)
- C-based (blas_c)
-
CUDA-based (theano.sandbox.cuda.blas
)
-
GPU-based (theano.gpuarray
)
Notes
-----
...
...
theano/tensor/elemwise.py
浏览文件 @
1f34a482
from
__future__
import
absolute_import
,
print_function
,
division
import
sys
from
copy
import
copy
import
numpy
as
np
...
...
theano/tensor/nnet/Conv3D.py
浏览文件 @
1f34a482
...
...
@@ -573,10 +573,7 @@ def conv3D(V, W, b, d):
The order of dimensions does not correspond to the one in `conv2d`.
This is for optimization.
The GPU implementation is very slow. You should use
:func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>` or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>` for a
GPU graph instead.
Please use nnet.conv3d instead of this for a faster GPU implementation.
See Also
--------
...
...
theano/tensor/nnet/conv.py
浏览文件 @
1f34a482
...
...
@@ -903,7 +903,6 @@ class ConvOp(OpenMPOp):
newin
=
inputs
.
dimshuffle
((
1
,
0
,
2
,
3
))
newgz
=
gz
.
dimshuffle
((
1
,
0
,
2
,
3
))
un_p
=
self
.
unroll_patch
if
self
.
out_mode
==
'valid'
:
(
img
,
filters
)
=
(
newin
,
newgz
)
kshp_logical
=
self
.
fulloutshp
...
...
@@ -912,8 +911,6 @@ class ConvOp(OpenMPOp):
(
bsize
,
nkern
)
=
(
self
.
imshp
[
0
],
self
.
nkern
)
imshp
=
(
self
.
bsize
,
self
.
imshp
[
1
],
self
.
imshp
[
2
])
kshp
=
self
.
outshp
un_b
=
self
.
unroll_batch
un_k
=
self
.
unroll_kern
elif
self
.
out_mode
==
'full'
:
(
img
,
filters
)
=
(
newgz
,
newin
)
kshp_logical
=
None
...
...
@@ -924,8 +921,6 @@ class ConvOp(OpenMPOp):
(
bsize
,
nkern
)
=
(
self
.
nkern
,
self
.
imshp
[
0
])
imshp
=
(
self
.
bsize
,
self
.
outshp
[
0
],
self
.
outshp
[
1
])
kshp
=
self
.
imshp
[
1
:]
un_b
=
self
.
unroll_kern
un_k
=
self
.
unroll_batch
else
:
raise
NotImplementedError
(
'Only [full,valid] modes are currently supported.'
)
...
...
theano/tensor/nnet/conv3d2d.py
浏览文件 @
1f34a482
...
...
@@ -4,7 +4,6 @@ from theano.gradient import DisconnectedType
from
theano.gof
import
Op
,
Apply
,
TopoOptimizer
from
theano.gof.opt
import
copy_stack_trace
from
theano
import
tensor
import
theano.sandbox.cuda
as
cuda
def
get_diagonal_subtensor_view
(
x
,
i0
,
i1
):
...
...
@@ -16,7 +15,7 @@ def get_diagonal_subtensor_view(x, i0, i1):
It returns a partial view of x, not a partial copy.
"""
# We have to cast i0 and i0 to int because python
2.4 (and maybe later)
# We have to cast i0 and i1 to int because python
# does not support indexing with 0-dim, 'int*' ndarrays.
i0
=
int
(
i0
)
i1
=
int
(
i1
)
...
...
@@ -198,8 +197,7 @@ def conv3d(signals, filters,
Another way to define signals: (batch, time, in channel, row, column)
Another way to define filters: (out channel,time,in channel, row, column)
For the GPU, you can use this implementation or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`.
For the GPU, use nnet.conv3d.
See Also
--------
...
...
@@ -295,67 +293,6 @@ def conv3d(signals, filters,
return
out_5d
def
make_gpu_optimizer
(
op
,
to_gpu
):
"""
This function create optimizer that move some inputs to the GPU
for op that work on both CPU and GPU.
The op object is created by calling op(), so good default value
are needed.
We suppose the same op work with CPU and GPU inputs.
Parameters
----------
op
The op that support GPU inputs.
to_gpu
A list of op inputs that are moved to the GPU.
"""
@theano.gof.local_optimizer
([
op
,
cuda
.
gpu_from_host
])
def
local_to_gpu
(
node
):
"""
op(host_from_gpu()) -> host_from_gpu(op)
gpu_from_host(op) -> op(gpu_from_host)
"""
if
isinstance
(
node
.
op
,
op
):
# op(host_from_gpu()) -> host_from_gpu(op)
# If any of the input that go on the GPU are on the GPU,
# move the op to the gpu.
if
any
(
node
.
inputs
[
idx
]
.
owner
and
isinstance
(
node
.
inputs
[
idx
]
.
owner
.
op
,
cuda
.
HostFromGpu
)
for
idx
in
to_gpu
):
new_inp
=
list
(
node
.
inputs
)
for
idx
in
to_gpu
:
new_inp
[
idx
]
=
cuda
.
gpu_from_host
(
new_inp
[
idx
])
result_node
=
op
()(
*
new_inp
)
copy_stack_trace
(
node
.
outputs
[
0
],
result_node
)
transfer_node
=
result_node
.
transfer
(
'cpu'
)
copy_stack_trace
(
node
.
outputs
[
0
],
transfer_node
)
return
[
transfer_node
]
if
node
.
op
==
cuda
.
gpu_from_host
:
# gpu_from_host(op) -> op(gpu_from_host)
host_input
=
node
.
inputs
[
0
]
if
host_input
.
owner
and
isinstance
(
host_input
.
owner
.
op
,
op
):
op_node
=
host_input
.
owner
new_inp
=
list
(
op_node
.
inputs
)
for
idx
in
to_gpu
:
new_inp
[
idx
]
=
cuda
.
gpu_from_host
(
new_inp
[
idx
])
new_node
=
op
()(
*
new_inp
)
copy_stack_trace
(
host_input
,
new_node
)
return
[
new_node
]
return
False
local_to_gpu
.
__name__
=
"local_to_gpu_"
+
op
.
__name__
cuda
.
opt
.
register_opt
()(
local_to_gpu
)
if
cuda
.
cuda_available
:
make_gpu_optimizer
(
DiagonalSubtensor
,
[
0
])
make_gpu_optimizer
(
IncDiagonalSubtensor
,
[
0
,
3
])
@theano.gof.local_optimizer
([
DiagonalSubtensor
,
IncDiagonalSubtensor
])
def
local_inplace_DiagonalSubtensor
(
node
):
"""Also work for IncDiagonalSubtensor."""
...
...
theano/tensor/nnet/tests/test_conv3d2d.py
浏览文件 @
1f34a482
...
...
@@ -16,12 +16,6 @@ from theano.tensor.nnet.conv3d2d import conv3d, get_diagonal_subtensor_view, Dia
import
theano.tests.unittest_tools
as
utt
if
theano
.
config
.
mode
==
'FAST_COMPILE'
:
mode_without_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpu'
)
else
:
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpu'
)
def
test_get_diagonal_subtensor_view
(
wrap
=
lambda
a
:
a
):
x
=
numpy
.
arange
(
20
)
.
reshape
(
5
,
4
)
.
astype
(
'float32'
)
x
=
wrap
(
x
)
...
...
@@ -106,17 +100,11 @@ def check_diagonal_subtensor_view_traces(fn):
@parameterized.expand
((
'valid'
,
'full'
,
'half'
),
utt
.
custom_name_func
)
def
test_conv3d
(
border_mode
):
check_conv3d
(
border_mode
=
border_mode
,
mode
=
mode_without_gpu
,
shared
=
theano
.
tensor
.
_shared
)
# This function will also be used in theano/sandbox/cuda/tests/test_tensor_op.py,
# which is not possible if it is decorated by @parameterized.expand
def
check_conv3d
(
border_mode
,
mode
=
mode_without_gpu
,
shared
=
theano
.
tensor
.
_shared
):
if
ndimage
is
None
or
not
theano
.
config
.
cxx
:
raise
SkipTest
(
"conv3d2d tests need SciPy and a c++ compiler"
)
shared
=
theano
.
tensor
.
_shared
Ns
,
Ts
,
C
,
Hs
,
Ws
=
3
,
10
,
3
,
32
,
32
Nf
,
Tf
,
C
,
Hf
,
Wf
=
32
,
5
,
3
,
5
,
5
...
...
@@ -137,8 +125,7 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
border_mode
=
border_mode
)
newconv3d
=
theano
.
function
([],
[],
updates
=
{
s_output
:
out
},
mode
=
mode
)
updates
=
{
s_output
:
out
})
check_diagonal_subtensor_view_traces
(
newconv3d
)
t0
=
time
.
time
()
...
...
@@ -149,7 +136,6 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
gnewconv3d
=
theano
.
function
([],
[],
updates
=
[(
s_filters
,
gfilters
),
(
s_signals
,
gsignals
)],
mode
=
mode
,
name
=
'grad'
)
check_diagonal_subtensor_view_traces
(
gnewconv3d
)
...
...
@@ -163,7 +149,7 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
signals
=
numpy
.
random
.
rand
(
Ns
,
Ts
,
C
,
Hs
,
Ws
)
.
astype
(
'float32'
)
filters
=
numpy
.
random
.
rand
(
Nf
,
Tf
,
C
,
Hf
,
Wf
)
.
astype
(
'float32'
)
utt
.
verify_grad
(
lambda
s
,
f
:
conv3d
(
s
,
f
,
border_mode
=
border_mode
),
[
signals
,
filters
],
eps
=
1e-1
,
mode
=
mode
)
[
signals
,
filters
],
eps
=
1e-1
)
# Additional Test that covers the case of patched implementation for filter with Tf=1
Ns
,
Ts
,
C
,
Hs
,
Ws
=
3
,
10
,
3
,
32
,
32
...
...
@@ -186,8 +172,7 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
border_mode
=
border_mode
)
newconv3d
=
theano
.
function
([],
[],
updates
=
{
s_output
:
out
},
mode
=
mode
)
updates
=
{
s_output
:
out
})
t0
=
time
.
time
()
newconv3d
()
...
...
@@ -197,7 +182,6 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
gnewconv3d
=
theano
.
function
([],
[],
updates
=
[(
s_filters
,
gfilters
),
(
s_signals
,
gsignals
)],
mode
=
mode
,
name
=
'grad'
)
t0
=
time
.
time
()
...
...
@@ -210,4 +194,4 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
signals
=
numpy
.
random
.
rand
(
Ns
,
Ts
,
C
,
Hs
,
Ws
)
.
astype
(
'float32'
)
filters
=
numpy
.
random
.
rand
(
Nf
,
Tf
,
C
,
Hf
,
Wf
)
.
astype
(
'float32'
)
utt
.
verify_grad
(
lambda
s
,
f
:
conv3d
(
s
,
f
,
border_mode
=
border_mode
),
[
signals
,
filters
],
eps
=
1e-1
,
mode
=
mode
)
[
signals
,
filters
],
eps
=
1e-1
)
theano/tensor/subtensor.py
浏览文件 @
1f34a482
...
...
@@ -640,7 +640,7 @@ class Subtensor(Op):
strides_mul
=
None
):
"""
The parameters c_prefix are there to allow reusing this
function on PyArray and
CudaNda
rray object.
function on PyArray and
GpuA
rray object.
This fct take as input the x.
...
...
@@ -1373,7 +1373,7 @@ class IncSubtensor(Op):
# but subclasses may override the helper methods
# to change the particulars, e.g. GpuIncSubtensor
# turns the view/copy operations on numpy arrays
# into the same operations on
cuda
arrays.
# into the same operations on
gpu
arrays.
self
.
do_type_checking
(
node
)
...
...
theano/tensor/tests/shape_opt_cycle.pkl
deleted
100644 → 0
浏览文件 @
1b22389f
File deleted
theano/tensor/tests/test_opt.py
浏览文件 @
1f34a482
...
...
@@ -2,8 +2,6 @@ from __future__ import absolute_import, print_function, division
import
copy
import
logging
import
os
import
sys
import
time
import
unittest
...
...
@@ -14,7 +12,7 @@ from nose.tools import assert_raises, assert_true
import
theano
import
theano.scalar
as
scal
from
six
import
PY3
,
StringIO
from
six
import
StringIO
from
theano
import
compile
from
theano.compile
import
deep_copy_op
,
DeepCopyOp
from
theano.compile
import
get_mode
...
...
@@ -909,7 +907,10 @@ def test_const_type_in_mul_canonizer():
class
test_fusion
(
unittest
.
TestCase
):
def
do
(
self
,
mode
,
shared_fn
,
shp
,
gpu
=
False
,
nb_repeat
=
1
,
assert_len_topo
=
True
,
slice
=
None
):
mode
=
copy
.
copy
(
compile
.
mode
.
get_default_mode
())
_shared
=
shared
def
do
(
self
,
mode
,
shared_fn
,
shp
,
nb_repeat
=
1
,
assert_len_topo
=
True
,
slice
=
None
):
"""
param shared_fn: if None, will use compile.function
verify that the elemwise fusion work
...
...
@@ -1103,14 +1104,9 @@ class test_fusion(unittest.TestCase):
nb_elemwise
,
answer
,
out_dtype
]
in
enumerate
(
cases
):
if
isinstance
(
out_dtype
,
dict
):
out_dtype
=
out_dtype
[
config
.
cast_policy
]
if
(
gpu
and
(
out_dtype
!=
'float32'
or
any
(
i
.
dtype
!=
'float32'
for
i
in
g
.
owner
.
inputs
))):
print
(
"Skip test
%
d as the gpu code currently supports only float32"
%
id
)
continue
print
(
"new cases"
,
id
)
if
shared_fn
is
None
:
assert
gpu
is
False
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
for
x
in
xrange
(
nb_repeat
):
out
=
f
(
*
val_inputs
)
...
...
@@ -1139,17 +1135,7 @@ class test_fusion(unittest.TestCase):
print
(
out
)
print
(
answer
*
nb_repeat
)
topo
=
f
.
maker
.
fgraph
.
toposort
()
if
gpu
:
import
theano.sandbox.cuda
as
cuda
topo_
=
[
x
for
x
in
topo
if
not
isinstance
(
x
.
op
,
(
cuda
.
basic_ops
.
GpuFromHost
,
cuda
.
basic_ops
.
HostFromGpu
))]
gpu_
=
[
x
for
x
in
topo
if
isinstance
(
x
.
op
,
cuda
.
basic_ops
.
GpuFromHost
)]
if
not
len
(
gpu_
)
==
len
(
sym_inputs
):
fail2
.
append
((
id
,
gpu_
,
sym_inputs
))
else
:
topo_
=
topo
topo_
=
topo
if
assert_len_topo
:
if
not
len
(
topo_
)
==
nb_elemwise
:
fail3
.
append
((
id
,
topo_
,
nb_elemwise
))
...
...
@@ -1177,62 +1163,24 @@ class test_fusion(unittest.TestCase):
def
test_elemwise_fusion
(
self
):
shp
=
(
5
,
5
)
mode
=
copy
.
copy
(
compile
.
mode
.
get_default_mode
()
)
mode
=
copy
.
copy
(
self
.
mode
)
# we need the optimisation enabled and the canonicalize.
# the canonicalize is needed to merge multiplication/addition by constant.
mode
.
_optimizer
=
mode
.
_optimizer
.
including
(
'local_elemwise_fusion'
,
'composite_elemwise_fusion'
,
'canonicalize'
)
self
.
do
(
mode
,
shared
,
shp
)
self
.
do
(
mode
,
s
elf
.
_s
hared
,
shp
)
@attr
(
'slow'
)
def
test_elemwise_fusion_4d
(
self
):
shp
=
(
3
,
3
,
3
,
3
)
mode
=
copy
.
copy
(
compile
.
mode
.
get_default_mode
()
)
mode
=
copy
.
copy
(
self
.
mode
)
# we need the optimisation enabled and the canonicalize.
# the canonicalize is needed to merge multiplication/addition by constant.
mode
.
_optimizer
=
mode
.
_optimizer
.
including
(
'local_elemwise_fusion'
,
'composite_elemwise_fusion'
,
'canonicalize'
)
self
.
do
(
mode
,
shared
,
shp
)
def
test_gpu_fusion
(
self
):
shp
=
(
5
,
5
)
# we need the optimisation enabled, debug do this.
if
theano
.
config
.
mode
==
"FAST_COMPILE"
:
mode
=
theano
.
compile
.
mode
.
get_mode
(
"FAST_RUN"
)
.
including
(
'local_elemwise_fusion'
,
'composite_elemwise_fusion'
,
'canonicalize'
,
'gpu'
)
else
:
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'local_elemwise_fusion'
,
'composite_elemwise_fusion'
,
'canonicalize'
,
'gpu'
)
import
theano.sandbox.cuda
as
cuda
if
not
cuda
.
cuda_available
:
raise
SkipTest
(
"cuda not available"
)
self
.
do
(
mode
,
cuda
.
float32_shared_constructor
,
shp
,
gpu
=
True
)
@attr
(
'slow'
)
def
test_gpu_fusion_Xd
(
self
):
# we need the optimisation enabled, debug do this.
if
theano
.
config
.
mode
==
"FAST_COMPILE"
:
mode
=
theano
.
compile
.
mode
.
get_mode
(
"FAST_RUN"
)
.
including
(
'local_elemwise_fusion'
,
'composite_elemwise_fusion'
,
'canonicalize'
,
'gpu'
)
else
:
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'local_elemwise_fusion'
,
'composite_elemwise_fusion'
,
'canonicalize'
,
'gpu'
)
import
theano.sandbox.cuda
as
cuda
if
not
cuda
.
cuda_available
:
raise
SkipTest
(
"cuda not available"
)
sizes
=
cuda
.
opt
.
get_device_type_sizes
()
if
sizes
[
'int_size'
]
==
4
:
shp
=
(
5
,
5
,
5
,
5
)
else
:
shp
=
(
5
,
5
,
5
)
self
.
do
(
mode
,
cuda
.
float32_shared_constructor
,
shp
,
gpu
=
True
)
self
.
do
(
mode
,
self
.
_shared
,
shp
)
def
test_fusion_35inputs
(
self
):
# Make sure a fused graph with more than 35 inputs does not segfault
...
...
@@ -1244,7 +1192,7 @@ class test_fusion(unittest.TestCase):
for
idx
in
xrange
(
1
,
35
):
out
=
tensor
.
sin
(
inpts
[
idx
]
+
out
)
f
=
function
(
inpts
,
out
)
f
=
function
(
inpts
,
out
,
mode
=
self
.
mode
)
# Test it on some dummy values
f
(
*
[
list
(
range
(
i
,
4
+
i
))
for
i
in
xrange
(
35
)])
...
...
@@ -1280,7 +1228,7 @@ class test_fusion(unittest.TestCase):
dlogp
=
function
(
vars
,
[
theano
.
grad
(
logp
,
v
)
for
v
in
vars
])
dlogp
(
2
,
np
.
random
.
rand
(
n
))
def
speed_fusion
(
self
,
s
hared_fn
=
shared
,
gpu
=
False
,
s
=
None
):
def
speed_fusion
(
self
,
s
=
None
):
"""
param type s: a slice object
param s: a slice to apply to the case to execute. If None, exec all case.
...
...
@@ -1292,18 +1240,18 @@ class test_fusion(unittest.TestCase):
# linker=gof.CLinker
# linker=gof.OpWiseCLinker
mode1
=
copy
.
copy
(
compile
.
get_default_mode
()
)
mode1
=
copy
.
copy
(
self
.
mode
)
mode1
.
_optimizer
=
mode1
.
_optimizer
.
including
(
'local_elemwise_fusion'
)
# TODO: clinker is much faster... but uses too much memory
# Possible cause: as there is no deletion of intermediate values when we don't keep the fct.
# More plausible cause: we keep a link to the output data?
# Follow up. Clinker do the same... second cause?
mode2
=
copy
.
copy
(
compile
.
get_default_mode
()
)
mode2
=
copy
.
copy
(
self
.
mode
)
mode2
.
_optimizer
=
mode2
.
_optimizer
.
excluding
(
'local_elemwise_fusion'
)
print
(
"test with linker"
,
str
(
mode1
.
linker
))
times1
=
self
.
do
(
mode1
,
s
hared_fn
,
shp
,
gpu
=
gpu
,
nb_repeat
=
nb_repeat
,
times1
=
self
.
do
(
mode1
,
s
elf
.
_shared
,
shp
,
nb_repeat
=
nb_repeat
,
assert_len_topo
=
False
,
slice
=
s
)
times2
=
self
.
do
(
mode2
,
s
hared_fn
,
shp
,
gpu
=
gpu
,
nb_repeat
=
nb_repeat
,
times2
=
self
.
do
(
mode2
,
s
elf
.
_shared
,
shp
,
nb_repeat
=
nb_repeat
,
assert_len_topo
=
False
,
slice
=
s
)
print
(
"times1 with local_elemwise_fusion"
)
print
(
times1
,
times1
.
min
(),
times1
.
max
(),
times1
.
sum
())
...
...
@@ -1317,7 +1265,7 @@ class test_fusion(unittest.TestCase):
"mean"
,
d
.
mean
(),
"std"
,
d
.
std
())
def
test_fusion_inplace
(
self
):
mode
=
copy
.
copy
(
compile
.
mode
.
get_default_mode
()
)
mode
=
copy
.
copy
(
self
.
mode
)
# we need the optimisation enabled and the canonicalize.
# the canonicalize is needed to merge multiplication/addition by constant.
mode
.
_optimizer
=
mode
.
_optimizer
.
including
(
...
...
@@ -1332,14 +1280,9 @@ class test_fusion(unittest.TestCase):
f
(
np
.
random
.
random
((
5
,
5
)),
np
.
random
.
random
((
5
,
5
)),
np
.
random
.
random
((
5
,
5
)))
def
speed_fusion_gpu
(
self
):
import
theano.sandbox.cuda
as
cuda
self
.
speed_fusion
(
shared_fn
=
cuda
.
float32_shared_constructor
,
gpu
=
True
,
s
=
slice
(
0
,
15
))
def
speed_log_exp
(
self
):
s
=
slice
(
31
,
36
)
print
(
"time"
,
self
.
do
(
None
,
shared
,
shp
=
(
1000
,
1000
),
gpu
=
False
,
print
(
"time"
,
self
.
do
(
self
.
mode
,
self
.
_shared
,
shp
=
(
1000
,
1000
)
,
assert_len_topo
=
False
,
slice
=
s
,
nb_repeat
=
100
))
def
tes_memory_leak
(
self
,
mode
=
compile
.
mode
.
Mode
(
'c'
,
'merge'
),
...
...
@@ -1505,27 +1448,6 @@ class TestCompositeCodegen(unittest.TestCase):
fval
=
f
([
1
,
2
,
3
])
assert
np
.
all
(
fval
==
[
6
,
12
,
18
])
def
test_nested_gpu
(
self
):
import
theano.sandbox.cuda
as
cuda
if
not
cuda
.
cuda_available
:
raise
SkipTest
(
"cuda not available"
)
import
theano.sandbox.cuda.opt
y
=
self
.
times_2
(
self
.
x
)
z
=
self
.
times_3
(
y
)
f
=
theano
.
function
(
[
self
.
x
],
cuda
.
gpu_from_host
(
z
),
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpu'
))
topo
=
f
.
maker
.
fgraph
.
toposort
()
if
config
.
mode
!=
"FAST_COMPILE"
:
assert
len
(
topo
)
==
2
assert
topo
[
1
]
.
op
==
cuda
.
gpu_from_host
# topo1 is doing the composite work on the CPU. Auto-generation of
# GPU code for ops with support code is not possible.
fval
=
np
.
asarray
(
f
([
1
,
2
,
3
]))
assert
np
.
all
(
fval
==
[
6
,
12
,
18
]),
fval
def
test_local_useless_composite
(
self
):
x
=
theano
.
scalar
.
float32
()
c
=
theano
.
scalar
.
Composite
([
x
],
[
x
+
1
,
x
-
1
])
...
...
@@ -4213,31 +4135,6 @@ class test_shapeoptimizer(unittest.TestCase):
f
=
theano
.
function
([
X
],
expr
,
mode
=
mode
)
print
(
f
([[
1
,
2
],
[
2
,
3
]]))
def
test_no_cycle
(
self
):
# Optimizing this graph resulted in a cycle, see gh-1549
# This test depends on cuda
import
theano.sandbox.cuda
as
cuda
if
not
cuda
.
cuda_available
:
raise
SkipTest
(
"cuda not available"
)
if
sys
.
version_info
[:
2
]
<
(
2
,
5
):
raise
SkipTest
(
"Test skipped due to a too old python"
)
# This pickle file has undergone manual surgery due to changes
# in scan and may or may not run correctly. It does passes
# the test below.
pkl_filename
=
os
.
path
.
join
(
os
.
path
.
dirname
(
theano
.
__file__
),
'tensor'
,
'tests'
,
'shape_opt_cycle.pkl'
)
# Due to incompatibilities between python 2 and 3 in the format
# of pickled numpy ndarray, we have to force an encoding
from
theano.misc.pkl_utils
import
CompatUnpickler
with
open
(
pkl_filename
,
"rb"
)
as
pkl_file
:
if
PY3
:
u
=
CompatUnpickler
(
pkl_file
,
encoding
=
"latin1"
)
else
:
u
=
CompatUnpickler
(
pkl_file
)
fn_args
=
u
.
load
()
theano
.
function
(
**
fn_args
)
class
test_assert
(
utt
.
InferShapeTester
):
...
...
theano/tensor/tests/test_sharedvar.py
浏览文件 @
1f34a482
...
...
@@ -25,7 +25,6 @@ def makeSharedTester(shared_constructor_,
theano_fct_
,
ref_fct_
,
cast_value_
=
np
.
asarray
,
op_by_matrix_
=
False
,
name
=
None
,
):
"""
...
...
@@ -49,7 +48,6 @@ def makeSharedTester(shared_constructor_,
:param theano_fct_: A theano op that will be used to do some computation on the shared variable
:param ref_fct_: A reference function that should return the same value as the theano_fct_
:param cast_value_: A callable that cast an ndarray into the internal shared variable representation
:param op_by_matrix_: When we do inplace operation on the an internal type object, should we do it with a scalar or a matrix of the same value.
:param name: This string is used to set the returned class' __name__
attribute. This is needed for nosetests to properly tag the
test with its correct name, rather than use the generic
...
...
@@ -75,7 +73,6 @@ def makeSharedTester(shared_constructor_,
set_cast_value_inplace
=
set_cast_value_inplace_
shared_constructor_accept_ndarray
=
shared_constructor_accept_ndarray_
cast_value
=
staticmethod
(
cast_value_
)
op_by_matrix
=
op_by_matrix_
def
test_shared_dont_alias
(
self
):
dtype
=
self
.
dtype
...
...
@@ -96,11 +93,7 @@ def makeSharedTester(shared_constructor_,
assert
np
.
allclose
(
self
.
ref_fct
(
x
),
total_val
)
values_to_div
=
.
5
if
self
.
op_by_matrix
:
values_to_div
=
self
.
internal_type
(
np
.
ones
(
x
.
shape
,
dtype
=
dtype
)
/
2
)
# supported for cudandarray, but not ndarray.
assert
self
.
test_internal_type
(
values_to_div
)
x
/=
values_to_div
x
/=
.
5
total_val_2
=
total_func
()
# value used to construct should not alias with internal
...
...
@@ -108,7 +101,7 @@ def makeSharedTester(shared_constructor_,
x
=
x_shared
.
get_value
(
borrow
=
False
)
x
/=
values_to_div
x
/=
.
5
total_val_3
=
total_func
()
...
...
@@ -117,7 +110,7 @@ def makeSharedTester(shared_constructor_,
# in this case we can alias
x
=
x_shared
.
get_value
(
borrow
=
True
)
x
/=
values_to_div
x
/=
.
5
# this is not required by the contract but it is a feature we've
# implemented for some type of SharedVariable.
...
...
@@ -189,12 +182,7 @@ def makeSharedTester(shared_constructor_,
x
=
x_shared
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)
assert
self
.
test_internal_type
(
x
)
values_to_div
=
.
5
if
self
.
op_by_matrix
:
# supported for cudandarray, but not ndarray.
values_to_div
=
self
.
internal_type
(
np
.
ones
(
x
.
shape
,
dtype
=
dtype
)
/
2
)
x
/=
values_to_div
# supported by ndarray and CudaNdarray
x
/=
.
5
# this is not required by the contract but it is a feature we can
# implement for some type of SharedVariable.
...
...
@@ -203,7 +191,7 @@ def makeSharedTester(shared_constructor_,
x
=
x_shared
.
get_value
(
borrow
=
False
,
return_internal_type
=
True
)
assert
self
.
test_internal_type
(
x
)
assert
x
is
not
x_shared
.
container
.
value
x
/=
values_to_div
# supported by ndarray and CudaNdarray
x
/=
.
5
# this is required by the contract
assert
not
np
.
allclose
(
self
.
ref_fct
(
x
),
total_func
())
...
...
@@ -244,16 +232,10 @@ def makeSharedTester(shared_constructor_,
total_func
=
theano
.
function
([],
total
)
total_func
()
values_to_div
=
.
5
if
self
.
op_by_matrix
:
# supported for cudandarray, but not ndarray.
values_to_div
=
self
.
internal_type
(
np
.
ones
(
x
.
shape
,
dtype
=
dtype
)
/
2
)
assert
self
.
test_internal_type
(
values_to_div
)
# test if that theano shared variable optimize set_value(borrow=True)
get_x
=
x_shared
.
get_value
(
borrow
=
True
)
assert
get_x
is
not
x_orig
# borrow=False to shared_constructor
get_x
/=
values_to_div
get_x
/=
.
5
x_shared
.
set_value
(
get_x
,
borrow
=
True
)
x
=
x_shared
.
get_value
(
borrow
=
True
)
if
self
.
set_value_borrow_true_alias
:
...
...
@@ -267,7 +249,7 @@ def makeSharedTester(shared_constructor_,
assert
get_x
is
not
x_orig
# borrow=False to shared_constructor
assert
self
.
test_internal_type
(
get_x
)
get_x
/=
values_to_div
# supported by ndarray and CudaNdarray
get_x
/=
.
5
assert
self
.
test_internal_type
(
get_x
)
x_shared
.
set_value
(
get_x
,
borrow
=
True
)
x
=
x_shared
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)
...
...
@@ -295,12 +277,7 @@ def makeSharedTester(shared_constructor_,
assert
np
.
allclose
(
self
.
ref_fct
(
x
),
total_val
)
values_to_div
=
.
5
if
self
.
op_by_matrix
:
# supported for cudandarray, but not ndarray.
values_to_div
=
self
.
internal_type
(
np
.
ones
(
x
.
shape
,
dtype
=
dtype
)
/
2
)
assert
self
.
test_internal_type
(
values_to_div
)
x
/=
values_to_div
x
/=
.
5
# not required by the contract but it is a feature we've implemented
if
self
.
shared_borrow_true_alias
:
...
...
@@ -345,9 +322,9 @@ def makeSharedTester(shared_constructor_,
if
x
.
__class__
.
__name__
!=
'csr_matrix'
:
# sparse matrix don't support inplace affectation
nd
+=
1
# THIS DON't DO WHAT WE EXPECT the contain of a is not updated for CudaNdarray, but it is for ndarray
# THIS DOESN'T DO WHAT WE EXPECT: the content of a is
# not updated for GpuArray, but it is for ndarray
x_shared
.
get_value
(
borrow
=
True
)[:]
=
nd
#assert (np.asarray(x_shared.get_value(borrow=True))!=nd).all()
assert
may_share_memory
(
old_data
,
x_shared
.
container
.
storage
[
0
])
x_shared
.
get_value
(
borrow
=
True
)
...
...
@@ -617,7 +594,6 @@ test_shared_options = makeSharedTester(
theano_fct_
=
lambda
a
:
a
*
2
,
ref_fct_
=
lambda
a
:
np
.
asarray
((
a
*
2
)),
cast_value_
=
np
.
asarray
,
op_by_matrix_
=
False
,
name
=
'test_shared_options'
)
...
...
theano/tensor/type.py
浏览文件 @
1f34a482
...
...
@@ -203,10 +203,9 @@ class TensorType(Type):
"""
Convert a symbolic Variable into a TensorType, if compatible.
For the moment, only a TensorType, GpuArrayType and
CudaNdarrayType will be
converted, provided they have the same number of dimensions and
dtype and have "compatible" broadcastable pattern.
For the moment, only a TensorType and GpuArrayType will be
converted, provided they have the same number of dimensions
and dtype and have "compatible" broadcastable pattern.
"""
if
hasattr
(
other
,
'_as_TensorVariable'
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论