Commit 2f0ab791

Merge pull request #4763 from abergeron/gpuadvsub

Gpuadvsub

Authored Oct 06, 2016 by Pascal Lamblin; committed via GitHub on Oct 06, 2016.
Parents: ee4c4e21, de5e3064

Showing 7 changed files with 222 additions and 42 deletions:
- theano/gpuarray/opt.py (+9, -1)
- theano/gpuarray/subtensor.py (+101, -0)
- theano/gpuarray/tests/test_subtensor.py (+40, -1)
- theano/sandbox/cuda/tests/test_basic_ops.py (+13, -16)
- theano/tensor/subtensor.py (+3, -10)
- theano/tensor/tests/test_basic.py (+1, -4)
- theano/tensor/tests/test_subtensor.py (+55, -10)
theano/gpuarray/opt.py
@@ -52,6 +52,7 @@ from .nnet import (gpu_crossentropy_softmax_1hot_with_bias_dx,
 from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
                        GpuCAReduceCPY, gpu_ca_reduce_cuda)
 from .subtensor import (GpuIncSubtensor, GpuSubtensor,
+                        GpuAdvancedSubtensor,
                         GpuAdvancedSubtensor1,
                         GpuAdvancedIncSubtensor1,
                         GpuAdvancedIncSubtensor1_dev20)
@@ -971,10 +972,17 @@ def local_gpua_inc_subtensor(op, context_name, inputs, outputs):
 @register_opt('fast_compile')
 @op_lifter([tensor.AdvancedSubtensor1])
 @register_opt2([tensor.AdvancedSubtensor1], 'fast_compile')
-def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
+def local_gpua_advanced_subtensor1(op, context_name, inputs, outputs):
     return GpuAdvancedSubtensor1()
 
 
+@register_opt('fast_compile')
+@op_lifter([tensor.AdvancedSubtensor])
+@register_opt2([tensor.AdvancedSubtensor], 'fast_compile')
+def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
+    return GpuAdvancedSubtensor()
+
+
 @register_opt('fast_compile')
 @op_lifter([tensor.AdvancedIncSubtensor1])
 @register_opt2([tensor.AdvancedIncSubtensor1], 'fast_compile')
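
For readers unfamiliar with the lifter machinery: the decorators above register a rewrite that swaps a CPU op for its GPU counterpart when a graph is moved to a GPU context. Below is a minimal, self-contained sketch of that registration pattern; the `LIFTERS` dict and the simplified `op_lifter` are hypothetical stand-ins for illustration, not Theano's actual implementation (which rewrites Apply nodes and also handles the `register_opt`/`register_opt2` tagging).

```python
# Hypothetical sketch of the op-lifting pattern: a registry maps a CPU
# op class to a function that builds the equivalent GPU op.
class AdvancedSubtensor(object):
    """Stand-in for the CPU op tensor.AdvancedSubtensor."""

class GpuAdvancedSubtensor(object):
    """Stand-in for the GPU op."""

LIFTERS = {}  # CPU op class -> lifter function (hypothetical registry)

def op_lifter(cpu_op_classes):
    def wrap(fn):
        for cls in cpu_op_classes:
            LIFTERS[cls] = fn
        return fn
    return wrap

@op_lifter([AdvancedSubtensor])
def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
    # A lifter only returns the replacement op; the optimizer rebuilds
    # the surrounding Apply node and inserts any needed transfers.
    return GpuAdvancedSubtensor()

# The optimizer consults the registry when it meets a CPU op:
cpu_op = AdvancedSubtensor()
gpu_op = LIFTERS[type(cpu_op)](cpu_op, None, [], [])
assert isinstance(gpu_op, GpuAdvancedSubtensor)
```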
theano/gpuarray/subtensor.py
@@ -472,6 +472,107 @@ if (err != GA_NO_ERROR) {
         return (0,)
 
 
+class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor):
+    """
+    AdvancedSubtensor on the GPU.
+    """
+    def make_node(self, x, *inputs):
+        ctx_name = infer_context_name(x)
+        rval = tensor.AdvancedSubtensor.make_node(self, x, *inputs)
+        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
+                             broadcastable=rval.outputs[0].type.broadcastable,
+                             context_name=ctx_name)
+        x = as_gpuarray_variable(x, ctx_name)
+        return gof.Apply(self, [x] + rval.inputs[1:], [otype()])
+
+    def perform(self, node, inputs, out_):
+        out, = out_
+        x = inputs[0]
+        idx = inputs[1:]
+
+        # detect and transpose array indices
+        nidx = []
+        nshp = list(x.shape)
+        for k, i in enumerate(idx):
+            if i is None:
+                nidx.append(slice(None))
+                nshp.insert(k, 1)
+            else:
+                nidx.append(i)
+
+        x = x.reshape(nshp)
+
+        narrays = 0
+        transp = list(range(x.ndim))
+        p = 0
+        # ap gives the position of the array in case there is only one.
+        # if there are more than one (narrays > 1) it should be ignored.
+        ap = 0
+        for k, i in enumerate(list(nidx)):
+            if (isinstance(i, numpy.ndarray) and i.ndim != 0):
+                transp.remove(k)
+                transp.insert(p, k)
+                ap += k
+                i = nidx.pop(k)
+                nidx.insert(p, i)
+                p += 1
+                narrays += 1
+            else:
+                if narrays == 0:
+                    try:
+                        i.__index__()
+                        # We shift back the position of the array by the
+                        # number of dimensions that are removed by
+                        # indexing. If ap is bigger than 0 it means we
+                        # have encountered at least one array.
+                        if ap >= 0:
+                            ap -= 1
+                        # If this index is before the first array then
+                        # we will not move the array back to its
+                        # position. Mark this by faking that there
+                        # are more than two arrays. This is crazy
+                        # numpy behaviour so blame them.
+                        narrays = 2
+                    except Exception:
+                        pass
+
+        x = x.transpose(*transp)
+        idx_ = ([slice(None)] * p + nidx[p:])
+        x = x.__getitem__(idx_)
+
+        # flatten the array-indexed dimensions
+        shape = ((numpy.prod(x.shape[0:p]),) +
+                 x.shape[p:])
+        input_flat = x.reshape(shape)
+
+        # build the strides
+        strides = [1]
+        for i in range(p - 1, 0, -1):
+            stride = x.shape[i] * strides[-1]
+            strides.insert(0, stride)
+
+        # build the indices and use it
+        take_idx = sum((i * s for i, s in zip(nidx, strides)))
+        out_flat = input_flat.take1(pygpu.asarray(take_idx.flatten(),
+                                                  context=x.context))
+
+        # finish up
+        out_flat_shp = take_idx.shape + x.shape[p:]
+        o = out_flat.reshape(out_flat_shp)
+
+        # If there was only one array we need to move the indexed
+        # dimension(s) back to the position of the array, which is
+        # stored in ap. Note that ap is invalid if narrays != 1.
+        if narrays == 1:
+            ntransp = list(range(take_idx.ndim, o.ndim))
+            ntransp[ap:ap] = list(range(take_idx.ndim))
+            o = o.transpose(*ntransp)
+
+        out[0] = o
+
+
 class GpuAdvancedIncSubtensor1(Op):
     """
     Implement AdvancedIncSubtensor1 on the gpu.
     ...
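
To make the strategy in perform() concrete: it moves the array-indexed axes to the front, flattens them into one axis, gathers once with a linear index (the `take1` call on the GPU array), and reshapes. Here is a plain-numpy sketch under illustrative shapes and index values; it also demonstrates the numpy placement rule that the `ap` bookkeeping mirrors.

```python
import numpy as np

x = np.random.rand(2, 3, 4, 5)
i0 = np.array([[0, 1], [1, 0]])  # indexes axis 0
i2 = np.array([[1, 2], [3, 0]])  # indexes axis 2

# Index arrays separated by a slice: numpy puts the broadcast index
# dimensions (2, 2) at the front of the result.
ref = x[i0, :, i2, :]
assert ref.shape == (2, 2, 3, 5)

# Equivalent flat gather, as in perform(): array-indexed axes first,
# flatten them together, then one take with a linear offset.
xt = x.transpose(0, 2, 1, 3)           # shape (2, 4, 3, 5)
flat = xt.reshape(2 * 4, 3, 5)         # indexed axes flattened
lin = i0 * 4 + i2                      # strides over (2, 4) are [4, 1]
out = flat[lin.ravel()].reshape(lin.shape + flat.shape[1:])
assert np.allclose(out, ref)

# Adjacent index arrays stay in place instead of moving to the front;
# this is the "crazy numpy behaviour" the code has to track.
assert x[:, i0, i2, :].shape == (2, 2, 2, 5)
```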
theano/gpuarray/tests/test_subtensor.py
@@ -10,6 +10,7 @@ from ..basic_ops import HostFromGpu, GpuFromHost
 from ..elemwise import GpuDimShuffle
 from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
                          GpuAdvancedSubtensor1,
+                         GpuAdvancedSubtensor,
                          GpuAdvancedIncSubtensor1)
 from ..type import gpuarray_shared_constructor
@@ -40,7 +41,7 @@ class G_subtensor(test_subtensor.T_subtensor):
 def test_advinc_subtensor1():
-    """ Test the second case in the opt local_gpu_advanced_incsubtensor1 """
+    # Test the second case in the opt local_gpu_advanced_incsubtensor1
     for shp in [(3, 3), (3, 3, 3)]:
         shared = gpuarray_shared_constructor
         xval = numpy.arange(numpy.prod(shp),
                             dtype='float32').reshape(shp) + 1
@@ -87,3 +88,41 @@ def test_incsub_f16():
     rep = xval.copy()
     rep[1:] += yval
     assert numpy.allclose(rval, rep)
+
+
+class G_advancedsubtensor(test_subtensor.TestAdvancedSubtensor):
+    def shortDescription(self):
+        return None
+
+    def __init__(self, name):
+        test_subtensor.TestAdvancedSubtensor.__init__(
+            self, name, shared=gpuarray_shared_constructor,
+            sub=GpuAdvancedSubtensor,
+            mode=mode_with_gpu,
+            # avoid errors with limited devices
+            dtype='float32',
+            ignore_topo=(HostFromGpu, GpuFromHost, DeepCopyOp))
+        # GPU opt can't run in fast_compile only.
+        self.fast_compile = False
+        assert self.sub == GpuAdvancedSubtensor
+
+
+def test_adv_subtensor():
+    # Test the advancedsubtensor on gpu.
+    shp = (2, 3, 4)
+    shared = gpuarray_shared_constructor
+    xval = numpy.arange(numpy.prod(shp),
+                        dtype=theano.config.floatX).reshape(shp)
+    idx1, idx2 = tensor.ivectors('idx1', 'idx2')
+    idxs = [idx1, None, slice(0, 2, 1), idx2, None]
+    x = shared(xval, name='x')
+    expr = x[idxs]
+    f = theano.function([idx1, idx2], expr, mode=mode_with_gpu)
+    assert sum([isinstance(node.op, GpuAdvancedSubtensor)
+                for node in f.maker.fgraph.toposort()]) == 1
+    idx1_val = [0, 1]
+    idx2_val = [0, 1]
+    rval = f(idx1_val, idx2_val)
+    rep = xval[idx1_val, None, slice(0, 2, 1), idx2_val, None]
+    assert numpy.allclose(rval, rep)
theano/sandbox/cuda/tests/test_basic_ops.py
@@ -1009,23 +1009,20 @@ class T_subtensor(theano.tensor.tests.test_subtensor.T_subtensor):
     def shortDescription(self):
         return None
 
-    shared = staticmethod(cuda.shared_constructor)
-    sub = cuda.GpuSubtensor
-    inc_sub = cuda.GpuIncSubtensor
-    adv_sub1 = cuda.GpuAdvancedSubtensor1
-    adv_incsub1 = cuda.GpuAdvancedIncSubtensor1
-    dimshuffle = cuda.GpuDimShuffle
-    mode = mode_with_gpu
-    dtype = 'float32'
-    type = tcn.CudaNdarrayType
-    ignore_topo = (B.HostFromGpu, B.GpuFromHost,
-                   theano.compile.DeepCopyOp)
-    fast_compile = False
-    ops = (cuda.GpuSubtensor, cuda.GpuIncSubtensor,
-           cuda.GpuAdvancedSubtensor1, cuda.GpuAdvancedIncSubtensor1)
-
     def __init__(self, name):
-        return super(theano.tensor.tests.test_subtensor.T_subtensor,
-                     self).__init__(name)
+        super(T_subtensor, self).__init__(
+            name, shared=cuda.shared_constructor,
+            sub=cuda.GpuSubtensor,
+            inc_sub=cuda.GpuIncSubtensor,
+            adv_sub1=cuda.GpuAdvancedSubtensor1,
+            adv_incsub1=cuda.GpuAdvancedIncSubtensor1,
+            dimshuffle=cuda.GpuDimShuffle,
+            mode=mode_with_gpu, dtype='float32',
+            type=tcn.CudaNdarrayType,
+            ignore_topo=(B.HostFromGpu, B.GpuFromHost,
+                         theano.compile.DeepCopyOp))
+        self.fast_compile = False
 
     def test_adv_sub1_fast(self):
         """We check that the special cases of advanced indexing that
         ...
theano/tensor/subtensor.py
@@ -20,7 +20,7 @@ from theano.tensor.basic import alloc
 from theano.tensor.basic import (addbroadcast, clip, get_scalar_constant_value,
                                  ARange, TensorType, NotScalarConstantError)
 from theano.tensor.elemwise import DimShuffle
-from theano.tensor.type_other import NoneConst, SliceType, make_slice
+from theano.tensor.type_other import NoneConst, SliceType, NoneTypeT, make_slice
 from theano import config
 
 inplace_increment = None
@@ -2077,6 +2077,8 @@ def as_index_variable(idx):
         return make_slice(idx)
     if isinstance(idx, gof.Variable) and isinstance(idx.type, SliceType):
         return idx
+    if isinstance(idx, gof.Variable) and isinstance(idx.type, NoneTypeT):
+        return idx
     idx = theano.tensor.as_tensor_variable(idx)
     if idx.type.dtype[:3] not in ('int', 'uin'):
         raise TypeError('index must be integers')
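
The added `NoneTypeT` branch lets a symbolic `None` (i.e. `numpy.newaxis`) pass through `as_index_variable` unchanged, which the new GPU advanced-subtensor path relies on. For reference, the numpy behaviour being mirrored (a minimal illustration):

```python
import numpy as np

x = np.arange(24).reshape(2, 3, 4)

# None (numpy.newaxis) inserts a broadcastable length-1 dimension.
assert x[:, None, :, :].shape == (2, 1, 3, 4)

# It composes with integer-array indices: the array selects along
# axis 0, then None adds a fresh axis.
idx = np.array([0, 1])
assert x[idx, None].shape == (2, 1, 3, 4)
```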
@@ -2165,17 +2167,8 @@ class AdvancedSubtensor(Op):
         # TODO: in general, we need to re-pack the inputs into a valid
         # index, just like subtensor
         out[0] = inputs[0].__getitem__(inputs[1:])
-        if (numpy.__version__ <= '1.6.1' and
-                out[0].size != numpy.uint32(out[0].size)):
-            warnings.warn(
-                'Numpy versions 1.6.1 and below have a bug preventing '
-                'advanced indexing from correctly filling arrays that '
-                'are too big (>= 2^32 elements). It is possible that '
-                'out[0] (%s), with shape %s, is not correctly filled.'
-                % (out[0], out[0].shape))
 
     def connection_pattern(self, node):
         rval = [[True]]
         for ipt in node.inputs[1:]:
         ...
theano/tensor/tests/test_basic.py
@@ -6692,14 +6692,11 @@ class test_arithmetic_cast(unittest.TestCase):
                             config.int_division == 'floatX'):
                         assert theano_dtype == config.floatX
                         continue
-                    numpy_version = [int(v) for v in
-                                     numpy.__version__.split('.')[:2]]
                     if (cfg == 'numpy+floatX' and
                             a_type == 'complex128' and
                             (b_type == 'float32' or b_type == 'float16') and
                             combo == ('scalar', 'array') and
-                            bool(numpy_version >= [1, 6]) and
                             theano_dtype == 'complex128' and
                             numpy_dtype == 'complex64'):
                         # In numpy 1.6.x adding a complex128 with
@@ -6707,7 +6704,7 @@ class test_arithmetic_cast(unittest.TestCase):
                         # of 1.9.2. this is still the case so it is
                         # probably by design
                         raise SkipTest("Known issue with"
-                                       "numpy >= 1.6.x see #761")
+                                       "numpy see #761")
                         # In any other situation: something wrong is
                         # going on!
                         assert False
theano/tensor/tests/test_subtensor.py
@@ -20,8 +20,8 @@ from theano.compile import DeepCopyOp
 from theano.tensor import (MakeSlice, NotScalarConstantError, _shared,
                            as_tensor_variable, cscalar, ctensor3, dmatrix,
                            dscalar, dtensor4, dvector, fmatrix, fscalar,
-                           fvector, iscalar, lmatrix, lrow, lvector, matrix,
-                           vector)
+                           fvector, ftensor4, iscalar, lmatrix, lrow, lvector,
+                           matrix, vector)
 from theano.tensor.basic import DimShuffle
 from theano.tensor.subtensor import (AdvancedIncSubtensor,
                                      AdvancedIncSubtensor1, AdvancedSubtensor,
                                      ...
@@ -55,6 +55,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
                  inc_sub=tensor.IncSubtensor,
                  adv_sub1=tensor.AdvancedSubtensor1,
                  adv_incsub1=tensor.AdvancedIncSubtensor1,
+                 adv_sub=tensor.AdvancedSubtensor,
                  mode=None,
                  dtype=theano.config.floatX,
                  type=tensor.TensorType,
@@ -65,6 +66,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
         self.inc_sub = inc_sub
         self.adv_sub1 = adv_sub1
         self.adv_incsub1 = adv_incsub1
+        self.adv_sub = adv_sub
         self.dimshuffle = dimshuffle
         if mode is None:
             mode = theano.compile.mode.get_default_mode()
@@ -354,13 +356,9 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
             (3, DimShuffle, self.dimshuffle,
              numpy.index_exp[..., [0, 2, 3]]),
             (1, DimShuffle, self.dimshuffle,
-             numpy.index_exp[numpy.newaxis, ...])]
-        # The following test case is not supported by numpy before 1.9
-        numpy_version = [int(v) for v in
-                         numpy.version.version.split('.')[0:2]]
-        if numpy_version >= [1, 9]:
-            test_cases.append(
-                (1, AdvancedSubtensor, AdvancedSubtensor,
-                 numpy.index_exp[..., numpy.newaxis, [1, 2]]))
+             numpy.index_exp[numpy.newaxis, ...]),
+            (1, AdvancedSubtensor, self.adv_sub,
+             numpy.index_exp[..., numpy.newaxis, [1, 2]])]
 
         for length, op_type, op_type_opt, slice_ in test_cases:
             numpy_tval = numpy_n[slice_]
@@ -1351,6 +1349,7 @@ class TestAdvancedSubtensor(unittest.TestCase):
         self.v = fvector()
         self.m = dmatrix()
         self.t = ctensor3()
+        self.ft4 = ftensor4()
         self.ix1 = lvector()  # advanced 1d query
         self.ix12 = lvector()
@@ -1421,11 +1420,57 @@ class TestAdvancedSubtensor(unittest.TestCase):
         a = inc_subtensor(subt, subt)
         assert a.type == self.v.type, (a.type, self.v.type)
-        f = theano.function([self.v, self.ix2], a,
-                            allow_input_downcast=True)
+        f = theano.function([self.v, self.ix2], a,
+                            allow_input_downcast=True,
+                            mode=self.mode)
         aval = f([.4, .9, .1], [[1, 2], [1, 2]])
         assert numpy.allclose(aval, [.4, .9 * 3, .1 * 3])
+
+    def test_adv_subtensor_w_int_and_matrix(self):
+        subt = self.ft4[0, :, self.ix2, :]
+        f = theano.function([self.ft4, self.ix2], subt, mode=self.mode)
+        ft4v = numpy.random.random((2, 3, 4, 5)).astype('float32')
+        ix2v = numpy.asarray([[0, 1], [1, 0]])
+        aval = f(ft4v, ix2v)
+        rval = ft4v[0, :, ix2v, :]
+        utt.assert_allclose(rval, aval)
+
+    def test_adv_subtensor_w_none_and_matrix(self):
+        subt = self.ft4[:, None, :, self.ix2, :]
+        f = theano.function([self.ft4, self.ix2], subt, mode=self.mode)
+        ft4v = numpy.random.random((2, 3, 4, 5)).astype('float32')
+        ix2v = numpy.asarray([[0, 1], [1, 0]])
+        aval = f(ft4v, ix2v)
+        rval = ft4v[:, None, :, ix2v, :]
+        utt.assert_allclose(rval, aval)
+
+    def test_adv_subtensor_w_slice_and_matrix(self):
+        subt = self.ft4[:, 0:1, self.ix2, :]
+        f = theano.function([self.ft4, self.ix2], subt, mode=self.mode)
+        ft4v = numpy.random.random((2, 3, 4, 5)).astype('float32')
+        ix2v = numpy.asarray([[0, 1], [1, 0]])
+        aval = f(ft4v, ix2v)
+        rval = ft4v[:, 0:1, ix2v, :]
+        utt.assert_allclose(rval, aval)
+
+    def test_adv_subtensor_w_matrix_and_int(self):
+        subt = self.ft4[:, :, self.ix2, 0]
+        f = theano.function([self.ft4, self.ix2], subt, mode=self.mode)
+        ft4v = numpy.random.random((2, 3, 4, 5)).astype('float32')
+        ix2v = numpy.asarray([[0, 1], [1, 0]])
+        aval = f(ft4v, ix2v)
+        rval = ft4v[:, :, ix2v, 0]
+        utt.assert_allclose(rval, aval)
+
+    def test_adv_subtensor_w_matrix_and_none(self):
+        subt = self.ft4[:, :, self.ix2, None, :]
+        f = theano.function([self.ft4, self.ix2], subt, mode=self.mode)
+        ft4v = numpy.random.random((2, 3, 4, 5)).astype('float32')
+        ix2v = numpy.asarray([[0, 1], [1, 0]])
+        aval = f(ft4v, ix2v)
+        rval = ft4v[:, :, ix2v, None, :]
+        utt.assert_allclose(rval, aval)
 
     def test_inc_adv_subtensor_w_2vec(self):
         if inplace_increment is None:
             raise inplace_increment_missing
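
The five new tests pin down numpy's shape rules for mixed basic/advanced indexing, which GpuAdvancedSubtensor must reproduce exactly. A plain-numpy summary of the shapes they exercise (values illustrative):

```python
import numpy as np

ft4v = np.random.random((2, 3, 4, 5)).astype('float32')
ix2v = np.asarray([[0, 1], [1, 0]])

# int + matrix: the scalar also counts as an advanced index; since the
# two advanced indices are separated by a slice, the broadcast
# dimensions (2, 2) move to the front.
assert ft4v[0, :, ix2v, :].shape == (2, 2, 3, 5)

# None + matrix: a single advanced index stays in place; None inserts
# a length-1 axis.
assert ft4v[:, None, :, ix2v, :].shape == (2, 1, 3, 2, 2, 5)

# slice + matrix: still a single advanced index, kept in place.
assert ft4v[:, 0:1, ix2v, :].shape == (2, 1, 2, 2, 5)

# matrix + int: two adjacent advanced indices broadcast together and
# replace the indexed axes in place.
assert ft4v[:, :, ix2v, 0].shape == (2, 3, 2, 2)

# matrix + None: single advanced index in place, then a new axis.
assert ft4v[:, :, ix2v, None, :].shape == (2, 3, 2, 2, 1, 5)
```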