Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9cde027a
提交
9cde027a
authored
3月 29, 2017
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Remove tentacles in misc.
上级
80a1e8e0
全部展开
显示空白字符变更
内嵌
并排
正在显示
17 个修改的文件
包含
87 行增加
和
947 行删除
+87
-947
test_others.py
theano/gpuarray/tests/test_others.py
+34
-0
ifelse.py
theano/ifelse.py
+2
-2
check_blas.py
theano/misc/check_blas.py
+19
-83
cudamat_utils.py
theano/misc/cudamat_utils.py
+0
-110
gnumpy_utils.py
theano/misc/gnumpy_utils.py
+0
-124
may_share_memory.py
theano/misc/may_share_memory.py
+8
-23
pkl_utils.py
theano/misc/pkl_utils.py
+14
-16
pycuda_example.py
theano/misc/pycuda_example.py
+0
-0
pycuda_init.py
theano/misc/pycuda_init.py
+0
-66
pycuda_utils.py
theano/misc/pycuda_utils.py
+0
-66
test_cudamat_utils.py
theano/misc/tests/test_cudamat_utils.py
+0
-37
test_gnumpy_utils.py
theano/misc/tests/test_gnumpy_utils.py
+0
-78
test_may_share_memory.py
theano/misc/tests/test_may_share_memory.py
+9
-5
test_pkl_utils.py
theano/misc/tests/test_pkl_utils.py
+1
-21
test_pycuda_example.py
theano/misc/tests/test_pycuda_example.py
+0
-106
test_pycuda_theano_simple.py
theano/misc/tests/test_pycuda_theano_simple.py
+0
-131
test_pycuda_utils.py
theano/misc/tests/test_pycuda_utils.py
+0
-79
没有找到文件。
theano/gpuarray/tests/test_others.py
0 → 100644
浏览文件 @
9cde027a
from
.config
import
test_ctx_name
from
..type
import
get_context
,
GpuArrayType
,
GpuArraySharedVariable
import
pygpu
import
numpy
as
np
from
theano.misc.tests.test_may_share_memory
import
may_share_memory_core
from
theano.misc.pkl_utils
import
dump
,
load
def
test_may_share_memory
():
ctx
=
get_context
(
test_ctx_name
)
a
=
pygpu
.
empty
((
5
,
4
),
context
=
ctx
)
b
=
pygpu
.
empty
((
5
,
4
),
context
=
ctx
)
may_share_memory_core
(
a
,
b
)
def
test_dump_load
():
x
=
GpuArraySharedVariable
(
'x'
,
GpuArrayType
(
'float32'
,
(
1
,
1
),
name
=
'x'
,
context_name
=
test_ctx_name
),
[[
1
]],
False
)
with
open
(
'test'
,
'wb'
)
as
f
:
dump
(
x
,
f
)
with
open
(
'test'
,
'rb'
)
as
f
:
x
=
load
(
f
)
assert
x
.
name
==
'x'
np
.
testing
.
assert_allclose
(
x
.
get_value
(),
[[
1
]])
theano/ifelse.py
浏览文件 @
9cde027a
...
...
@@ -168,8 +168,8 @@ class IfElse(Op):
)
c
=
theano
.
tensor
.
as_tensor_variable
(
c
)
if
not
self
.
gpu
:
# When gpu is true, we are given only
cuda nd
arrays, and we want
# to keep them
be cuda nd
arrays
# When gpu is true, we are given only
gpu
arrays, and we want
# to keep them
as gpu
arrays
nw_args
=
[]
for
x
in
args
:
if
hasattr
(
x
,
'_as_TensorVariable'
):
...
...
theano/misc/check_blas.py
浏览文件 @
9cde027a
...
...
@@ -11,7 +11,6 @@ import os
import
sys
import
time
from
optparse
import
OptionParser
import
subprocess
import
numpy
as
np
import
theano
...
...
@@ -51,12 +50,6 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
print
(
'Numpy dot module:'
,
np
.
dot
.
__module__
)
print
(
'Numpy location:'
,
np
.
__file__
)
print
(
'Numpy version:'
,
np
.
__version__
)
if
(
theano
.
config
.
device
.
startswith
(
"gpu"
)
or
theano
.
config
.
init_gpu_device
.
startswith
(
"gpu"
)):
print
(
'nvcc version:'
)
subprocess
.
call
((
theano
.
sandbox
.
cuda
.
nvcc_compiler
.
nvcc_path
,
"--version"
))
print
()
a
=
theano
.
shared
(
np
.
ones
((
M
,
N
),
dtype
=
theano
.
config
.
floatX
,
order
=
order
))
...
...
@@ -88,17 +81,15 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
f
()
# Ignore first function call to get representative time.
if
execute
:
sync
=
(
hasattr
(
theano
,
"sandbox"
)
and
hasattr
(
theano
.
sandbox
,
"cuda"
)
and
isinstance
(
c
,
theano
.
sandbox
.
cuda
.
CudaNdarraySharedVariable
))
sync2
=
(
hasattr
(
theano
,
"gpuarray"
)
and
sync
=
(
hasattr
(
theano
,
"gpuarray"
)
and
isinstance
(
c
,
theano
.
gpuarray
.
GpuArraySharedVariable
))
if
sync
:
# Make sure we don't include the time from the first call
c
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)
.
sync
()
t0
=
time
.
time
()
for
i
in
range
(
iters
):
f
()
if
sync
:
theano
.
sandbox
.
cuda
.
synchronize
()
if
sync2
:
c
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)
.
sync
()
t1
=
time
.
time
()
return
t1
-
t0
,
impl
...
...
@@ -199,85 +190,30 @@ if __name__ == "__main__":
goto2 1.13/8 1.94s
goto2 1.13/16 3.16s
Test time in float32
cuda version 6.5 6.0 5.5 5.0 4.2 4.1 4.0 3.2 3.0 # note
gpu
K6000/NOECC 0.06s 0.06s
K40 0.07s
K20m/ECC 0.08s 0.08s 0.07s
K20/NOECC 0.07s
M2090 0.19s
C2075 0.25s
M2075 0.25s
M2070 0.25s 0.27s 0.32s
M2070-Q 0.48s 0.27s 0.32s
M2050(Amazon) 0.25s
C1060 0.46s
K600 1.04s
GTX Titan Black 0.05s
GTX Titan(D15U-50) 0.06s 0.06s don't work
GTX 780 0.06s
GTX 980 0.06s
GTX 970 0.08s
GTX 680 0.11s 0.12s 0.154s 0.218s
GRID K520 0.14s
GTX 580 0.16s 0.16s 0.164s 0.203s
GTX 480 0.19s 0.19s 0.192s 0.237s 0.27s
GTX 750 Ti 0.20s
GTX 470 0.23s 0.23s 0.238s 0.297s 0.34s
GTX 660 0.18s 0.20s 0.23s
GTX 560 0.30s
GTX 650 Ti 0.27s
GTX 765M 0.27s
GTX 460 0.37s 0.45s
GTX 285 0.42s 0.452s 0.452s 0.40s # cuda 3.0 seems faster? driver version?
750M 0.49s
GT 610 2.38s
GTX 550 Ti 0.57s
GT 520 2.68s 3.06s
GT 520M 2.44s 3.19s # with bumblebee on Ubuntu 12.04
GT 220 3.80s
GT 210 6.35s
8500 GT 10.68s
Results for larger matrices.
There were 10 executions of gemm in float32
with matrices of shape 5000x5000 (M=N=K=5000).
Test time in float32. There were 10 executions of gemm in
float32 with matrices of shape 5000x5000 (M=N=K=5000)
All memory layout was in C order.
cuda version 7.5 7.0 6.5
cuda version 8.0 7.5 7.0
gpu
M40 0.47s
k80 0.96s
K6000/NOECC 0.69s
K40 0.88s
K20m/ECC
K20/NOECC
M2090
C2075
M2075
M2070
M2070-Q
M2050(Amazon)
C1060
K600
GTX Titan X 0.45s 0.47s
GTX Titan Black 0.64s 0.64s
GTX Titan(D15U-50)
GTX 780
M40 0.45s 0.47s
k80 0.92s 0.96s
K6000/NOECC 0.71s 0.69s
P6000/NOECC 0.25s
Titan X (Pascal) 0.28s
GTX Titan X 0.45s 0.45s 0.47s
GTX Titan Black 0.66s 0.64s 0.64s
GTX 1080 0.35s
GTX 980 Ti 0.41s
GTX 980
GTX 970 0.66s
GTX 680 1.57s
GRID K520
GTX 750 Ti 2.01s 2.01s
GTX 750 2.46s 2.37s
GTX 660 2.32s 2.32s
GTX 580
2.42s 2.47
s
GTX 480
2.87s 2.88
s
GTX 580
2.42
s
GTX 480
2.87
s
TX1 7.6s (float32 storage and computation)
GT 610 33.5s
"""
)
...
...
theano/misc/cudamat_utils.py
deleted
100644 → 0
浏览文件 @
80a1e8e0
"""
This code can only work if cudamat and theano are initialized on the
same gpu as theano.
WARNING: In the test of this file there is a transpose that is used...
So there can be problem with shape and stride order...
"""
from
__future__
import
absolute_import
,
print_function
,
division
import
six
try
:
import
cudamat
cudamat_available
=
True
import
theano.sandbox.cuda
as
cuda
if
cuda
.
cuda_available
is
False
:
raise
ImportError
(
'Optional theano package cuda disabled'
)
if
six
.
PY3
:
long
=
int
def
cudandarray_to_cudamat
(
x
,
copyif
=
False
):
""" take a CudaNdarray and return a cudamat.CUDAMatrix object.
:type x: CudaNdarray
:param x: The array to transform to cudamat.CUDAMatrix.
:type copyif: bool
:param copyif: If False, raise an error if x is not c contiguous.
If it is c contiguous, we return a GPUArray that share
the same memory region as x.
If True, copy x if it is no c contiguous, so the return won't
shape the same memory region. If c contiguous, the return
will share the same memory region.
We need to do this as GPUArray don't fully support strided memory.
:return type: cudamat.CUDAMatrix
"""
if
not
isinstance
(
x
,
cuda
.
CudaNdarray
):
raise
ValueError
(
"We can transfer only CudaNdarray to cudamat.CUDAMatrix"
)
elif
x
.
ndim
!=
2
:
raise
TypeError
(
"cudandarray_to_cudamat: input must be 2-d (has
%
s dims). That's "
"because cudamat arrays are always 2-dimensional"
)
else
:
# Check if it is c contiguous
size
=
1
c_contiguous
=
True
for
i
in
range
(
x
.
ndim
-
1
,
-
1
,
-
1
):
if
x
.
shape
[
i
]
==
1
:
continue
if
x
.
_strides
[
i
]
!=
size
:
c_contiguous
=
False
break
size
*=
x
.
shape
[
i
]
if
not
c_contiguous
:
if
copyif
:
x
=
x
.
copy
()
else
:
raise
ValueError
(
"We where asked to don't copy memory, but the memory is not c contiguous."
)
# Now x is always c contiguous.
# the next step is to create a CUDAMatrix object. We do so by first creating
# a cudamat object with no data_host.
cm_mat
=
cudamat
.
cudamat
()
cm_mat
.
size
[
0
]
=
x
.
shape
[
0
]
cm_mat
.
size
[
1
]
=
x
.
shape
[
1
]
cm_mat
.
on_host
=
0
cm_mat
.
on_device
=
1
cm_mat
.
is_trans
=
0
cm_mat
.
owns_data
=
0
# <-- note: cm_mat dosen't owe the data; x does. So x will delete it.
# x.gpudata is a long. We need a pointer to a float. cast.
import
ctypes
cm_mat
.
data_device
=
ctypes
.
cast
(
x
.
gpudata
,
ctypes
.
POINTER
(
ctypes
.
c_float
))
px
=
cudamat
.
CUDAMatrix
(
cm_mat
)
px
.
_base
=
x
# x won't be __del__'ed as long as px is around.
# let cudamat know that we don't have a numpy array attached.
px
.
mat_on_host
=
False
return
px
def
cudamat_to_cudandarray
(
x
):
""" take a cudamat.CUDAMatrix and make a CudaNdarray that point to its memory
"""
if
not
isinstance
(
x
,
cudamat
.
CUDAMatrix
):
raise
ValueError
(
"We can transfer only cudamat.CUDAMatrix to CudaNdarray"
)
# elif x.dtype != "float32":
# raise ValueError("CudaNdarray support only float32")
# We don't need this, because cudamat is always float32.
else
:
strides
=
[
1
]
for
i
in
x
.
shape
[::
-
1
][:
-
1
]:
strides
.
append
(
strides
[
-
1
]
*
i
)
strides
=
tuple
(
strides
[::
-
1
])
import
ctypes
ptr_long
=
long
(
ctypes
.
cast
(
x
.
mat
.
data_device
,
ctypes
.
c_void_p
)
.
value
)
# seems legit.
z
=
cuda
.
from_gpu_pointer
(
ptr_long
,
x
.
shape
,
strides
,
x
)
return
z
except
(
ImportError
,
OSError
):
cudamat_available
=
False
theano/misc/gnumpy_utils.py
deleted
100644 → 0
浏览文件 @
80a1e8e0
"""
This code can only work if gnumpy and theano are initialized on the
same gpu as theano.
"""
from
__future__
import
absolute_import
,
print_function
,
division
import
six
from
six.moves
import
reduce
try
:
import
gnumpy
import
cudamat
gnumpy_available
=
True
___const_garray
=
gnumpy
.
rand
(
1
)
import
theano.sandbox.cuda
as
cuda
if
cuda
.
cuda_available
is
False
:
raise
ImportError
(
'Optional theano package cuda disabled'
)
if
six
.
PY3
:
long
=
int
def
cudandarray_to_garray
(
x
,
copyif
=
False
):
""" take a CudaNdarray and return a gnumpy.garray object.
:type x: CudaNdarray
:param x: The array to transform to gnumpy.garray.
:type copyif: bool
:param copyif: If False, raise an error if x is not c contiguous.
If it is c contiguous, we return a GPUArray that share
the same memory region as x.
If True, copy x if it is no c contiguous, so the return won't
shape the same memory region. If c contiguous, the return
will share the same memory region.
We need to do this as GPUArray don't fully support strided memory.
:return type: cudamat.CUDAMatrix
"""
if
not
isinstance
(
x
,
cuda
.
CudaNdarray
):
raise
ValueError
(
"We can transfer only CudaNdarray to cudamat.CUDAMatrix"
)
else
:
# Check if it is c contiguous
size
=
1
c_contiguous
=
True
for
i
in
range
(
x
.
ndim
-
1
,
-
1
,
-
1
):
if
x
.
shape
[
i
]
==
1
:
continue
if
x
.
_strides
[
i
]
!=
size
:
c_contiguous
=
False
break
size
*=
x
.
shape
[
i
]
if
not
c_contiguous
:
if
copyif
:
x
=
x
.
copy
()
else
:
raise
ValueError
(
"We where asked to don't copy memory, but the memory is not c contiguous."
)
# Now x is always c contiguous.
# the next step is to create a CUDAMatrix object. We do so by first creating
# a cudamat object with no data_host.
cm_mat
=
cudamat
.
cudamat
()
cm_mat
.
size
[
0
]
=
reduce
(
lambda
x
,
y
:
x
*
y
,
x
.
shape
,
1
)
cm_mat
.
size
[
1
]
=
1
cm_mat
.
on_host
=
0
cm_mat
.
on_device
=
1
cm_mat
.
is_trans
=
0
cm_mat
.
owns_data
=
0
# <-- note: cm_mat dosen't owe the data; x does. So x will delete it.
# x.gpudata is a long. We need a pointer to a float. cast.
import
ctypes
cm_mat
.
data_device
=
ctypes
.
cast
(
x
.
gpudata
,
ctypes
.
POINTER
(
ctypes
.
c_float
))
px
=
cudamat
.
CUDAMatrix
(
cm_mat
)
px
.
_base
=
x
# x won't be freed if the cudamat object isn't freed.
# let cudamat know that we don't have a numpy array attached.
px
.
mat_on_host
=
False
# Note how gnumpy tracks its cudamat objects: it moves things to the
# _cmsReuseCache when the gnumpy array is deleted, thus the arrays
# returned by theano will never be deleted.
# However, if the garray thinks that the object is a view, then it won't
# move the _base to the _cmsResueCache; so the cudamat object will be deleted,
# and we won't overpump the world with memory.
_is_alias_of
=
___const_garray
ans
=
gnumpy
.
garray
(
px
,
x
.
shape
,
_is_alias_of
)
return
ans
def
garray_to_cudandarray
(
x
):
""" take a gnumpy.garray and make a CudaNdarray that point to its memory
"""
if
not
isinstance
(
x
,
gnumpy
.
garray
):
raise
ValueError
(
"We can transfer only gnumpy.garray to CudaNdarray"
)
# elif x.dtype != "float32":
# raise ValueError("CudaNdarray support only float32")
# We don't need this, because cudamat is always float32.
else
:
strides
=
[
1
]
for
i
in
x
.
shape
[::
-
1
][:
-
1
]:
strides
.
append
(
strides
[
-
1
]
*
i
)
strides
=
strides
[::
-
1
]
for
i
in
range
(
len
(
strides
)):
if
x
.
shape
[
i
]
==
1
:
strides
[
i
]
=
0
strides
=
tuple
(
strides
)
import
ctypes
ptr_long
=
long
(
ctypes
.
cast
(
x
.
_base
.
mat
.
data_device
,
ctypes
.
c_void_p
)
.
value
)
# seems legit.
z
=
cuda
.
from_gpu_pointer
(
ptr_long
,
x
.
shape
,
strides
,
x
.
_base
)
return
z
except
(
ImportError
,
OSError
):
gnumpy_available
=
False
theano/misc/may_share_memory.py
浏览文件 @
9cde027a
"""
Function to detect memory sharing for ndarray AND sparse type AND
CudaNda
rray.
Function to detect memory sharing for ndarray AND sparse type AND
GpuA
rray.
numpy version support only ndarray.
"""
from
__future__
import
absolute_import
,
print_function
,
division
...
...
@@ -14,25 +14,12 @@ try:
def
_is_sparse
(
a
):
return
scipy
.
sparse
.
issparse
(
a
)
except
ImportError
:
# scipy not imported, their can be only ndarray and
cudand
array
# scipy not imported, their can be only ndarray and
gpu
array
def
_is_sparse
(
a
):
return
False
from
theano.sandbox
import
cuda
from
theano
import
gpuarray
if
cuda
.
cuda_available
:
from
theano.sandbox.cuda.type
import
CudaNdarrayType
def
_is_cuda
(
a
):
return
isinstance
(
a
,
cuda
.
CudaNdarray
)
else
:
def
_is_cuda
(
a
):
return
False
__docformat__
=
"restructuredtext en"
if
gpuarray
.
pygpu
:
def
_is_gpua
(
a
):
return
isinstance
(
a
,
gpuarray
.
pygpu
.
gpuarray
.
GpuArray
)
...
...
@@ -40,16 +27,14 @@ else:
def
_is_gpua
(
a
):
return
False
__docformat__
=
"restructuredtext en"
def
may_share_memory
(
a
,
b
,
raise_other_type
=
True
):
a_ndarray
=
isinstance
(
a
,
np
.
ndarray
)
b_ndarray
=
isinstance
(
b
,
np
.
ndarray
)
if
a_ndarray
and
b_ndarray
:
return
TensorType
.
may_share_memory
(
a
,
b
)
a_cuda
=
_is_cuda
(
a
)
b_cuda
=
_is_cuda
(
b
)
if
a_cuda
and
b_cuda
:
return
CudaNdarrayType
.
may_share_memory
(
a
,
b
)
a_gpua
=
_is_gpua
(
a
)
b_gpua
=
_is_gpua
(
b
)
if
a_gpua
and
b_gpua
:
...
...
@@ -57,13 +42,13 @@ def may_share_memory(a, b, raise_other_type=True):
a_sparse
=
_is_sparse
(
a
)
b_sparse
=
_is_sparse
(
b
)
if
(
not
(
a_ndarray
or
a_sparse
or
a_
cuda
or
a_
gpua
)
or
not
(
b_ndarray
or
b_sparse
or
b_
cuda
or
b_
gpua
)):
if
(
not
(
a_ndarray
or
a_sparse
or
a_gpua
)
or
not
(
b_ndarray
or
b_sparse
or
b_gpua
)):
if
raise_other_type
:
raise
TypeError
(
"may_share_memory support only ndarray"
" and scipy.sparse
, CudaNdarray
or GpuArray type"
)
" and scipy.sparse or GpuArray type"
)
return
False
if
a_
cuda
or
b_cuda
or
a_
gpua
or
b_gpua
:
if
a_gpua
or
b_gpua
:
return
False
return
SparseType
.
may_share_memory
(
a
,
b
)
theano/misc/pkl_utils.py
浏览文件 @
9cde027a
...
...
@@ -26,11 +26,11 @@ from theano import config
from
theano.compat
import
PY3
from
six
import
string_types
from
theano.compile.sharedvalue
import
SharedVariable
try
:
from
theano.sandbox.cuda
import
cuda_ndarray
import
pygpu
except
ImportError
:
cuda_ndarray
=
None
pygpu
=
None
__docformat__
=
"restructuredtext en"
__authors__
=
"Pascal Lamblin"
...
...
@@ -202,21 +202,21 @@ class PersistentNdarrayID(object):
return
self
.
seen
[
id
(
obj
)]
class
Persistent
CudaNda
rrayID
(
PersistentNdarrayID
):
class
Persistent
GpuA
rrayID
(
PersistentNdarrayID
):
def
__call__
(
self
,
obj
):
if
(
cuda_ndarray
is
not
None
and
type
(
obj
)
is
cuda_ndarray
.
cuda_ndarray
.
CudaNdarray
):
if
(
pygpu
and
isinstance
(
obj
,
pygpu
.
gpuarray
.
GpuArray
)
):
if
id
(
obj
)
not
in
self
.
seen
:
def
write_array
(
f
):
np
.
lib
.
format
.
write_array
(
f
,
np
.
asarray
(
obj
))
name
=
self
.
_resolve_name
(
obj
)
zipadd
(
write_array
,
self
.
zip_file
,
name
)
self
.
seen
[
id
(
obj
)]
=
'
cuda_nd
array.{0}'
.
format
(
name
)
self
.
seen
[
id
(
obj
)]
=
'
gpu
array.{0}'
.
format
(
name
)
return
self
.
seen
[
id
(
obj
)]
return
super
(
Persistent
CudaNda
rrayID
,
self
)
.
__call__
(
obj
)
return
super
(
Persistent
GpuA
rrayID
,
self
)
.
__call__
(
obj
)
class
PersistentSharedVariableID
(
Persistent
CudaNda
rrayID
):
class
PersistentSharedVariableID
(
Persistent
GpuA
rrayID
):
"""Uses shared variable names when persisting to zip file.
If a shared variable has a name, this name is used as the name of the
...
...
@@ -288,18 +288,16 @@ class PersistentNdarrayLoad(object):
return
self
.
cache
[
name
]
ret
=
None
array
=
np
.
lib
.
format
.
read_array
(
self
.
zip_file
.
open
(
name
))
if
array_type
==
'
cuda_nd
array'
:
if
array_type
==
'
gpu
array'
:
if
config
.
experimental
.
unpickle_gpu_on_cpu
:
# directly return numpy array
warnings
.
warn
(
"config.experimental.unpickle_gpu_on_cpu is set "
"to True. Unpickling CudaNdarray as "
"numpy.ndarray"
)
"to True. Unpickling GpuArray as numpy.ndarray"
)
ret
=
array
elif
cuda_ndarray
:
ret
=
cuda_ndarray
.
cuda_ndarray
.
CudaNd
array
(
array
)
elif
pygpu
:
ret
=
pygpu
.
array
(
array
)
else
:
raise
ImportError
(
"Cuda not found. Cannot unpickle "
"CudaNdarray"
)
raise
ImportError
(
"pygpu not found. Cannot unpickle GpuArray"
)
else
:
ret
=
array
self
.
cache
[
name
]
=
ret
...
...
theano/misc/pycuda_example.py
deleted
100644 → 0
浏览文件 @
80a1e8e0
差异被折叠。
点击展开。
theano/misc/pycuda_init.py
deleted
100644 → 0
浏览文件 @
80a1e8e0
from
__future__
import
absolute_import
,
print_function
,
division
import
os
import
warnings
import
theano
import
theano.sandbox.cuda
from
theano
import
config
def
set_gpu_from_theano
():
"""
This set the GPU used by PyCUDA to the same as the one used by Theano.
"""
# Transfer the theano gpu binding to pycuda, for consistency
if
config
.
device
.
startswith
(
"gpu"
)
and
len
(
config
.
device
)
>
3
:
os
.
environ
[
"CUDA_DEVICE"
]
=
theano
.
config
.
device
[
3
:]
elif
(
config
.
init_gpu_device
.
startswith
(
"gpu"
)
and
len
(
config
.
init_gpu_device
)
>
3
):
os
.
environ
[
"CUDA_DEVICE"
]
=
theano
.
config
.
init_gpu_device
[
3
:]
set_gpu_from_theano
()
pycuda_available
=
False
# If theano.sandbox.cuda don't exist, it is because we are importing
# it and it try to import this file! This mean we must init the device.
if
(
not
hasattr
(
theano
.
sandbox
,
'cuda'
)
or
theano
.
sandbox
.
cuda
.
use
.
device_number
is
None
):
try
:
import
pycuda
import
pycuda.autoinit
pycuda_available
=
True
except
(
ImportError
,
RuntimeError
):
# presumably, the user wanted to use pycuda, else they wouldn't have
# imported this module, so issue a warning that the import failed.
warnings
.
warn
(
"PyCUDA import failed in theano.misc.pycuda_init"
)
except
pycuda
.
_driver
.
LogicError
:
if
theano
.
config
.
force_device
:
raise
else
:
if
"CUDA_DEVICE"
in
os
.
environ
:
del
os
.
environ
[
"CUDA_DEVICE"
]
import
pycuda.autoinit
pycuda_available
=
True
else
:
try
:
import
pycuda.driver
pycuda_available
=
True
except
ImportError
:
pass
if
pycuda_available
:
if
hasattr
(
pycuda
.
driver
.
Context
,
"attach"
):
pycuda
.
driver
.
Context
.
attach
()
import
atexit
atexit
.
register
(
pycuda
.
driver
.
Context
.
pop
)
else
:
# Now we always import this file when we call
# theano.sandbox.cuda.use. So this should not happen
# normally.
# TODO: make this an error.
warnings
.
warn
(
"For some unknow reason, theano.misc.pycuda_init was"
" not imported before Theano initialized the GPU and"
" your PyCUDA version is 2011.2.2 or earlier."
" To fix the problem, import theano.misc.pycuda_init"
" manually before using/initializing the GPU, use the"
" Theano flag pycuda.init=True or use a"
" more recent version of PyCUDA."
)
theano/misc/pycuda_utils.py
deleted
100644 → 0
浏览文件 @
80a1e8e0
from
__future__
import
absolute_import
,
print_function
,
division
import
pycuda.gpuarray
from
theano.sandbox
import
cuda
if
cuda
.
cuda_available
is
False
:
raise
ImportError
(
'Optional theano package cuda disabled'
)
def
to_gpuarray
(
x
,
copyif
=
False
):
""" take a CudaNdarray and return a pycuda.gpuarray.GPUArray
:type x: CudaNdarray
:param x: The array to transform to pycuda.gpuarray.GPUArray.
:type copyif: bool
:param copyif: If False, raise an error if x is not c contiguous.
If it is c contiguous, we return a GPUArray that share
the same memory region as x.
If True, copy x if it is no c contiguous, so the return won't
shape the same memory region. If c contiguous, the return
will share the same memory region.
We need to do this as GPUArray don't fully support strided memory.
:return type: pycuda.gpuarray.GPUArray
"""
if
not
isinstance
(
x
,
cuda
.
CudaNdarray
):
raise
ValueError
(
"We can transfer only CudaNdarray to pycuda.gpuarray.GPUArray"
)
else
:
# Check if it is c contiguous
size
=
1
c_contiguous
=
True
for
i
in
range
(
x
.
ndim
-
1
,
-
1
,
-
1
):
if
x
.
shape
[
i
]
==
1
:
continue
if
x
.
_strides
[
i
]
!=
size
:
c_contiguous
=
False
break
size
*=
x
.
shape
[
i
]
if
not
c_contiguous
:
if
copyif
:
x
=
x
.
copy
()
else
:
raise
ValueError
(
"We were asked to not copy memory, but the memory is not c contiguous."
)
# Now x is always c contiguous
px
=
pycuda
.
gpuarray
.
GPUArray
(
x
.
shape
,
x
.
dtype
,
base
=
x
,
gpudata
=
x
.
gpudata
)
return
px
def
to_cudandarray
(
x
):
""" take a pycuda.gpuarray.GPUArray and make a CudaNdarray that point to its memory
:note: CudaNdarray support only float32, so only float32 GPUArray are accepted
"""
if
not
isinstance
(
x
,
pycuda
.
gpuarray
.
GPUArray
):
raise
ValueError
(
"We can transfer only pycuda.gpuarray.GPUArray to CudaNdarray"
)
elif
x
.
dtype
!=
"float32"
:
raise
ValueError
(
"CudaNdarray support only float32"
)
else
:
strides
=
[
1
]
for
i
in
x
.
shape
[::
-
1
][:
-
1
]:
strides
.
append
(
strides
[
-
1
]
*
i
)
strides
=
tuple
(
strides
[::
-
1
])
ptr
=
int
(
x
.
gpudata
)
# in pycuda trunk, y.ptr also works, which is a little cleaner
z
=
cuda
.
from_gpu_pointer
(
ptr
,
x
.
shape
,
strides
,
x
)
return
z
theano/misc/tests/test_cudamat_utils.py
deleted
100644 → 0
浏览文件 @
80a1e8e0
from
__future__
import
absolute_import
,
print_function
,
division
import
numpy
as
np
import
theano
from
theano.misc.cudamat_utils
import
cudamat_available
if
not
cudamat_available
:
# noqa
from
nose.plugins.skip
import
SkipTest
raise
SkipTest
(
"gnumpy not installed. Skip test of theano op with pycuda "
"code."
)
from
theano.misc.cudamat_utils
import
(
cudandarray_to_cudamat
,
cudamat_to_cudandarray
)
def
test
(
shape
=
(
3
,
4
)):
"""
Make sure that the cudamat conversion is exact.
"""
gpu
=
theano
.
sandbox
.
cuda
.
basic_ops
.
gpu_from_host
U
=
gpu
(
theano
.
tensor
.
fmatrix
(
'U'
))
ii
=
theano
.
function
([
U
],
gpu
(
U
+
1
))
A_cpu
=
np
.
asarray
(
np
.
random
.
rand
(
*
shape
),
dtype
=
"float32"
)
A_cnd
=
theano
.
sandbox
.
cuda
.
CudaNdarray
(
A_cpu
)
A_cmat
=
cudandarray_to_cudamat
(
A_cnd
)
B_cnd
=
cudamat_to_cudandarray
(
A_cmat
)
B_cnd
=
ii
(
A_cnd
)
u
=
A_cnd
.
copy
()
u
+=
theano
.
sandbox
.
cuda
.
CudaNdarray
(
np
.
asarray
([[
1
]],
dtype
=
'float32'
))
u
=
np
.
asarray
(
u
)
v
=
np
.
asarray
(
B_cnd
)
w
=
A_cmat
.
add
(
1
)
.
asarray
()
assert
abs
(
u
-
v
)
.
max
()
==
0
assert
abs
(
u
-
w
.
T
.
reshape
(
u
.
shape
))
.
max
()
==
0
theano/misc/tests/test_gnumpy_utils.py
deleted
100644 → 0
浏览文件 @
80a1e8e0
from
__future__
import
absolute_import
,
print_function
,
division
import
numpy
as
np
import
theano
from
theano.misc.gnumpy_utils
import
gnumpy_available
if
not
gnumpy_available
:
# noqa
from
nose.plugins.skip
import
SkipTest
raise
SkipTest
(
"gnumpy not installed. Skip test related to it."
)
from
theano.misc.gnumpy_utils
import
(
garray_to_cudandarray
,
cudandarray_to_garray
)
import
gnumpy
def
test
(
shape
=
(
3
,
4
,
5
)):
"""
Make sure that the gnumpy conversion is exact from garray to
CudaNdarray back to garray.
"""
gpu
=
theano
.
sandbox
.
cuda
.
basic_ops
.
gpu_from_host
U
=
gpu
(
theano
.
tensor
.
ftensor3
(
'U'
))
ii
=
theano
.
function
([
U
],
gpu
(
U
+
1
))
A
=
gnumpy
.
rand
(
*
shape
)
A_cnd
=
garray_to_cudandarray
(
A
)
assert
A_cnd
.
shape
==
A
.
shape
# dtype always float32
# garray don't have strides
B_cnd
=
ii
(
A_cnd
)
B
=
cudandarray_to_garray
(
B_cnd
)
assert
A_cnd
.
shape
==
A
.
shape
u
=
(
A
+
1
)
.
asarray
()
v
=
B
.
asarray
()
w
=
np
.
array
(
B_cnd
)
assert
(
u
==
v
)
.
all
()
assert
(
u
==
w
)
.
all
()
def
test2
(
shape
=
(
3
,
4
,
5
)):
"""
Make sure that the gnumpy conversion is exact from CudaNdarray to
garray back to CudaNdarray.
"""
gpu
=
theano
.
sandbox
.
cuda
.
basic_ops
.
gpu_from_host
U
=
gpu
(
theano
.
tensor
.
ftensor3
(
'U'
))
theano
.
function
([
U
],
gpu
(
U
+
1
))
A
=
np
.
random
.
rand
(
*
shape
)
.
astype
(
'float32'
)
A_cnd
=
theano
.
sandbox
.
cuda
.
CudaNdarray
(
A
)
A_gar
=
cudandarray_to_garray
(
A_cnd
)
assert
A_cnd
.
shape
==
A_gar
.
shape
# dtype always float32
# garray don't have strides
B
=
garray_to_cudandarray
(
A_gar
)
assert
A_cnd
.
shape
==
B
.
shape
# dtype always float32
assert
A_cnd
.
_strides
==
B
.
_strides
assert
A_cnd
.
gpudata
==
B
.
gpudata
v
=
np
.
asarray
(
B
)
assert
(
v
==
A
)
.
all
()
def
test_broadcast_dims
():
"""
Test with some dimensions being 1.
CudaNdarray use 0 for strides for those dimensions.
"""
test
((
1
,
2
,
3
))
test
((
2
,
1
,
3
))
test
((
2
,
3
,
1
))
test2
((
1
,
2
,
3
))
test2
((
2
,
1
,
3
))
test2
((
2
,
3
,
1
))
theano/misc/tests/test_may_share_memory.py
浏览文件 @
9cde027a
"""
test the tensor and sparse type. The CudaNdarray type is tested in
sandbox/cuda/tests/test_tensor_op.py.test_may_share_memory_cuda
test the tensor and sparse type. (gpuarray is tested in the gpuarray folder).
"""
from
__future__
import
absolute_import
,
print_function
,
division
import
numpy
as
np
...
...
@@ -15,9 +14,7 @@ except ImportError:
from
theano.misc.may_share_memory
import
may_share_memory
def
test_may_share_memory
():
a
=
np
.
random
.
rand
(
5
,
4
)
b
=
np
.
random
.
rand
(
5
,
4
)
def
may_share_memory_core
(
a
,
b
):
va
=
a
.
view
()
vb
=
b
.
view
()
ra
=
a
.
reshape
((
4
,
5
))
...
...
@@ -51,6 +48,13 @@ def test_may_share_memory():
except
TypeError
:
pass
def
test_may_share_memory
():
a
=
np
.
random
.
rand
(
5
,
4
)
b
=
np
.
random
.
rand
(
5
,
4
)
may_share_memory_core
(
a
,
b
)
if
scipy_imported
:
def
test_may_share_memory_scipy
():
a
=
scipy
.
sparse
.
csc_matrix
(
scipy
.
sparse
.
eye
(
5
,
3
))
...
...
theano/misc/tests/test_pkl_utils.py
浏览文件 @
9cde027a
...
...
@@ -5,13 +5,9 @@ import unittest
from
tempfile
import
mkdtemp
import
numpy
as
np
from
nose.plugins.skip
import
SkipTest
import
theano
import
theano.sandbox.cuda
as
cuda_ndarray
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda.var
import
CudaNdarraySharedVariable
from
theano.sandbox.rng_mrg
import
MRG_RandomStreams
from
theano.misc.pkl_utils
import
dump
,
load
,
StripPickler
...
...
@@ -29,24 +25,8 @@ class T_dump_load(unittest.TestCase):
if
self
.
tmpdir
is
not
None
:
shutil
.
rmtree
(
self
.
tmpdir
)
def
test_dump_load
(
self
):
if
not
cuda_ndarray
.
cuda_enabled
:
raise
SkipTest
(
'Optional package cuda disabled'
)
x
=
CudaNdarraySharedVariable
(
'x'
,
CudaNdarrayType
((
1
,
1
),
name
=
'x'
),
[[
1
]],
False
)
with
open
(
'test'
,
'wb'
)
as
f
:
dump
(
x
,
f
)
with
open
(
'test'
,
'rb'
)
as
f
:
x
=
load
(
f
)
assert
x
.
name
==
'x'
np
.
testing
.
assert_allclose
(
x
.
get_value
(),
[[
1
]])
def
test_dump_load_mrg
(
self
):
rng
=
MRG_RandomStreams
(
use_cuda
=
cuda_ndarray
.
cuda_enabled
)
rng
=
MRG_RandomStreams
()
with
open
(
'test'
,
'wb'
)
as
f
:
dump
(
rng
,
f
)
...
...
theano/misc/tests/test_pycuda_example.py
deleted
100644 → 0
浏览文件 @
80a1e8e0
from
__future__
import
absolute_import
,
print_function
,
division
import
numpy
as
np
import
theano
import
theano.misc.pycuda_init
if
not
theano
.
misc
.
pycuda_init
.
pycuda_available
:
# noqa
from
nose.plugins.skip
import
SkipTest
raise
SkipTest
(
"Pycuda not installed. Skip test of theano op"
" with pycuda code."
)
import
theano.sandbox.cuda
as
cuda_ndarray
if
not
cuda_ndarray
.
cuda_available
:
# noqa
from
nose.plugins.skip
import
SkipTest
raise
SkipTest
(
'Optional package cuda disabled'
)
import
theano.tensor
as
T
from
theano.misc.pycuda_example
import
(
PycudaElemwiseSourceModuleOp
,
PycudaElemwiseSourceModuleMakeThunkOp
)
if
theano
.
config
.
mode
==
'FAST_COMPILE'
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
including
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpu'
)
else
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpu'
)
def
test_pycuda_elemwise_source_module
():
for
shape
in
[(
5
,
5
),
(
10
,
49
),
(
50
,
49
),
(
500
,
501
)]:
for
op
in
[
theano
.
scalar
.
basic
.
mul
,
theano
.
scalar
.
basic
.
add
]:
x
=
T
.
fmatrix
(
'x'
)
y
=
T
.
fmatrix
(
'y'
)
elemwise_op
=
theano
.
tensor
.
Elemwise
(
op
)
pycuda_op
=
PycudaElemwiseSourceModuleOp
(
op
)
pycuda_op_thunk
=
PycudaElemwiseSourceModuleMakeThunkOp
(
op
)
f
=
theano
.
function
([
x
,
y
],
elemwise_op
(
x
,
y
),
mode
=
mode_with_gpu
)
f2
=
theano
.
function
([
x
,
y
],
theano
.
sandbox
.
cuda
.
host_from_gpu
(
pycuda_op
(
x
,
y
)),
mode
=
mode_with_gpu
)
mode_pycuda
=
mode_with_gpu
.
including
(
"local_pycuda_gpu_elemwise"
)
f3
=
theano
.
function
([
x
,
y
],
elemwise_op
(
x
,
y
),
mode
=
mode_pycuda
)
f4
=
theano
.
function
([
x
,
y
],
theano
.
sandbox
.
cuda
.
host_from_gpu
(
pycuda_op_thunk
(
x
,
y
)),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
theano
.
sandbox
.
cuda
.
GpuElemwise
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
assert
any
([
isinstance
(
node
.
op
,
PycudaElemwiseSourceModuleOp
)
for
node
in
f2
.
maker
.
fgraph
.
toposort
()])
assert
any
([
isinstance
(
node
.
op
,
PycudaElemwiseSourceModuleOp
)
for
node
in
f3
.
maker
.
fgraph
.
toposort
()])
assert
any
([
isinstance
(
node
.
op
,
PycudaElemwiseSourceModuleMakeThunkOp
)
for
node
in
f4
.
maker
.
fgraph
.
toposort
()])
val1
=
np
.
asarray
(
np
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
val2
=
np
.
asarray
(
np
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
assert
np
.
allclose
(
f
(
val1
,
val2
),
f2
(
val1
,
val2
))
assert
np
.
allclose
(
f
(
val1
,
val2
),
f3
(
val1
,
val2
))
assert
np
.
allclose
(
f
(
val1
,
val2
),
f4
(
val1
,
val2
))
# print f(val1,val2)
# print f2(val1,val2)
"""
#commented as it work only with old pycuda version.
def test_pycuda_elemwise_kernel():
x = T.fmatrix('x')
y = T.fmatrix('y')
f = theano.function([x, y], x + y, mode=mode_with_gpu)
print(f.maker.fgraph.toposort())
mode_pycuda = mode_with_gpu.including("local_pycuda_gpu_elemwise_kernel")
f2 = theano.function([x, y], x + y, mode=mode_pycuda)
print(f2.maker.fgraph.toposort())
assert any([isinstance(node.op, theano.sandbox.cuda.GpuElemwise)
for node in f.maker.fgraph.toposort()])
assert any([isinstance(node.op, PycudaElemwiseKernelOp)
for node in f2.maker.fgraph.toposort()])
val1 = np.asarray(np.random.rand(5, 5), dtype='float32')
val2 = np.asarray(np.random.rand(5, 5), dtype='float32')
#val1 = np.ones((5,5))
#val2 = np.arange(25).reshape(5,5)
assert (f(val1, val2) == f2(val1, val2)).all()
print(f(val1, val2))
print(f2(val1, val2))
x3 = T.ftensor3('x')
y3 = T.ftensor3('y')
z3 = T.ftensor3('y')
f4 = theano.function([x3, y3, z3], x3 * y3 + z3, mode=mode_pycuda)
print(f4.maker.fgraph.toposort())
assert any([isinstance(node.op, PycudaElemwiseKernelOp)
for node in f4.maker.fgraph.toposort()])
val1 = np.random.rand(2, 2, 2)
print(val1)
print(f4(val1, val1, val1))
assert np.allclose(f4(val1, val1, val1), val1 * val1 + val1)
"""
theano/misc/tests/test_pycuda_theano_simple.py
deleted
100644 → 0
浏览文件 @
80a1e8e0
"""
This file is an example of view the memory allocated by pycuda in a GpuArray
in a CudaNdarray to be able to use it in Theano.
This also serve as a test for the function: cuda_ndarray.from_gpu_pointer
"""
from
__future__
import
absolute_import
,
print_function
,
division
import
sys
import
numpy
as
np
import
theano
import
theano.sandbox.cuda
as
cuda_ndarray
import
theano.misc.pycuda_init
if
not
theano
.
misc
.
pycuda_init
.
pycuda_available
:
# noqa
from
nose.plugins.skip
import
SkipTest
raise
SkipTest
(
"Pycuda not installed."
" We skip tests of Theano Ops with pycuda code."
)
if
cuda_ndarray
.
cuda_available
is
False
:
# noqa
from
nose.plugins.skip
import
SkipTest
raise
SkipTest
(
'Optional theano package cuda disabled'
)
import
pycuda
import
pycuda.driver
as
drv
import
pycuda.gpuarray
def test_pycuda_only():
    """Run a pycuda-only example to test that pycuda works.

    Compiles a trivial elementwise-multiply CUDA kernel with pycuda's
    SourceModule and launches it on two random float32 vectors held in
    numpy arrays; ``drv.In``/``drv.Out`` handle the host<->device copies.
    No Theano objects are involved here.
    """
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
""")
    multiply_them = mod.get_function("multiply_them")

    # Test with pycuda in/out of numpy.ndarray
    a = np.random.randn(100).astype(np.float32)
    b = np.random.randn(100).astype(np.float32)
    dest = np.zeros_like(a)
    # BUG FIX: the launch previously used block=(400, 1, 1) while the
    # buffers hold only 100 elements, so threads 100..399 read and wrote
    # past the end of the device allocations (undefined behavior).
    # Launch exactly one thread per element instead.
    multiply_them(
        drv.Out(dest), drv.In(a), drv.In(b),
        block=(int(a.size), 1, 1), grid=(1, 1))

    assert (dest == a * b).all()
def test_pycuda_theano():
    """Simple example with pycuda function and Theano CudaNdarray object.

    Same elementwise-multiply kernel as ``test_pycuda_only``, but the
    device buffers are Theano ``CudaNdarray`` objects passed straight to
    the pycuda kernel, checking the two libraries can share GPU memory.
    """
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
""")
    multiply_them = mod.get_function("multiply_them")

    a = np.random.randn(100).astype(np.float32)
    b = np.random.randn(100).astype(np.float32)

    # Test with Theano object
    ga = cuda_ndarray.CudaNdarray(a)
    gb = cuda_ndarray.CudaNdarray(b)
    dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
    # BUG FIX: the launch previously used block=(400, 1, 1) while the
    # CudaNdarrays hold only 100 elements, so threads 100..399 accessed
    # memory past the end of the device allocations (undefined behavior).
    # Launch exactly one thread per element instead.
    multiply_them(dest, ga, gb,
                  block=(int(a.size), 1, 1), grid=(1, 1))
    assert (np.asarray(dest) == a * b).all()
def test_pycuda_memory_to_theano():
    # Test that we can use the GpuArray memory space in pycuda in a CudaNdarray.
    #
    # The test wraps the raw device pointer of a pycuda GPUArray in a
    # CudaNdarray via cuda_ndarray.from_gpu_pointer and checks:
    #   * the wrapper keeps exactly one extra reference to the GPUArray,
    #   * views of the wrapper add/drop references correctly,
    #   * in-place arithmetic on the wrapper writes into pycuda's memory.
    # NOTE(review): the exact sys.getrefcount assertions make this function
    # sensitive to any extra local binding of `y` — do not restructure.
    y = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
    print(sys.getrefcount(y))
    # This increases the ref count with newer pycuda.  Does pycuda also
    # cache the ndarray?  (left from the original author)
    # print y.get()
    initial_refcount = sys.getrefcount(y)
    print("gpuarray ref count before creating a CudaNdarray", end=' ')
    print(sys.getrefcount(y))
    assert sys.getrefcount(y) == initial_refcount
    rand = np.random.randn(*y.shape).astype(np.float32)
    cuda_rand = cuda_ndarray.CudaNdarray(rand)

    # Build C-contiguous strides (in elements, innermost stride == 1)
    # for y's shape; must match what CudaNdarray computes internally.
    strides = [1]
    for i in y.shape[::-1][:-1]:
        strides.append(strides[-1] * i)
    strides = tuple(strides[::-1])
    print('strides', strides)
    assert cuda_rand._strides == strides, (cuda_rand._strides, strides)

    # in pycuda trunk, y.ptr also works, which is a little cleaner
    y_ptr = int(y.gpudata)
    # Passing `y` as the base keeps the pycuda allocation alive as long
    # as the CudaNdarray view exists.
    z = cuda_ndarray.from_gpu_pointer(y_ptr, y.shape, strides, y)
    print("gpuarray ref count after creating a CudaNdarray",
          sys.getrefcount(y))
    assert sys.getrefcount(y) == initial_refcount + 1
    assert (np.asarray(z) == 0).all()
    assert z.base is y

    # Test that we can take a view from this cuda view on pycuda memory.
    zz = z.view()
    assert sys.getrefcount(y) == initial_refcount + 2
    assert zz.base is y
    del zz
    assert sys.getrefcount(y) == initial_refcount + 1

    # In-place add (broadcasted ones) must write through to y's memory.
    cuda_ones = cuda_ndarray.CudaNdarray(np.asarray([[[1]]],
                                                    dtype='float32'))
    z += cuda_ones
    assert (np.asarray(z) == np.ones(y.shape)).all()
    assert (np.asarray(z) == 1).all()

    assert cuda_rand.shape == z.shape
    assert cuda_rand._strides == z._strides, (cuda_rand._strides,
                                              z._strides)
    assert (np.asarray(cuda_rand) == rand).all()
    z += cuda_rand
    assert (np.asarray(z) == (rand + 1)).all()

    # Check that the ref count to the gpuarray is right.
    del z
    print("gpuarray ref count after deleting the CudaNdarray", end=' ')
    print(sys.getrefcount(y))
    assert sys.getrefcount(y) == initial_refcount
theano/misc/tests/test_pycuda_utils.py
deleted
100644 → 0
浏览文件 @
80a1e8e0
from
__future__
import
absolute_import
,
print_function
,
division
import
numpy
as
np
import
theano.sandbox.cuda
as
cuda
import
theano.misc.pycuda_init
if
not
theano
.
misc
.
pycuda_init
.
pycuda_available
:
# noqa
from
nose.plugins.skip
import
SkipTest
raise
SkipTest
(
"Pycuda not installed. Skip test of theano op with pycuda "
"code."
)
if
cuda
.
cuda_available
is
False
:
# noqa
from
nose.plugins.skip
import
SkipTest
raise
SkipTest
(
'Optional theano package cuda disabled'
)
from
theano.misc.pycuda_utils
import
to_gpuarray
,
to_cudandarray
import
pycuda.gpuarray
def test_to_gpuarray():
    """Check that ``to_gpuarray`` wraps a CudaNdarray as a pycuda GPUArray.

    Contiguous input: the two objects must alias the same device memory.
    Strided input with ``copyif=True``: a copy is made; without it a
    ValueError is raised.
    """
    c_arr = cuda.CudaNdarray.zeros((5, 4))
    p_arr = to_gpuarray(c_arr)
    assert isinstance(p_arr, pycuda.gpuarray.GPUArray)

    c_arr[0, 0] = np.asarray(1, dtype="float32")
    # Check that they share the same memory space: the write above must
    # be visible through the pycuda wrapper.
    assert p_arr.gpudata == c_arr.gpudata
    assert np.asarray(c_arr[0, 0]) == 1
    assert np.allclose(np.asarray(c_arr), p_arr.get())
    assert p_arr.dtype == c_arr.dtype
    assert p_arr.shape == c_arr.shape
    # CudaNdarray strides are in elements, pycuda's in bytes (x4 float32).
    assert all(np.asarray(c_arr._strides) * 4 == p_arr.strides)

    # Test when the CudaNdarray is strided.
    c_arr = c_arr[::2, ::]
    p_arr = to_gpuarray(c_arr, copyif=True)
    assert isinstance(p_arr, pycuda.gpuarray.GPUArray)

    c_arr[0, 0] = np.asarray(2, dtype="float32")
    # Check that they do not share the same memory space: the copy made
    # by copyif=True must be independent of later writes.
    assert p_arr.gpudata != c_arr.gpudata
    assert np.asarray(c_arr[0, 0]) == 2
    assert not np.allclose(np.asarray(c_arr), p_arr.get())
    assert p_arr.dtype == c_arr.dtype
    assert p_arr.shape == c_arr.shape
    assert not all(np.asarray(c_arr._strides) * 4 == p_arr.strides)

    # Without copyif, a strided input must raise ValueError.
    raised = False
    try:
        p_arr = to_gpuarray(c_arr)
    except ValueError:
        raised = True
    assert raised
def test_to_cudandarray():
    """Check that ``to_cudandarray`` wraps a pycuda GPUArray as a CudaNdarray.

    A float32 GPUArray must convert with matching data, dtype, shape and
    strides; float64 arrays and plain numpy arrays must raise ValueError.
    """
    p_arr = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
    c_arr = to_cudandarray(p_arr)
    assert isinstance(c_arr, cuda.CudaNdarray)
    assert np.allclose(p_arr.get(), np.asarray(c_arr))
    assert p_arr.dtype == c_arr.dtype
    assert p_arr.shape == c_arr.shape
    # CudaNdarray strides are in elements, pycuda's in bytes (x4 float32).
    assert all(np.asarray(c_arr._strides) * 4 == p_arr.strides)

    # Only float32 is supported: float64 input must raise ValueError.
    raised = False
    try:
        to_cudandarray(pycuda.gpuarray.zeros((3, 4, 5), 'float64'))
    except ValueError:
        raised = True
    assert raised

    # A host numpy array is not a GPUArray: must raise ValueError too.
    raised = False
    try:
        to_cudandarray(np.zeros(4))
    except ValueError:
        raised = True
    assert raised
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论